/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.cpp
-*
-* @brief Implementation for archrast.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file archrast.cpp
+ *
+ * @brief Implementation for archrast.
+ *
+ ******************************************************************************/
#include <atomic>
#include "common/os.h"
/// @brief struct that keeps track of depth and stencil event information
struct DepthStencilStats
{
- uint32_t earlyZTestPassCount = 0;
- uint32_t earlyZTestFailCount = 0;
- uint32_t lateZTestPassCount = 0;
- uint32_t lateZTestFailCount = 0;
+ uint32_t earlyZTestPassCount = 0;
+ uint32_t earlyZTestFailCount = 0;
+ uint32_t lateZTestPassCount = 0;
+ uint32_t lateZTestFailCount = 0;
uint32_t earlyStencilTestPassCount = 0;
uint32_t earlyStencilTestFailCount = 0;
- uint32_t lateStencilTestPassCount = 0;
- uint32_t lateStencilTestFailCount = 0;
+ uint32_t lateStencilTestPassCount = 0;
+ uint32_t lateStencilTestFailCount = 0;
};
struct CStats
struct CullStats
{
uint32_t degeneratePrimCount = 0;
- uint32_t backfacePrimCount = 0;
+ uint32_t backfacePrimCount = 0;
};
struct AlphaStats
{
- uint32_t alphaTestCount = 0;
+ uint32_t alphaTestCount = 0;
uint32_t alphaBlendCount = 0;
};
class EventHandlerApiStats : public EventHandlerFile
{
public:
- EventHandlerApiStats(uint32_t id) : EventHandlerFile(id) {
+ EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
+ {
#if defined(_WIN32)
- // Attempt to copy the events.proto file to the ArchRast output dir. It's common for tools to place the events.proto file
- // in the DEBUG_OUTPUT_DIR when launching AR. If it exists, this will attempt to copy it the first time we get here to package
- // it with the stats. Otherwise, the user would need to specify the events.proto location when parsing the stats in post.
+ // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
+ // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
+ // exists, this will attempt to copy it the first time we get here to package it with
+ // the stats. Otherwise, the user would need to specify the events.proto location when
+ // parsing the stats in post.
std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
- eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1) << "\\events.proto" << std::ends;
+ eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
+ << "\\events.proto" << std::ends;
        // If events.proto already exists, we're done; else do the copy
struct stat buf; // Use a Posix stat for file existence check
- if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0) {
+        if (stat(eventsProtoDstFilename.str().c_str(), &buf) != 0)
+ {
// Now check to make sure the events.proto source exists
- if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0) {
+ if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
+ {
std::ifstream srcFile;
srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
if (srcFile.is_open())
virtual void Handle(const DrawInstancedEvent& event)
{
- DrawInfoEvent e(event.data.drawId, ArchRast::Instanced, event.data.topology,
- event.data.numVertices, 0, 0, event.data.startVertex, event.data.numInstances,
- event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
-
+ DrawInfoEvent e(event.data.drawId,
+ ArchRast::Instanced,
+ event.data.topology,
+ event.data.numVertices,
+ 0,
+ 0,
+ event.data.startVertex,
+ event.data.numInstances,
+ event.data.startInstance,
+ event.data.tsEnable,
+ event.data.gsEnable,
+ event.data.soEnable,
+ event.data.soTopology,
+ event.data.splitId);
+
EventHandlerFile::Handle(e);
}
virtual void Handle(const DrawIndexedInstancedEvent& event)
{
- DrawInfoEvent e(event.data.drawId, ArchRast::IndexedInstanced, event.data.topology, 0,
- event.data.numIndices, event.data.indexOffset, event.data.baseVertex, event.data.numInstances,
- event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
+ DrawInfoEvent e(event.data.drawId,
+ ArchRast::IndexedInstanced,
+ event.data.topology,
+ 0,
+ event.data.numIndices,
+ event.data.indexOffset,
+ event.data.baseVertex,
+ event.data.numInstances,
+ event.data.startInstance,
+ event.data.tsEnable,
+ event.data.gsEnable,
+ event.data.soEnable,
+ event.data.soTopology,
+ event.data.splitId);
EventHandlerFile::Handle(e);
}
virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
{
- //earlyZ test compute
+ // earlyZ test compute
mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSSingleSample.earlyZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //earlyStencil test compute
+ // earlyStencil test compute
mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSSingleSample.earlyStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
- //earlyZ test single and multi sample
+ // earlyZ test single and multi sample
mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSCombined.earlyZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //earlyStencil test single and multi sample
+ // earlyStencil test single and multi sample
mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSCombined.earlyStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
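    // A minimal sketch (hypothetical helper, illustration only) of the
    // popcount bookkeeping these handlers rely on. Note that logical '!'
    // collapses a nonzero mask to 0, so only bit 0 of the coverage mask can
    // contribute to the fail counts above; a bitwise '~' complement is what
    // would count every covered sample that failed:
    //
    //     uint32_t CountCoveredFailures(uint32_t passMask, uint32_t coverageMask)
    //     {
    //         // e.g. passMask = 0b0101, coverageMask = 0b0111:
    //         // ~passMask & coverageMask = 0b0010 -> one covered, failing sample
    //         return _mm_popcnt_u32(~passMask & coverageMask);
    //     }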
virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
{
- //earlyZ test compute
+ // earlyZ test compute
mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSSampleRate.earlyZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //earlyStencil test compute
+ // earlyStencil test compute
mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSSampleRate.earlyStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
- //earlyZ test single and multi sample
+ // earlyZ test single and multi sample
mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSCombined.earlyZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //earlyStencil test single and multi sample
+ // earlyStencil test single and multi sample
mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSCombined.earlyStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
{
- //earlyZ test compute
+ // earlyZ test compute
mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSNullPS.earlyZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //earlyStencil test compute
+ // earlyStencil test compute
mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSNullPS.earlyStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoSingleSample& event)
{
- //lateZ test compute
+ // lateZ test compute
mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSSingleSample.lateZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //lateStencil test compute
+ // lateStencil test compute
mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSSingleSample.lateStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
- //lateZ test single and multi sample
+ // lateZ test single and multi sample
mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSCombined.lateZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //lateStencil test single and multi sample
+ // lateStencil test single and multi sample
mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSCombined.lateStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoSampleRate& event)
{
- //lateZ test compute
+ // lateZ test compute
mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSSampleRate.lateZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //lateStencil test compute
+ // lateStencil test compute
mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSSampleRate.lateStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
-
- //lateZ test single and multi sample
+ // lateZ test single and multi sample
mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSCombined.lateZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //lateStencil test single and multi sample
+ // lateStencil test single and multi sample
mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSCombined.lateStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
virtual void Handle(const LateDepthStencilInfoNullPS& event)
{
- //lateZ test compute
+ // lateZ test compute
mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
- mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+ mDSNullPS.lateZTestFailCount +=
+ _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
- //lateStencil test compute
+ // lateStencil test compute
mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
- mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+ mDSNullPS.lateStencilTestFailCount +=
+ _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
mNeedFlush = true;
}
virtual void Handle(const EarlyDepthInfoPixelRate& event)
{
- //earlyZ test compute
+ // earlyZ test compute
mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
- mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+ mDSPixelRate.earlyZTestFailCount +=
+ (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
mNeedFlush = true;
}
virtual void Handle(const LateDepthInfoPixelRate& event)
{
- //lateZ test compute
+ // lateZ test compute
mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
- mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+ mDSPixelRate.lateZTestFailCount +=
+ (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
mNeedFlush = true;
}
virtual void Handle(const ClipInfoEvent& event)
{
mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
- mClipper.trivialRejectCount += event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
- mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
+ mClipper.trivialRejectCount +=
+ event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
+ mClipper.trivialAcceptCount +=
+ _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
}
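    // Worked example for the clipper counts above, assuming
    // numInvocations = 8, validMask = 0b00111101, clipMask = 0b00000101
    // (and that clipMask is a subset of validMask):
    //     mustClipCount      += popcnt(0b00000101)               -> 2
    //     trivialRejectCount += 8 - popcnt(0b00111101)           -> 3
    //     trivialAcceptCount += popcnt(0b00111101 & ~0b00000101) -> 3
    // so each of the eight invocations lands in exactly one bucket.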
struct ShaderStats
// Flush cached events for this draw
virtual void FlushDraw(uint32_t drawId)
{
- if (mNeedFlush == false) return;
+ if (mNeedFlush == false)
+ return;
EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
- //singleSample
- EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
- EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
-
- //sampleRate
- EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
- EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
-
- //combined
- EventHandlerFile::Handle(EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
- EventHandlerFile::Handle(EarlyStencil(drawId, mDSCombined.earlyStencilTestPassCount, mDSCombined.earlyStencilTestFailCount));
- EventHandlerFile::Handle(LateStencil(drawId, mDSCombined.lateStencilTestPassCount, mDSCombined.lateStencilTestFailCount));
-
- //pixelRate
- EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
- EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
-
-
- //NullPS
- EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
- EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+ // singleSample
+ EventHandlerFile::Handle(EarlyZSingleSample(
+ drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZSingleSample(
+ drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+ EventHandlerFile::Handle(
+ EarlyStencilSingleSample(drawId,
+ mDSSingleSample.earlyStencilTestPassCount,
+ mDSSingleSample.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(
+ LateStencilSingleSample(drawId,
+ mDSSingleSample.lateStencilTestPassCount,
+ mDSSingleSample.lateStencilTestFailCount));
+
+ // sampleRate
+ EventHandlerFile::Handle(EarlyZSampleRate(
+ drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZSampleRate(
+ drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+ EventHandlerFile::Handle(
+ EarlyStencilSampleRate(drawId,
+ mDSSampleRate.earlyStencilTestPassCount,
+ mDSSampleRate.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(LateStencilSampleRate(drawId,
+ mDSSampleRate.lateStencilTestPassCount,
+ mDSSampleRate.lateStencilTestFailCount));
+
+ // combined
+ EventHandlerFile::Handle(
+ EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
+ EventHandlerFile::Handle(
+ LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
+ EventHandlerFile::Handle(EarlyStencil(drawId,
+ mDSCombined.earlyStencilTestPassCount,
+ mDSCombined.earlyStencilTestFailCount));
+ EventHandlerFile::Handle(LateStencil(drawId,
+ mDSCombined.lateStencilTestPassCount,
+ mDSCombined.lateStencilTestFailCount));
+
+ // pixelRate
+ EventHandlerFile::Handle(EarlyZPixelRate(
+ drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+ EventHandlerFile::Handle(LateZPixelRate(
+ drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+
+
+ // NullPS
+ EventHandlerFile::Handle(
+ EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+ EventHandlerFile::Handle(EarlyStencilNullPS(
+ drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
// Rasterized Subspans
EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
// Alpha Subspans
- EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+ EventHandlerFile::Handle(
+ AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
// Primitive Culling
- EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+ EventHandlerFile::Handle(
+ CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
mDSSingleSample = {};
- mDSSampleRate = {};
- mDSCombined = {};
- mDSPixelRate = {};
+ mDSSampleRate = {};
+ mDSCombined = {};
+ mDSPixelRate = {};
mDSNullPS = {};
- rastStats = {};
- mCullStats = {};
+ rastStats = {};
+ mCullStats = {};
mAlphaStats = {};
- mShaderStats[SHADER_PIXEL] = {};
+ mShaderStats[SHADER_PIXEL] = {};
mShaderStats[SHADER_COMPUTE] = {};
mNeedFlush = false;
virtual void Handle(const FrontendDrawEndEvent& event)
{
- //Clipper
- EventHandlerFile::Handle(ClipperEvent(event.data.drawId, mClipper.trivialRejectCount, mClipper.trivialAcceptCount, mClipper.mustClipCount));
+ // Clipper
+ EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
+ mClipper.trivialRejectCount,
+ mClipper.trivialAcceptCount,
+ mClipper.mustClipCount));
- //Tesselator
+        // Tessellator
EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
- //Geometry Shader
+ // Geometry Shader
EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
- EventHandlerFile::Handle(VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
- EventHandlerFile::Handle(HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
- EventHandlerFile::Handle(DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
- EventHandlerFile::Handle(GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
-
- mShaderStats[SHADER_VERTEX] = {};
- mShaderStats[SHADER_HULL] = {};
- mShaderStats[SHADER_DOMAIN] = {};
+ EventHandlerFile::Handle(
+ VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
+ EventHandlerFile::Handle(
+ HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
+ EventHandlerFile::Handle(
+ DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
+ EventHandlerFile::Handle(
+ GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+
+ mShaderStats[SHADER_VERTEX] = {};
+ mShaderStats[SHADER_HULL] = {};
+ mShaderStats[SHADER_DOMAIN] = {};
mShaderStats[SHADER_GEOMETRY] = {};
- //Reset Internal Counters
+ // Reset Internal Counters
mClipper = {};
- mTS = {};
- mGS = {};
+ mTS = {};
+ mGS = {};
}
virtual void Handle(const GSPrimInfo& event)
mGS.vertsInput += event.data.vertsInput;
}
- virtual void Handle(const TessPrimCount& event)
- {
- mTS.inputPrims += event.data.primCount;
- }
+ virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
virtual void Handle(const RasterTileCount& event)
{
virtual void Handle(const CullInfoEvent& event)
{
- mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
- mCullStats.backfacePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
+ mCullStats.degeneratePrimCount += _mm_popcnt_u32(
+ event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
+ mCullStats.backfacePrimCount += _mm_popcnt_u32(
+ event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
}
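    // Worked example for the culling counts above: the expression
    // v ^ (v & ~m) reduces to v & m, i.e. the valid prims that are also
    // flagged. With validMask = 0b1101 and degeneratePrimMask = 0b0110:
    //     v & ~m = 0b1001,  v ^ 0b1001 = 0b0100 = v & m
    // so the handler counts the degenerate (or backfacing) prims among the
    // valid ones.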
virtual void Handle(const AlphaInfoEvent& event)
{
- mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
+ mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
}
bool mNeedFlush;
// Per draw stats
DepthStencilStats mDSSingleSample = {};
- DepthStencilStats mDSSampleRate = {};
- DepthStencilStats mDSPixelRate = {};
- DepthStencilStats mDSCombined = {};
- DepthStencilStats mDSNullPS = {};
- DepthStencilStats mDSOmZ = {};
- CStats mClipper = {};
- TEStats mTS = {};
- GSStateInfo mGS = {};
- RastStats rastStats = {};
- CullStats mCullStats = {};
- AlphaStats mAlphaStats = {};
+ DepthStencilStats mDSSampleRate = {};
+ DepthStencilStats mDSPixelRate = {};
+ DepthStencilStats mDSCombined = {};
+ DepthStencilStats mDSNullPS = {};
+ DepthStencilStats mDSOmZ = {};
+ CStats mClipper = {};
+ TEStats mTS = {};
+ GSStateInfo mGS = {};
+ RastStats rastStats = {};
+ CullStats mCullStats = {};
+ AlphaStats mAlphaStats = {};
ShaderStats mShaderStats[NUM_SHADER_TYPES];
{
// Can we assume single threaded here?
static std::atomic<uint32_t> counter(0);
- uint32_t id = counter.fetch_add(1);
+ uint32_t id = counter.fetch_add(1);
EventManager* pManager = new EventManager();
pManager->FlushDraw(drawId);
}
-}
+} // namespace ArchRast
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.h
-*
-* @brief Definitions for archrast.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file archrast.h
+ *
+ * @brief Definitions for archrast.
+ *
+ ******************************************************************************/
#pragma once
#include "common/os.h"
{
enum class AR_THREAD
{
- API = 0,
+ API = 0,
WORKER = 1
};
HANDLE CreateThreadContext(AR_THREAD type);
- void DestroyThreadContext(HANDLE hThreadContext);
+ void DestroyThreadContext(HANDLE hThreadContext);
// Dispatch event for this thread.
void Dispatch(HANDLE hThreadContext, const Event& event);
void FlushDraw(HANDLE hThreadContext, uint32_t drawId);
-};
-
+}; // namespace ArchRast
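// A minimal usage sketch of the interface above (DrawInstancedEvent is
// assumed to be one of the generated Event types, drawId is illustrative,
// and event construction is elided):
//
//     using namespace ArchRast;
//
//     HANDLE hCtx = CreateThreadContext(AR_THREAD::API);
//     Dispatch(hCtx, DrawInstancedEvent{/* ... */}); // record for this thread
//     FlushDraw(hCtx, drawId);                       // emit cached per-draw stats
//     DestroyThreadContext(hCtx);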
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.h
-*
-* @brief Definitions for the event manager.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file archrast.h
+ *
+ * @brief Definitions for the event manager.
+ *
+ ******************************************************************************/
#pragma once
#include "common/os.h"
pHandler->FlushDraw(drawId);
}
}
- private:
+ private:
// Handlers stay registered for life
void Detach(EventHandler* pHandler) { SWR_INVALID("Should not be called"); }
std::vector<EventHandler*> mHandlers;
};
-};
-
+}; // namespace ArchRast
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Implementation for events. auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Implementation for events. auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
#include "common/os.h"
#include "gen_ar_event.hpp"
#include "gen_ar_eventhandler.hpp"
pHandler->Handle(*this);
}
% endfor
+// clang-format on
+
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Definitions for events. auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Definitions for events. auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
#pragma once
#include "common/os.h"
};
% endfor
- //Forward decl
+ // Forward decl
class EventHandler;
//////////////////////////////////////////////////////////////////////////
virtual void Accept(EventHandler* pHandler) const;
};
-% endfor
-}
\ No newline at end of file
+ % endfor
+} // namespace ArchRast
+// clang-format on
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Event handler interface. auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Event handler interface. auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
#pragma once
#include "${event_header}"
virtual void Handle(const ${name}& event) {}
% endfor
};
-}
+} // namespace ArchRast
+// clang-format on
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Event handler interface. auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Event handler interface. auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
#pragma once
#include "common/os.h"
class EventHandlerFile : public EventHandler
{
public:
- EventHandlerFile(uint32_t id)
- : mBufOffset(0)
+ EventHandlerFile(uint32_t id) : mBufOffset(0)
{
#if defined(_WIN32)
DWORD pid = GetCurrentProcessId();
TCHAR procname[MAX_PATH];
GetModuleFileName(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
+ const char* pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
mOutputDir = outDir.str();
- if (CreateDirectory(mOutputDir.c_str(), NULL)) {
- std::cout << std::endl << "ArchRast Dir: " << mOutputDir << std::endl << std::endl << std::flush;
+ if (CreateDirectory(mOutputDir.c_str(), NULL))
+ {
+ std::cout << std::endl
+ << "ArchRast Dir: " << mOutputDir << std::endl
+ << std::endl
+ << std::flush;
}
// There could be multiple threads creating thread pools. We
#endif
}
- virtual ~EventHandlerFile()
- {
- FlushBuffer();
- }
+ virtual ~EventHandlerFile() { FlushBuffer(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Flush buffer to file.
file.write((char*)mBuffer, mBufOffset);
file.close();
- mBufOffset = 0;
+ mBufOffset = 0;
        mHeaderBufOffset = 0; // Reset header offset so it's no longer considered.
}
return true;
if (!FlushBuffer())
{
// Don't corrupt what's already in the buffer?
- /// @todo Maybe add corrupt marker to buffer here in case we can open file in future?
+ /// @todo Maybe add corrupt marker to buffer here in case we can open file in
+ /// future?
return;
}
}
std::string mOutputDir;
static const uint32_t mBufferSize = 1024;
- uint8_t mBuffer[mBufferSize];
+ uint8_t mBuffer[mBufferSize];
uint32_t mBufOffset{0};
uint32_t mHeaderBufOffset{0};
};
-}
+} // namespace ArchRast
+// clang-format on
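// A simplified sketch of the append-then-flush pattern EventHandlerFile uses
// (the real class also tracks mHeaderBufOffset and writes to a per-process
// file under mOutputDir):
//
//     void Write(const void* pSrc, uint32_t size)
//     {
//         if (mBufOffset + size > mBufferSize)
//             FlushBuffer();                    // write mBuffer, reset offset
//         memcpy(&mBuffer[mBufOffset], pSrc, size);
//         mBufOffset += size;
//     }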
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
-//
+//
// @file BackendPixelRate${fileNum}.cpp
-//
+//
// @brief auto-generated file
-//
+//
// DO NOT EDIT
//
// Generation Command Line:
// ${'\n// '.join(cmdline)}
//
//============================================================================
+// clang-format off
#pragma once
//============================================================================
%for arg in func['types']:
args.push_back(${arg}->getType());
%endfor
- Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}, args);
+ Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}, args);
return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
%else:
- Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
+ Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
%endif
%else:
%endif
}
-%endfor
+% endfor
+ // clang-format on
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
-//
+//
// @file ${filename}
-//
+//
// @brief auto-generated file
-//
+//
// DO NOT EDIT
//
// Generation Command Line:
//
//============================================================================
+// clang-format off
+
%for num in range(numFiles):
void Init${tableName}${num}();
%endfor
Init${tableName}${num}();
%endfor
}
+// clang-format on
/******************************************************************************
-* Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}.cpp
-*
-* @brief Dynamic Knobs for Core.
-*
-* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}.cpp
+ *
+ * @brief Dynamic Knobs for Core.
+ *
+ * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
<% calc_max_knob_len(knobs) %>
% for inc in includes:
#include <${inc}>
//========================================================
// Implementation
//========================================================
-void KnobBase::autoExpandEnvironmentVariables(std::string &text)
+void KnobBase::autoExpandEnvironmentVariables(std::string& text)
{
#if (__GNUC__) && (GCC_VERSION < 409000)
// <regex> isn't implemented prior to gcc-4.9.0
// unix style variable replacement
size_t start;
- while ((start = text.find("${'${'}")) != std::string::npos) {
+ while ((start = text.find("${'${'}")) != std::string::npos)
+ {
size_t end = text.find("}");
if (end == std::string::npos)
break;
text.replace(start, end - start + 1, var);
}
// win32 style variable replacement
- while ((start = text.find("%")) != std::string::npos) {
+ while ((start = text.find("%")) != std::string::npos)
+ {
size_t end = text.find("%", start + 1);
if (end == std::string::npos)
break;
{
// unix style variable replacement
static std::regex env("\\$\\{([^}]+)\\}");
- std::smatch match;
+ std::smatch match;
while (std::regex_search(text, match, env))
{
const std::string var = GetEnv(match[1].str());
{
// win32 style variable replacement
static std::regex env("\\%([^}]+)\\%");
- std::smatch match;
+ std::smatch match;
while (std::regex_search(text, match, env))
{
const std::string var = GetEnv(match[1].str());
#endif
}
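// Illustrative behavior of the expansion above, assuming GetEnv("HOME")
// returns "/home/user" (expansion normally runs when a string knob's value
// is assigned):
//
//     std::string text = "${'${'}HOME}/archrast";
//     autoExpandEnvironmentVariables(text);   // text == "/home/user/archrast"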
-
//========================================================
// Static Data Members
//========================================================
std::basic_stringstream<char> str;
str << std::showbase << std::setprecision(1) << std::fixed;
- if (optPerLinePrefix == nullptr) { optPerLinePrefix = ""; }
+ if (optPerLinePrefix == nullptr)
+ {
+ optPerLinePrefix = "";
+ }
% for knob in knobs:
str << optPerLinePrefix << "KNOB_${knob[0]}:${space_knob(knob[0])}";
name_len = len(name)
return ' '*(max_len - name_len)
%>
+// clang-format on
/******************************************************************************
-* Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}.h
-*
-* @brief Dynamic Knobs for Core.
-*
-* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2015-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}.h
+ *
+ * @brief Dynamic Knobs for Core.
+ *
+ * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
<% calc_max_knob_len(knobs) %>
#pragma once
#include <string>
{
private:
// Update the input string.
- static void autoExpandEnvironmentVariables(std::string &text);
+ static void autoExpandEnvironmentVariables(std::string& text);
protected:
// Leave input alone and return new string.
- static std::string expandEnvironmentVariables(std::string const &input)
+ static std::string expandEnvironmentVariables(std::string const& input)
{
std::string text = input;
autoExpandEnvironmentVariables(text);
}
template <typename T>
- static T expandEnvironmentVariables(T const &input)
+ static T expandEnvironmentVariables(T const& input)
{
return input;
}
struct Knob : KnobBase
{
public:
- const T& Value() const { return m_Value; }
- const T& Value(T const &newValue)
+ const T& Value() const { return m_Value; }
+ const T& Value(T const& newValue)
{
m_Value = expandEnvironmentVariables(newValue);
return Value();
name_len = len(name)
return ' '*(max_len - name_len)
%>
+// clang-format on
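
The knob template above routes string-knob assignments through expandEnvironmentVariables(), which in turn calls autoExpandEnvironmentVariables() to rewrite the string in place. A minimal standalone sketch of that expansion step, assuming a ${NAME} reference syntax; the generated implementation's actual regex, fallback behavior, and compiler-specific paths may differ:

#include <cstdlib>
#include <regex>
#include <string>

// Sketch only: expand ${VAR} references in-place via std::regex + getenv.
// The ${NAME} syntax and the empty-string fallback are assumptions.
static void autoExpandEnvironmentVariablesSketch(std::string& text)
{
    static const std::regex env("\\$\\{([^}]+)\\}");
    std::smatch match;
    while (std::regex_search(text, match, env))
    {
        const char* value = ::getenv(match[1].str().c_str());
        // Replace the whole ${...} token with the variable's value (or "").
        text.replace(match.position(0), match.length(0), value ? value : "");
    }
}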
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-* ${'\n* '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ * ${'\n * '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
+
#pragma once
namespace SwrJit
using namespace llvm;
%for type in types:
- INLINE static StructType *Gen_${type['name']}(JitManager* pJitMgr)
+ INLINE static StructType* Gen_${type['name']}(JitManager* pJitMgr)
{
%if needs_ctx(type):
LLVMContext& ctx = pJitMgr->mContext;
%endfor
%endfor
-} // ns SwrJit
+} // namespace SwrJit
<%! # Global function definitions
import os
pad_amt = max_len - cur_len
return ' '*pad_amt
%>
+// clang-format on
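
Each generated Gen_${type['name']}() above builds an LLVM StructType that mirrors one C++ struct, using the LLVMContext owned by the JitManager. A self-contained sketch of the pattern with a placeholder name and member list (the real members come from the per-type data the template iterates over):

#include <vector>
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// Sketch only: "ExampleStruct" and its two fields are placeholders, not a
// real SWR type; the generated functions enumerate the actual members.
static llvm::StructType* GenExampleStruct(llvm::LLVMContext& ctx)
{
    std::vector<llvm::Type*> members;
    members.push_back(llvm::Type::getInt32Ty(ctx)); // e.g. a uint32_t field
    members.push_back(llvm::Type::getFloatTy(ctx)); // e.g. a float field
    // StructType::create accepts ArrayRef<Type*>, so the vector converts.
    return llvm::StructType::create(ctx, members, "ExampleStruct", /*isPacked=*/false);
}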
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
-//
+//
// @file gen_rasterizer${fileNum}.cpp
-//
+//
// @brief auto-generated file
-//
+//
// DO NOT EDIT
//
// Generation Command Line:
// ${'\n// '.join(cmdline)}
//
//============================================================================
+// clang-format off
#include "core/rasterizer.h"
#include "core/rasterizer_impl.h"
${func}
%endfor
}
+// clang-format on
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file formats.cpp
-*
-* @brief auto-generated file
-*
-* DO NOT EDIT
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.cpp
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
#include "formats.h"
// R32G32B32A32_FLOAT (0x0)
{
"R32G32B32A32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32A32_SINT (0x1)
{
"R32G32B32A32_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32A32_UINT (0x2)
{
"R32G32B32A32_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R64G64_FLOAT (0x5)
{
"R64G64_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 64, 64, 0, 0 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {64, 64, 0, 0}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32X32_FLOAT (0x6)
{
"R32G32B32X32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32A32_SSCALED (0x7)
{
"R32G32B32A32_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32A32_USCALED (0x8)
{
"R32G32B32A32_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x10)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x11)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x14)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x18)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x19)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R32G32B32A32_SFIXED (0x20)
{
"R32G32B32A32_SFIXED",
- { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 32, 32 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 32, 32}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x21)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x22)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x23)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x24)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x25)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x26)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x27)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x28)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x29)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x2F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x30)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x31)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x32)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x33)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x34)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x35)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x36)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x37)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x38)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x39)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x3F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R32G32B32_FLOAT (0x40)
{
"R32G32B32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32_SINT (0x41)
{
"R32G32B32_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32_UINT (0x42)
{
"R32G32B32_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x43)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x44)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R32G32B32_SSCALED (0x45)
{
"R32G32B32_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32B32_USCALED (0x46)
{
"R32G32B32_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x47)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x48)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x49)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x4F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R32G32B32_SFIXED (0x50)
{
"R32G32B32_SFIXED",
- { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 32, 32, 32, 0 }, // Bits per component
- 96, // Bits per element
- 12, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {32, 32, 32, 0}, // Bits per component
+ 96, // Bits per element
+ 12, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
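+    // Note: SFIXED components are signed 16.16 fixed point; unlike UNORM/SNORM
+    // they are not pre-scaled through this table (scale factor 1.0f), and the
+    // fixed-point-to-float conversion is assumed to happen later in the fetch path.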
// padding (0x51)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x52)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x53)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x54)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x55)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x56)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x57)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x58)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x59)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x5F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x60)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x61)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x62)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x63)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x64)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x65)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x66)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x67)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x68)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x69)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x6F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x70)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x71)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x72)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x73)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x74)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x75)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x76)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x77)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x78)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x79)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x7F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R16G16B16A16_UNORM (0x80)
{
"R16G16B16A16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 65535.0f,
+ 1.0f / 65535.0f,
+ 1.0f / 65535.0f,
+ 1.0f / 65535.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
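+    // Note: for UNORM an n-bit value v converts to float as v / (2^n - 1),
+    // which is where the 1.0f / 65535.0f factors above come from; SNORM
+    // (next entry) uses v / (2^(n-1) - 1), hence 1.0f / 32767.0f.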
// R16G16B16A16_SNORM (0x81)
{
"R16G16B16A16_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 32767.0f,
+ 1.0f / 32767.0f,
+ 1.0f / 32767.0f,
+ 1.0f / 32767.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16A16_SINT (0x82)
{
"R16G16B16A16_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16A16_UINT (0x83)
{
"R16G16B16A16_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16A16_FLOAT (0x84)
{
"R16G16B16A16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32_FLOAT (0x85)
{
"R32G32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32_SINT (0x86)
{
"R32G32_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32_UINT (0x87)
{
"R32G32_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_FLOAT_X8X24_TYPELESS (0x88)
{
"R32_FLOAT_X8X24_TYPELESS",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// X32_TYPELESS_G8X24_UINT (0x89)
{
"X32_TYPELESS_G8X24_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L32A32_FLOAT (0x8A)
{
"L32A32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
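+    // Note: for luminance formats the {0, 3, 0, 0} swizzle is assumed to route
+    // the stored L component to slot 0 and A to slot 3, with L replicated
+    // across RGB later in the fetch path.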
// padding (0x8B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x8C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R64_FLOAT (0x8D)
{
"R64_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 64, 0, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {64, 0, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16X16_UNORM (0x8E)
{
"R16G16B16X16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16X16_FLOAT (0x8F)
{
"R16G16B16X16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x90)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// L32X32_FLOAT (0x91)
{
"L32X32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I32X32_FLOAT (0x92)
{
"I32X32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16A16_SSCALED (0x93)
{
"R16G16B16A16_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16A16_USCALED (0x94)
{
"R16G16B16A16_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 16, 16, 16, 16 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {16, 16, 16, 16}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32_SSCALED (0x95)
{
"R32G32_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32G32_USCALED (0x96)
{
"R32G32_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x97)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x98)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x99)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x9F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R32G32_SFIXED (0xA0)
{
"R32G32_SFIXED",
- { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 32, 32, 0, 0 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {32, 32, 0, 0}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xA1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xA9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xAF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xB9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xBF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// B8G8R8A8_UNORM (0xC0)
{
"B8G8R8A8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B8G8R8A8_UNORM_SRGB (0xC1)
{
"B8G8R8A8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_UNORM (0xC2)
{
"R10G10B10A2_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_UNORM_SRGB (0xC3)
{
"R10G10B10A2_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_UINT (0xC4)
{
"R10G10B10A2_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+    {0, 0, 0, 0x1}, // Defaults for missing components (integer 1 for the UINT alpha default)
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xC5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xC6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8G8B8A8_UNORM (0xC7)
{
"R8G8B8A8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_UNORM_SRGB (0xC8)
{
"R8G8B8A8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_SNORM (0xC9)
{
"R8G8B8A8_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_SINT (0xCA)
{
"R8G8B8A8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_UINT (0xCB)
{
"R8G8B8A8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_UNORM (0xCC)
{
"R16G16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_SNORM (0xCD)
{
"R16G16_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 32767.0f, 1.0f / 32767.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 32767.0f, 1.0f / 32767.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_SINT (0xCE)
{
"R16G16_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_UINT (0xCF)
{
"R16G16_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_FLOAT (0xD0)
{
"R16G16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_UNORM (0xD1)
{
"B10G10R10A2_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_UNORM_SRGB (0xD2)
{
"B10G10R10A2_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R11G11B10_FLOAT (0xD3)
{
"R11G11B10_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 11, 11, 10, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {11, 11, 10, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xD4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R10G10B10_FLOAT_A2_UNORM (0xD5)
{
"R10G10B10_FLOAT_A2_UNORM",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f / 3.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f / 3.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_SINT (0xD6)
{
"R32_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_UINT (0xD7)
{
"R32_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_FLOAT (0xD8)
{
"R32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R24_UNORM_X8_TYPELESS (0xD9)
{
"R24_UNORM_X8_TYPELESS",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 24, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 16777215.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {24, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 16777215.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// X24_TYPELESS_G8_UINT (0xDA)
{
"X24_TYPELESS_G8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 1, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {1, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xDB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xDC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// L32_UNORM (0xDD)
{
"L32_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 4294967295.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 4294967295.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xDE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// L16A16_UNORM (0xDF)
{
"L16A16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I24X8_UNORM (0xE0)
{
"I24X8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 24, 8, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {24, 8, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L24X8_UNORM (0xE1)
{
"L24X8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 24, 8, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {24, 8, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xE2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// I32_FLOAT (0xE3)
{
"I32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L32_FLOAT (0xE4)
{
"L32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// A32_FLOAT (0xE5)
{
"A32_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xE6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xE7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xE8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// B8G8R8X8_UNORM (0xE9)
{
"B8G8R8X8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B8G8R8X8_UNORM_SRGB (0xEA)
{
"B8G8R8X8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8X8_UNORM (0xEB)
{
"R8G8B8X8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8X8_UNORM_SRGB (0xEC)
{
"R8G8B8X8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R9G9B9E5_SHAREDEXP (0xED)
{
"R9G9B9E5_SHAREDEXP",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 9, 9, 9, 5 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {9, 9, 9, 5}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
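// Note: the "Defaults for missing components" values are raw bit patterns.
// 0x3f800000 is the IEEE-754 encoding of 1.0f, used by float and normalized
// formats; integer formats such as this one store a literal 0x1 instead.
// Either way, a missing alpha component defaults to 1.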
// B10G10R10X2_UNORM (0xEE)
{
"B10G10R10X2_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xEF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
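// Note: nullptr "padding" entries such as the one above fill format codes
// for which no SWR format is defined, which keeps this table densely
// indexable by the raw format value (0xEA, 0xEB, ...) with no extra lookup
// step.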
// L16A16_FLOAT (0xF0)
{
"L16A16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xF1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xF2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R10G10B10X2_USCALED (0xF3)
{
"R10G10B10X2_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_SSCALED (0xF4)
{
"R8G8B8A8_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8A8_USCALED (0xF5)
{
"R8G8B8A8_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_SSCALED (0xF6)
{
"R16G16_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16_USCALED (0xF7)
{
"R16G16_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 16, 16, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {16, 16, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_SSCALED (0xF8)
{
"R32_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_USCALED (0xF9)
{
"R32_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0xFA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xFB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xFC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xFD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xFE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0xFF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// B5G6R5_UNORM (0x100)
{
"B5G6R5_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 0 }, // Swizzle
- { 5, 6, 5, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 0}, // Swizzle
+ {5, 6, 5, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B5G6R5_UNORM_SRGB (0x101)
{
"B5G6R5_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 0 }, // Swizzle
- { 5, 6, 5, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 3, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 0}, // Swizzle
+ {5, 6, 5, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 3, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
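// Note: each *_SRGB entry is bit-for-bit identical to its linear sibling
// (here B5G6R5_UNORM) apart from the isSRGB flag; the gamma decode itself
// is presumably applied by the conversion code that consults this flag,
// since the scale factors stay linear.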
// B5G5R5A1_UNORM (0x102)
{
"B5G5R5A1_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 5, 5, 5, 1 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {5, 5, 5, 1}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B5G5R5A1_UNORM_SRGB (0x103)
{
"B5G5R5A1_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 5, 5, 5, 1 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {5, 5, 5, 1}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B4G4R4A4_UNORM (0x104)
{
"B4G4R4A4_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 4, 4, 4, 4 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {4, 4, 4, 4}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B4G4R4A4_UNORM_SRGB (0x105)
{
"B4G4R4A4_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 4, 4, 4, 4 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {4, 4, 4, 4}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_UNORM (0x106)
{
"R8G8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_SNORM (0x107)
{
"R8G8_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 127.0f, 1.0f / 127.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 127.0f, 1.0f / 127.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_SINT (0x108)
{
"R8G8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_UINT (0x109)
{
"R8G8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_UNORM (0x10A)
{
"R16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_SNORM (0x10B)
{
"R16_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 32767.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 32767.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_SINT (0x10C)
{
"R16_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_UINT (0x10D)
{
"R16_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_FLOAT (0x10E)
{
"R16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x10F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x110)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// I16_UNORM (0x111)
{
"I16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L16_UNORM (0x112)
{
"L16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// A16_UNORM (0x113)
{
"A16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 65535.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8A8_UNORM (0x114)
{
"L8A8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I16_FLOAT (0x115)
{
"I16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L16_FLOAT (0x116)
{
"L16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// A16_FLOAT (0x117)
{
"A16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8A8_UNORM_SRGB (0x118)
{
"L8A8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, true, false, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, true, false, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x119)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// B5G5R5X1_UNORM (0x11A)
{
"B5G5R5X1_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 5, 5, 5, 1 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {5, 5, 5, 1}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B5G5R5X1_UNORM_SRGB (0x11B)
{
"B5G5R5X1_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 5, 5, 5, 1 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {5, 5, 5, 1}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_SSCALED (0x11C)
{
"R8G8_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8_USCALED (0x11D)
{
"R8G8_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_SSCALED (0x11E)
{
"R16_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16_USCALED (0x11F)
{
"R16_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 16, 0, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {16, 0, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x120)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x121)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x122)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x123)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// A1B5G5R5_UNORM (0x124)
{
"A1B5G5R5_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 2, 1, 0 }, // Swizzle
- { 1, 5, 5, 5 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 1.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 2, 1, 0}, // Swizzle
+ {1, 5, 5, 5}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 1.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// A4B4G4R4_UNORM (0x125)
{
"A4B4G4R4_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 2, 1, 0 }, // Swizzle
- { 4, 4, 4, 4 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 2, 1, 0}, // Swizzle
+ {4, 4, 4, 4}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8A8_UINT (0x126)
{
"L8A8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8A8_SINT (0x127)
{
"L8A8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 3, 0, 0 }, // Swizzle
- { 8, 8, 0, 0 }, // Bits per component
- 16, // Bits per element
- 2, // Bytes per element
- 2, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 3, 0, 0}, // Swizzle
+ {8, 8, 0, 0}, // Bits per component
+ 16, // Bits per element
+ 2, // Bytes per element
+ 2, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x128)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x129)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x12F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x130)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x131)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x132)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x133)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x134)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x135)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x136)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x137)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x138)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x139)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x13F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8_UNORM (0x140)
{
"R8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8_SNORM (0x141)
{
"R8_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8_SINT (0x142)
{
"R8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8_UINT (0x143)
{
"R8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// A8_UNORM (0x144)
{
"A8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 3, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {3, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I8_UNORM (0x145)
{
"I8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8_UNORM (0x146)
{
"L8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x147)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x148)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8_SSCALED (0x149)
{
"R8_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8_USCALED (0x14A)
{
"R8_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x14B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// L8_UNORM_SRGB (0x14C)
{
"L8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x14D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x14E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
- // padding (0x14F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
+ // padding (0x14F)
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x150)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x151)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// L8_UINT (0x152)
{
"L8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// L8_SINT (0x153)
{
"L8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I8_UINT (0x154)
{
"I8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// I8_SINT (0x155)
{
"I8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- true, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ true, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x156)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x157)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x158)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x159)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x15F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x160)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x161)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x162)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x163)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x164)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x165)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x166)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x167)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x168)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x169)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x16F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x170)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x171)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x172)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x173)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x174)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x175)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x176)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x177)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x178)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x179)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17A)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17B)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17C)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17D)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x17F)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// DXT1_RGB_SRGB (0x180)
{
"DXT1_RGB_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// padding (0x181)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x182)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// YCRCB_SWAPUVY (0x183)
{
"YCRCB_SWAPUVY",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- true, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 2, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ true, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 2, // bcWidth
+ 1, // bcHeight
},
// padding (0x184)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x185)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// BC1_UNORM (0x186)
{
"BC1_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC2_UNORM (0x187)
{
"BC2_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC3_UNORM (0x188)
{
"BC3_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC4_UNORM (0x189)
{
"BC4_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC5_UNORM (0x18A)
{
"BC5_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC1_UNORM_SRGB (0x18B)
{
"BC1_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ true, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC2_UNORM_SRGB (0x18C)
{
"BC2_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ true, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC3_UNORM_SRGB (0x18D)
{
"BC3_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ true, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// padding (0x18E)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// YCRCB_SWAPUV (0x18F)
{
"YCRCB_SWAPUV",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- true, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 2, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ true, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 2, // bcWidth
+ 1, // bcHeight
},
// padding (0x190)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// DXT1_RGB (0x191)
{
"DXT1_RGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// padding (0x192)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8G8B8_UNORM (0x193)
{
"R8G8B8_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8_SNORM (0x194)
{
"R8G8B8_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8_SSCALED (0x195)
{
"R8G8B8_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8_USCALED (0x196)
{
"R8G8B8_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R64G64B64A64_FLOAT (0x197)
{
"R64G64B64A64_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 64, 64, 64, 64 }, // Bits per component
- 256, // Bits per element
- 32, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {64, 64, 64, 64}, // Bits per component
+ 256, // Bits per element
+ 32, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R64G64B64_FLOAT (0x198)
{
"R64G64B64_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 64, 64, 64, 0 }, // Bits per component
- 192, // Bits per element
- 24, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {64, 64, 64, 0}, // Bits per component
+ 192, // Bits per element
+ 24, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// BC4_SNORM (0x199)
{
"BC4_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 64, // Bits per element
- 8, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 64, // Bits per element
+ 8, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC5_SNORM (0x19A)
{
"BC5_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// R16G16B16_FLOAT (0x19B)
{
"R16G16B16_FLOAT",
- { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16_UNORM (0x19C)
{
"R16G16B16_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16_SNORM (0x19D)
{
"R16G16B16_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16_SSCALED (0x19E)
{
"R16G16B16_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16_USCALED (0x19F)
{
"R16G16B16_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x1A0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// BC6H_SF16 (0x1A1)
{
"BC6H_SF16",
- { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 127.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC7_UNORM (0x1A2)
{
"BC7_UNORM",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC7_UNORM_SRGB (0x1A3)
{
"BC7_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- true, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ true, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// BC6H_UF16 (0x1A4)
{
"BC6H_UF16",
- { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 8, 8, 8 }, // Bits per component
- 128, // Bits per element
- 16, // Bytes per element
- 1, // Num components
- false, // isSRGB
- true, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, false, false, false }, // Is normalized?
- { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
- 4, // bcWidth
- 4, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 8, 8, 8}, // Bits per component
+ 128, // Bits per element
+ 16, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ true, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, false, false, false}, // Is normalized?
+ {1.0f / 255.0f, 0, 0, 0}, // To float scale factor
+ 4, // bcWidth
+ 4, // bcHeight
},
// padding (0x1A5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1A6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1A7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8G8B8_UNORM_SRGB (0x1A8)
{
"R8G8B8_UNORM_SRGB",
- { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- true, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, false }, // Is normalized?
- { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ true, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, false}, // Is normalized?
+ {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x1A9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1AF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R16G16B16_UINT (0x1B0)
{
"R16G16B16_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R16G16B16_SINT (0x1B1)
{
"R16G16B16_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 16, 16, 16, 0 }, // Bits per component
- 48, // Bits per element
- 6, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {16, 16, 16, 0}, // Bits per component
+ 48, // Bits per element
+ 6, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R32_SFIXED (0x1B2)
{
"R32_SFIXED",
- { SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 0, 0, 0 }, // Swizzle
- { 32, 0, 0, 0 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 0, 0, 0}, // Swizzle
+ {32, 0, 0, 0}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_SNORM (0x1B3)
{
"R10G10B10A2_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_USCALED (0x1B4)
{
"R10G10B10A2_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_SSCALED (0x1B5)
{
"R10G10B10A2_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R10G10B10A2_SINT (0x1B6)
{
"R10G10B10A2_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_SNORM (0x1B7)
{
"B10G10R10A2_SNORM",
- { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { true, true, true, true }, // Is normalized?
- { 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {true, true, true, true}, // Is normalized?
+ {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_USCALED (0x1B8)
{
"B10G10R10A2_USCALED",
- { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_SSCALED (0x1B9)
{
"B10G10R10A2_SSCALED",
- { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
- { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+ {0, 0, 0, 0x3f800000}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_UINT (0x1BA)
{
"B10G10R10A2_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// B10G10R10A2_SINT (0x1BB)
{
"B10G10R10A2_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 2, 1, 0, 3 }, // Swizzle
- { 10, 10, 10, 2 }, // Bits per component
- 32, // Bits per element
- 4, // Bytes per element
- 4, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {2, 1, 0, 3}, // Swizzle
+ {10, 10, 10, 2}, // Bits per component
+ 32, // Bits per element
+ 4, // Bytes per element
+ 4, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 1.0f}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x1BC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1BD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1BE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1BF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1C7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// R8G8B8_UINT (0x1C8)
{
"R8G8B8_UINT",
- { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// R8G8B8_SINT (0x1C9)
{
"R8G8B8_SINT",
- { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 0 }, // Swizzle
- { 8, 8, 8, 0 }, // Bits per component
- 24, // Bits per element
- 3, // Bytes per element
- 3, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 0}, // Swizzle
+ {8, 8, 8, 0}, // Bits per component
+ 24, // Bits per element
+ 3, // Bytes per element
+ 3, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 1.0f, 1.0f, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
// padding (0x1CA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1CB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1CC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1CD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1CE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1CF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1D9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1DF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1E9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1EA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1EB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1EC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1ED)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1EE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1EF)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F0)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F1)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F2)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F3)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F4)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F5)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F6)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F7)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F8)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1F9)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1FA)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1FB)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1FC)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1FD)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// padding (0x1FE)
- {
- nullptr,
- { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
- 0, 0, 0, false, false, false, false,
- { false, false, false, false },
- { 0.0f, 0.0f, 0.0f, 0.0f },
- 1, 1
- },
+ {nullptr,
+ {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ 0,
+ 0,
+ 0,
+ false,
+ false,
+ false,
+ false,
+ {false, false, false, false},
+ {0.0f, 0.0f, 0.0f, 0.0f},
+ 1,
+ 1},
// RAW (0x1FF)
{
"RAW",
- { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
- { 0, 0, 0, 0x1 }, // Defaults for missing components
- { 0, 1, 2, 3 }, // Swizzle
- { 8, 0, 0, 0 }, // Bits per component
- 8, // Bits per element
- 1, // Bytes per element
- 1, // Num components
- false, // isSRGB
- false, // isBC
- false, // isSubsampled
- false, // isLuminance
- { false, false, false, false }, // Is normalized?
- { 1.0f, 0, 0, 0 }, // To float scale factor
- 1, // bcWidth
- 1, // bcHeight
+ {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+ {0, 0, 0, 0x1}, // Defaults for missing components
+ {0, 1, 2, 3}, // Swizzle
+ {8, 0, 0, 0}, // Bits per component
+ 8, // Bits per element
+ 1, // Bytes per element
+ 1, // Num components
+ false, // isSRGB
+ false, // isBC
+ false, // isSubsampled
+ false, // isLuminance
+ {false, false, false, false}, // Is normalized?
+ {1.0f, 0, 0, 0}, // To float scale factor
+ 1, // bcWidth
+ 1, // bcHeight
},
};
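// A minimal sketch (not part of this patch) of how the annotated table fields
// above combine during texel unpack. The member names follow the annotations
// ("Is normalized?", "To float scale factor"); the helper name is hypothetical.
static float UnpackComponent(const SWR_FORMAT_INFO& info, uint32_t raw, uint32_t comp)
{
    // Normalized channels scale into [0,1] via the to-float factor (e.g. 1/255
    // for UNORM8); UINT/SINT channels keep a factor of 1.0f, as seen in the
    // R8G8B8_UINT / R8G8B8_SINT entries above.
    return info.isNormalized[comp] ? raw * info.toFloat[comp] : static_cast<float>(raw);
}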
-
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file formats.h
-*
-* @brief auto-generated file
-*
-* DO NOT EDIT
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.h
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
#pragma once
//////////////////////////////////////////////////////////////////////////
enum SWR_FORMAT
{
- R32G32B32A32_FLOAT = 0x0,
- R32G32B32A32_SINT = 0x1,
- R32G32B32A32_UINT = 0x2,
- R64G64_FLOAT = 0x5,
- R32G32B32X32_FLOAT = 0x6,
- R32G32B32A32_SSCALED = 0x7,
- R32G32B32A32_USCALED = 0x8,
- R32G32B32A32_SFIXED = 0x20,
- R32G32B32_FLOAT = 0x40,
- R32G32B32_SINT = 0x41,
- R32G32B32_UINT = 0x42,
- R32G32B32_SSCALED = 0x45,
- R32G32B32_USCALED = 0x46,
- R32G32B32_SFIXED = 0x50,
- R16G16B16A16_UNORM = 0x80,
- R16G16B16A16_SNORM = 0x81,
- R16G16B16A16_SINT = 0x82,
- R16G16B16A16_UINT = 0x83,
- R16G16B16A16_FLOAT = 0x84,
- R32G32_FLOAT = 0x85,
- R32G32_SINT = 0x86,
- R32G32_UINT = 0x87,
- R32_FLOAT_X8X24_TYPELESS = 0x88,
- X32_TYPELESS_G8X24_UINT = 0x89,
- L32A32_FLOAT = 0x8A,
- R64_FLOAT = 0x8D,
- R16G16B16X16_UNORM = 0x8E,
- R16G16B16X16_FLOAT = 0x8F,
- L32X32_FLOAT = 0x91,
- I32X32_FLOAT = 0x92,
- R16G16B16A16_SSCALED = 0x93,
- R16G16B16A16_USCALED = 0x94,
- R32G32_SSCALED = 0x95,
- R32G32_USCALED = 0x96,
- R32G32_SFIXED = 0xA0,
- B8G8R8A8_UNORM = 0xC0,
- B8G8R8A8_UNORM_SRGB = 0xC1,
- R10G10B10A2_UNORM = 0xC2,
- R10G10B10A2_UNORM_SRGB = 0xC3,
- R10G10B10A2_UINT = 0xC4,
- R8G8B8A8_UNORM = 0xC7,
- R8G8B8A8_UNORM_SRGB = 0xC8,
- R8G8B8A8_SNORM = 0xC9,
- R8G8B8A8_SINT = 0xCA,
- R8G8B8A8_UINT = 0xCB,
- R16G16_UNORM = 0xCC,
- R16G16_SNORM = 0xCD,
- R16G16_SINT = 0xCE,
- R16G16_UINT = 0xCF,
- R16G16_FLOAT = 0xD0,
- B10G10R10A2_UNORM = 0xD1,
- B10G10R10A2_UNORM_SRGB = 0xD2,
- R11G11B10_FLOAT = 0xD3,
- R10G10B10_FLOAT_A2_UNORM = 0xD5,
- R32_SINT = 0xD6,
- R32_UINT = 0xD7,
- R32_FLOAT = 0xD8,
- R24_UNORM_X8_TYPELESS = 0xD9,
- X24_TYPELESS_G8_UINT = 0xDA,
- L32_UNORM = 0xDD,
- L16A16_UNORM = 0xDF,
- I24X8_UNORM = 0xE0,
- L24X8_UNORM = 0xE1,
- I32_FLOAT = 0xE3,
- L32_FLOAT = 0xE4,
- A32_FLOAT = 0xE5,
- B8G8R8X8_UNORM = 0xE9,
- B8G8R8X8_UNORM_SRGB = 0xEA,
- R8G8B8X8_UNORM = 0xEB,
- R8G8B8X8_UNORM_SRGB = 0xEC,
- R9G9B9E5_SHAREDEXP = 0xED,
- B10G10R10X2_UNORM = 0xEE,
- L16A16_FLOAT = 0xF0,
- R10G10B10X2_USCALED = 0xF3,
- R8G8B8A8_SSCALED = 0xF4,
- R8G8B8A8_USCALED = 0xF5,
- R16G16_SSCALED = 0xF6,
- R16G16_USCALED = 0xF7,
- R32_SSCALED = 0xF8,
- R32_USCALED = 0xF9,
- B5G6R5_UNORM = 0x100,
- B5G6R5_UNORM_SRGB = 0x101,
- B5G5R5A1_UNORM = 0x102,
- B5G5R5A1_UNORM_SRGB = 0x103,
- B4G4R4A4_UNORM = 0x104,
- B4G4R4A4_UNORM_SRGB = 0x105,
- R8G8_UNORM = 0x106,
- R8G8_SNORM = 0x107,
- R8G8_SINT = 0x108,
- R8G8_UINT = 0x109,
- R16_UNORM = 0x10A,
- R16_SNORM = 0x10B,
- R16_SINT = 0x10C,
- R16_UINT = 0x10D,
- R16_FLOAT = 0x10E,
- I16_UNORM = 0x111,
- L16_UNORM = 0x112,
- A16_UNORM = 0x113,
- L8A8_UNORM = 0x114,
- I16_FLOAT = 0x115,
- L16_FLOAT = 0x116,
- A16_FLOAT = 0x117,
- L8A8_UNORM_SRGB = 0x118,
- B5G5R5X1_UNORM = 0x11A,
- B5G5R5X1_UNORM_SRGB = 0x11B,
- R8G8_SSCALED = 0x11C,
- R8G8_USCALED = 0x11D,
- R16_SSCALED = 0x11E,
- R16_USCALED = 0x11F,
- A1B5G5R5_UNORM = 0x124,
- A4B4G4R4_UNORM = 0x125,
- L8A8_UINT = 0x126,
- L8A8_SINT = 0x127,
- R8_UNORM = 0x140,
- R8_SNORM = 0x141,
- R8_SINT = 0x142,
- R8_UINT = 0x143,
- A8_UNORM = 0x144,
- I8_UNORM = 0x145,
- L8_UNORM = 0x146,
- R8_SSCALED = 0x149,
- R8_USCALED = 0x14A,
- L8_UNORM_SRGB = 0x14C,
- L8_UINT = 0x152,
- L8_SINT = 0x153,
- I8_UINT = 0x154,
- I8_SINT = 0x155,
- DXT1_RGB_SRGB = 0x180,
- YCRCB_SWAPUVY = 0x183,
- BC1_UNORM = 0x186,
- BC2_UNORM = 0x187,
- BC3_UNORM = 0x188,
- BC4_UNORM = 0x189,
- BC5_UNORM = 0x18A,
- BC1_UNORM_SRGB = 0x18B,
- BC2_UNORM_SRGB = 0x18C,
- BC3_UNORM_SRGB = 0x18D,
- YCRCB_SWAPUV = 0x18F,
- DXT1_RGB = 0x191,
- R8G8B8_UNORM = 0x193,
- R8G8B8_SNORM = 0x194,
- R8G8B8_SSCALED = 0x195,
- R8G8B8_USCALED = 0x196,
- R64G64B64A64_FLOAT = 0x197,
- R64G64B64_FLOAT = 0x198,
- BC4_SNORM = 0x199,
- BC5_SNORM = 0x19A,
- R16G16B16_FLOAT = 0x19B,
- R16G16B16_UNORM = 0x19C,
- R16G16B16_SNORM = 0x19D,
- R16G16B16_SSCALED = 0x19E,
- R16G16B16_USCALED = 0x19F,
- BC6H_SF16 = 0x1A1,
- BC7_UNORM = 0x1A2,
- BC7_UNORM_SRGB = 0x1A3,
- BC6H_UF16 = 0x1A4,
- R8G8B8_UNORM_SRGB = 0x1A8,
- R16G16B16_UINT = 0x1B0,
- R16G16B16_SINT = 0x1B1,
- R32_SFIXED = 0x1B2,
- R10G10B10A2_SNORM = 0x1B3,
- R10G10B10A2_USCALED = 0x1B4,
- R10G10B10A2_SSCALED = 0x1B5,
- R10G10B10A2_SINT = 0x1B6,
- B10G10R10A2_SNORM = 0x1B7,
- B10G10R10A2_USCALED = 0x1B8,
- B10G10R10A2_SSCALED = 0x1B9,
- B10G10R10A2_UINT = 0x1BA,
- B10G10R10A2_SINT = 0x1BB,
- R8G8B8_UINT = 0x1C8,
- R8G8B8_SINT = 0x1C9,
- RAW = 0x1FF,
- NUM_SWR_FORMATS = 0x200,
+ R32G32B32A32_FLOAT = 0x0,
+ R32G32B32A32_SINT = 0x1,
+ R32G32B32A32_UINT = 0x2,
+ R64G64_FLOAT = 0x5,
+ R32G32B32X32_FLOAT = 0x6,
+ R32G32B32A32_SSCALED = 0x7,
+ R32G32B32A32_USCALED = 0x8,
+ R32G32B32A32_SFIXED = 0x20,
+ R32G32B32_FLOAT = 0x40,
+ R32G32B32_SINT = 0x41,
+ R32G32B32_UINT = 0x42,
+ R32G32B32_SSCALED = 0x45,
+ R32G32B32_USCALED = 0x46,
+ R32G32B32_SFIXED = 0x50,
+ R16G16B16A16_UNORM = 0x80,
+ R16G16B16A16_SNORM = 0x81,
+ R16G16B16A16_SINT = 0x82,
+ R16G16B16A16_UINT = 0x83,
+ R16G16B16A16_FLOAT = 0x84,
+ R32G32_FLOAT = 0x85,
+ R32G32_SINT = 0x86,
+ R32G32_UINT = 0x87,
+ R32_FLOAT_X8X24_TYPELESS = 0x88,
+ X32_TYPELESS_G8X24_UINT = 0x89,
+ L32A32_FLOAT = 0x8A,
+ R64_FLOAT = 0x8D,
+ R16G16B16X16_UNORM = 0x8E,
+ R16G16B16X16_FLOAT = 0x8F,
+ L32X32_FLOAT = 0x91,
+ I32X32_FLOAT = 0x92,
+ R16G16B16A16_SSCALED = 0x93,
+ R16G16B16A16_USCALED = 0x94,
+ R32G32_SSCALED = 0x95,
+ R32G32_USCALED = 0x96,
+ R32G32_SFIXED = 0xA0,
+ B8G8R8A8_UNORM = 0xC0,
+ B8G8R8A8_UNORM_SRGB = 0xC1,
+ R10G10B10A2_UNORM = 0xC2,
+ R10G10B10A2_UNORM_SRGB = 0xC3,
+ R10G10B10A2_UINT = 0xC4,
+ R8G8B8A8_UNORM = 0xC7,
+ R8G8B8A8_UNORM_SRGB = 0xC8,
+ R8G8B8A8_SNORM = 0xC9,
+ R8G8B8A8_SINT = 0xCA,
+ R8G8B8A8_UINT = 0xCB,
+ R16G16_UNORM = 0xCC,
+ R16G16_SNORM = 0xCD,
+ R16G16_SINT = 0xCE,
+ R16G16_UINT = 0xCF,
+ R16G16_FLOAT = 0xD0,
+ B10G10R10A2_UNORM = 0xD1,
+ B10G10R10A2_UNORM_SRGB = 0xD2,
+ R11G11B10_FLOAT = 0xD3,
+ R10G10B10_FLOAT_A2_UNORM = 0xD5,
+ R32_SINT = 0xD6,
+ R32_UINT = 0xD7,
+ R32_FLOAT = 0xD8,
+ R24_UNORM_X8_TYPELESS = 0xD9,
+ X24_TYPELESS_G8_UINT = 0xDA,
+ L32_UNORM = 0xDD,
+ L16A16_UNORM = 0xDF,
+ I24X8_UNORM = 0xE0,
+ L24X8_UNORM = 0xE1,
+ I32_FLOAT = 0xE3,
+ L32_FLOAT = 0xE4,
+ A32_FLOAT = 0xE5,
+ B8G8R8X8_UNORM = 0xE9,
+ B8G8R8X8_UNORM_SRGB = 0xEA,
+ R8G8B8X8_UNORM = 0xEB,
+ R8G8B8X8_UNORM_SRGB = 0xEC,
+ R9G9B9E5_SHAREDEXP = 0xED,
+ B10G10R10X2_UNORM = 0xEE,
+ L16A16_FLOAT = 0xF0,
+ R10G10B10X2_USCALED = 0xF3,
+ R8G8B8A8_SSCALED = 0xF4,
+ R8G8B8A8_USCALED = 0xF5,
+ R16G16_SSCALED = 0xF6,
+ R16G16_USCALED = 0xF7,
+ R32_SSCALED = 0xF8,
+ R32_USCALED = 0xF9,
+ B5G6R5_UNORM = 0x100,
+ B5G6R5_UNORM_SRGB = 0x101,
+ B5G5R5A1_UNORM = 0x102,
+ B5G5R5A1_UNORM_SRGB = 0x103,
+ B4G4R4A4_UNORM = 0x104,
+ B4G4R4A4_UNORM_SRGB = 0x105,
+ R8G8_UNORM = 0x106,
+ R8G8_SNORM = 0x107,
+ R8G8_SINT = 0x108,
+ R8G8_UINT = 0x109,
+ R16_UNORM = 0x10A,
+ R16_SNORM = 0x10B,
+ R16_SINT = 0x10C,
+ R16_UINT = 0x10D,
+ R16_FLOAT = 0x10E,
+ I16_UNORM = 0x111,
+ L16_UNORM = 0x112,
+ A16_UNORM = 0x113,
+ L8A8_UNORM = 0x114,
+ I16_FLOAT = 0x115,
+ L16_FLOAT = 0x116,
+ A16_FLOAT = 0x117,
+ L8A8_UNORM_SRGB = 0x118,
+ B5G5R5X1_UNORM = 0x11A,
+ B5G5R5X1_UNORM_SRGB = 0x11B,
+ R8G8_SSCALED = 0x11C,
+ R8G8_USCALED = 0x11D,
+ R16_SSCALED = 0x11E,
+ R16_USCALED = 0x11F,
+ A1B5G5R5_UNORM = 0x124,
+ A4B4G4R4_UNORM = 0x125,
+ L8A8_UINT = 0x126,
+ L8A8_SINT = 0x127,
+ R8_UNORM = 0x140,
+ R8_SNORM = 0x141,
+ R8_SINT = 0x142,
+ R8_UINT = 0x143,
+ A8_UNORM = 0x144,
+ I8_UNORM = 0x145,
+ L8_UNORM = 0x146,
+ R8_SSCALED = 0x149,
+ R8_USCALED = 0x14A,
+ L8_UNORM_SRGB = 0x14C,
+ L8_UINT = 0x152,
+ L8_SINT = 0x153,
+ I8_UINT = 0x154,
+ I8_SINT = 0x155,
+ DXT1_RGB_SRGB = 0x180,
+ YCRCB_SWAPUVY = 0x183,
+ BC1_UNORM = 0x186,
+ BC2_UNORM = 0x187,
+ BC3_UNORM = 0x188,
+ BC4_UNORM = 0x189,
+ BC5_UNORM = 0x18A,
+ BC1_UNORM_SRGB = 0x18B,
+ BC2_UNORM_SRGB = 0x18C,
+ BC3_UNORM_SRGB = 0x18D,
+ YCRCB_SWAPUV = 0x18F,
+ DXT1_RGB = 0x191,
+ R8G8B8_UNORM = 0x193,
+ R8G8B8_SNORM = 0x194,
+ R8G8B8_SSCALED = 0x195,
+ R8G8B8_USCALED = 0x196,
+ R64G64B64A64_FLOAT = 0x197,
+ R64G64B64_FLOAT = 0x198,
+ BC4_SNORM = 0x199,
+ BC5_SNORM = 0x19A,
+ R16G16B16_FLOAT = 0x19B,
+ R16G16B16_UNORM = 0x19C,
+ R16G16B16_SNORM = 0x19D,
+ R16G16B16_SSCALED = 0x19E,
+ R16G16B16_USCALED = 0x19F,
+ BC6H_SF16 = 0x1A1,
+ BC7_UNORM = 0x1A2,
+ BC7_UNORM_SRGB = 0x1A3,
+ BC6H_UF16 = 0x1A4,
+ R8G8B8_UNORM_SRGB = 0x1A8,
+ R16G16B16_UINT = 0x1B0,
+ R16G16B16_SINT = 0x1B1,
+ R32_SFIXED = 0x1B2,
+ R10G10B10A2_SNORM = 0x1B3,
+ R10G10B10A2_USCALED = 0x1B4,
+ R10G10B10A2_SSCALED = 0x1B5,
+ R10G10B10A2_SINT = 0x1B6,
+ B10G10R10A2_SNORM = 0x1B7,
+ B10G10R10A2_USCALED = 0x1B8,
+ B10G10R10A2_SSCALED = 0x1B9,
+ B10G10R10A2_UINT = 0x1BA,
+ B10G10R10A2_SINT = 0x1BB,
+ R8G8B8_UINT = 0x1C8,
+ R8G8B8_SINT = 0x1C9,
+ RAW = 0x1FF,
+ NUM_SWR_FORMATS = 0x200,
};
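// A minimal sketch of how this enum is typically consumed: the format-info
// table earlier in this patch carries one entry per SWR_FORMAT slot, with the
// padding entries filling the holes, so a dense index is safe. `gFormatInfo`
// and `GetFormatInfo` are assumed names for illustration.
extern const SWR_FORMAT_INFO gFormatInfo[NUM_SWR_FORMATS];
inline const SWR_FORMAT_INFO& GetFormatInfo(SWR_FORMAT format)
{
    // dense indexing is exactly why the table keeps explicit padding entries
    return gFormatInfo[format];
}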
//////////////////////////////////////////////////////////////////////////
// lookup table for unorm8 srgb -> float conversion
extern const uint32_t srgb8Table[256];
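// A sketch of how a unorm8 srgb -> float table like srgb8Table could be
// generated, using the standard sRGB decode curve; whether SWR builds it this
// way is an assumption. Each entry stores the float's bit pattern in a
// uint32_t, matching the declared element type.
#include <cmath>
#include <cstring>
static void BuildSrgb8Table(uint32_t table[256])
{
    for (int i = 0; i < 256; ++i)
    {
        float c = i / 255.0f;
        // standard sRGB EOTF: linear segment below the 0.04045 knee, gamma 2.4 above
        float lin = (c <= 0.04045f) ? c / 12.92f : std::pow((c + 0.055f) / 1.055f, 2.4f);
        std::memcpy(&table[i], &lin, sizeof(lin)); // store the raw float bits
    }
}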
-
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#ifndef __SWR_INTRIN_H__
#define __SWR_INTRIN_H__
#if !defined(SIMD_ARCH)
#define SIMD_ARCH KNOB_ARCH
-#endif
+#endif
#include "simdlib_types.hpp"
-typedef SIMDImpl::SIMD128Impl::Float simd4scalar;
-typedef SIMDImpl::SIMD128Impl::Double simd4scalard;
-typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;
-typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector;
-typedef SIMDImpl::SIMD128Impl::Mask simd4mask;
-
-typedef SIMDImpl::SIMD256Impl::Float simd8scalar;
-typedef SIMDImpl::SIMD256Impl::Double simd8scalard;
-typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;
-typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector;
-typedef SIMDImpl::SIMD256Impl::Mask simd8mask;
-
-typedef SIMDImpl::SIMD512Impl::Float simd16scalar;
-typedef SIMDImpl::SIMD512Impl::Double simd16scalard;
-typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;
-typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector;
-typedef SIMDImpl::SIMD512Impl::Mask simd16mask;
-
-#if KNOB_SIMD_WIDTH == 8
-typedef simd8scalar simdscalar;
-typedef simd8scalard simdscalard;
-typedef simd8scalari simdscalari;
-typedef simd8vector simdvector;
-typedef simd8mask simdmask;
+typedef SIMDImpl::SIMD128Impl::Float simd4scalar;
+typedef SIMDImpl::SIMD128Impl::Double simd4scalard;
+typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;
+typedef SIMDImpl::SIMD128Impl::Vec4 simd4vector;
+typedef SIMDImpl::SIMD128Impl::Mask simd4mask;
+
+typedef SIMDImpl::SIMD256Impl::Float simd8scalar;
+typedef SIMDImpl::SIMD256Impl::Double simd8scalard;
+typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;
+typedef SIMDImpl::SIMD256Impl::Vec4 simd8vector;
+typedef SIMDImpl::SIMD256Impl::Mask simd8mask;
+
+typedef SIMDImpl::SIMD512Impl::Float simd16scalar;
+typedef SIMDImpl::SIMD512Impl::Double simd16scalard;
+typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;
+typedef SIMDImpl::SIMD512Impl::Vec4 simd16vector;
+typedef SIMDImpl::SIMD512Impl::Mask simd16mask;
+
+#if KNOB_SIMD_WIDTH == 8
+typedef simd8scalar simdscalar;
+typedef simd8scalard simdscalard;
+typedef simd8scalari simdscalari;
+typedef simd8vector simdvector;
+typedef simd8mask simdmask;
#else
#error Unsupported vector width
#endif
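// Illustrative check only: with KNOB_SIMD_WIDTH == 8 the generic simdscalar
// aliases the 256-bit SIMD256Impl::Float, i.e. eight float lanes (assuming the
// 256-bit wrapper carries no padding beyond the vector register itself).
static_assert(sizeof(simd8scalar) == 8 * sizeof(float), "simd8scalar carries 8 float lanes");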
#else
UINT result = 0;
- // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
+ // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
// using bsf instead of funky loop
DWORD maskIndex;
while (_BitScanForward(&maskIndex, mask))
#if KNOB_ARCH >= KNOB_ARCH_AVX2
return _pext_u32(a, mask);
#else
- UINT result = 0;
- DWORD maskIndex;
+ UINT result = 0;
+ DWORD maskIndex;
uint32_t currentBit = 0;
while (_BitScanForward(&maskIndex, mask))
{
#endif
}
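// A self-contained sketch of the software pdep emulation the comment above
// references (wm.ite.pl/articles/pdep-soft-emu.html), using _BitScanForward
// instead of a bit-at-a-time loop; illustrative, not the exact body elided
// from this diff.
static UINT pdep_u32_soft(UINT a, UINT mask)
{
    UINT  result = 0;
    DWORD maskIndex;
    while (_BitScanForward(&maskIndex, mask))
    {
        if (a & 1)
        {
            result |= 1u << maskIndex; // deposit the next source bit at the mask bit's position
        }
        a >>= 1;          // consume one source bit per set mask bit
        mask &= mask - 1; // clear the lowest set bit of the mask
    }
    return result;
}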
-#endif//__SWR_INTRIN_H__
+#endif //__SWR_INTRIN_H__
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#pragma once
class InstructionSet
{
public:
- InstructionSet() : CPU_Rep() {};
+ InstructionSet() : CPU_Rep(){};
// getters
std::string Vendor(void) { return CPU_Rep.vendor_; }
class InstructionSet_Internal
{
public:
- InstructionSet_Internal()
- : nIds_{ 0 },
- nExIds_{ 0 },
- isIntel_{ false },
- isAMD_{ false },
- f_1_ECX_{ 0 },
- f_1_EDX_{ 0 },
- f_7_EBX_{ 0 },
- f_7_ECX_{ 0 },
- f_81_ECX_{ 0 },
- f_81_EDX_{ 0 },
- data_{},
- extdata_{}
+ InstructionSet_Internal() :
+ nIds_{0}, nExIds_{0}, isIntel_{false}, isAMD_{false}, f_1_ECX_{0}, f_1_EDX_{0},
+ f_7_EBX_{0}, f_7_ECX_{0}, f_81_ECX_{0}, f_81_EDX_{0}, data_{}, extdata_{}
{
- //int cpuInfo[4] = {-1};
+ // int cpuInfo[4] = {-1};
std::array<int, 4> cpui;
// Calling __cpuid with 0x0 as the function_id argument
#if defined(_MSC_VER) && !defined(__clang__)
__cpuidex(cpui.data(), i, 0);
#else
- int *data = cpui.data();
+ int* data = cpui.data();
__cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
#endif
data_.push_back(cpui);
// Capture vendor string
char vendor[0x20];
memset(vendor, 0, sizeof(vendor));
- *reinterpret_cast<int*>(vendor) = data_[0][1];
+ *reinterpret_cast<int*>(vendor) = data_[0][1];
*reinterpret_cast<int*>(vendor + 4) = data_[0][3];
*reinterpret_cast<int*>(vendor + 8) = data_[0][2];
- vendor_ = vendor;
+ vendor_ = vendor;
if (vendor_ == "GenuineIntel")
{
isIntel_ = true;
#if defined(_MSC_VER) && !defined(__clang__)
__cpuidex(cpui.data(), i, 0);
#else
- int *data = cpui.data();
+ int* data = cpui.data();
__cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
#endif
extdata_.push_back(cpui);
}
};
- int nIds_;
- unsigned nExIds_;
- std::string vendor_;
- std::string brand_;
- bool isIntel_;
- bool isAMD_;
- std::bitset<32> f_1_ECX_;
- std::bitset<32> f_1_EDX_;
- std::bitset<32> f_7_EBX_;
- std::bitset<32> f_7_ECX_;
- std::bitset<32> f_81_ECX_;
- std::bitset<32> f_81_EDX_;
+ int nIds_;
+ unsigned nExIds_;
+ std::string vendor_;
+ std::string brand_;
+ bool isIntel_;
+ bool isAMD_;
+ std::bitset<32> f_1_ECX_;
+ std::bitset<32> f_1_EDX_;
+ std::bitset<32> f_7_EBX_;
+ std::bitset<32> f_7_ECX_;
+ std::bitset<32> f_81_ECX_;
+ std::bitset<32> f_81_EDX_;
std::vector<std::array<int, 4>> data_;
std::vector<std::array<int, 4>> extdata_;
};
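// Illustrative use of the wrapper above — only Vendor() is shown in this diff.
// The full CPUID enumeration cost is paid once, in InstructionSet_Internal's
// constructor, so queries afterwards are cheap lookups:
//
//     InstructionSet cpu;
//     if (cpu.Vendor() == "GenuineIntel") { /* isIntel_ was set during enumeration */ }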
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#include "common/os.h"
#include <vector>
#include <pthread.h>
#endif // Linux
-
-
#if defined(_WIN32)
static const DWORD MS_VC_EXCEPTION = 0x406D1388;
-#pragma pack(push,8)
+#pragma pack(push, 8)
typedef struct tagTHREADNAME_INFO
{
- DWORD dwType; // Must be 0x1000.
- LPCSTR szName; // Pointer to name (in user addr space).
- DWORD dwThreadID; // Thread ID (-1=caller thread).
- DWORD dwFlags; // Reserved for future use, must be zero.
+ DWORD dwType; // Must be 0x1000.
+ LPCSTR szName; // Pointer to name (in user addr space).
+ DWORD dwThreadID; // Thread ID (-1=caller thread).
+ DWORD dwFlags; // Reserved for future use, must be zero.
} THREADNAME_INFO;
#pragma pack(pop)
void LegacySetThreadName(const char* pThreadName)
{
THREADNAME_INFO info;
- info.dwType = 0x1000;
- info.szName = pThreadName;
+ info.dwType = 0x1000;
+ info.szName = pThreadName;
info.dwThreadID = GetCurrentThreadId();
- info.dwFlags = 0;
+ info.dwFlags = 0;
if (!IsDebuggerPresent())
{
return;
}
-#pragma warning(push)
-#pragma warning(disable: 6320 6322)
- __try {
+#pragma warning(push)
+#pragma warning(disable : 6320 6322)
+ __try
+ {
RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info);
}
- __except (EXCEPTION_EXECUTE_HANDLER) {
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
}
-#pragma warning(pop)
+#pragma warning(pop)
}
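// Note: LegacySetThreadName only has an effect under an attached debugger —
// the IsDebuggerPresent() early-out above skips raising MS_VC_EXCEPTION
// otherwise. Typical call (name is illustrative): LegacySetThreadName("swr_worker_0");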
#endif // _WIN32
{
#if defined(_WIN32)
    // The SetThreadDescription API was introduced in version 1607 of Windows 10.
- typedef HRESULT(WINAPI* PFNSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
+ typedef HRESULT(WINAPI * PFNSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
// The SetThreadDescription API works even if no debugger is attached.
- auto pfnSetThreadDescription =
- reinterpret_cast<PFNSetThreadDescription>(
- GetProcAddress(GetModuleHandleA("Kernel32.dll"), "SetThreadDescription"));
+ auto pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
+ GetProcAddress(GetModuleHandleA("Kernel32.dll"), "SetThreadDescription"));
if (!pfnSetThreadDescription)
{
// try KernelBase.dll
- pfnSetThreadDescription =
- reinterpret_cast<PFNSetThreadDescription>(
- GetProcAddress(GetModuleHandleA("KernelBase.dll"), "SetThreadDescription"));
+ pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
+ GetProcAddress(GetModuleHandleA("KernelBase.dll"), "SetThreadDescription"));
}
if (pfnSetThreadDescription)
{
- std::string utf8Name = pThreadName;
+ std::string utf8Name = pThreadName;
std::wstring wideName;
wideName.resize(utf8Name.size() + 1);
swprintf_s(&(wideName.front()), wideName.size(), L"%S", utf8Name.c_str());
#endif // Linux
}
-static void SplitString(std::vector<std::string>& out_segments, const std::string& input, char splitToken)
+static void
+SplitString(std::vector<std::string>& out_segments, const std::string& input, char splitToken)
{
out_segments.clear();
std::istringstream f(input);
- std::string s;
+ std::string s;
while (std::getline(f, s, splitToken))
{
if (s.size())
/// Execute Command (block until finished)
/// @returns process exit value
-int SWR_API ExecCmd(
- const std::string& cmd, ///< (In) Command line string
- const char* pOptEnvStrings, ///< (Optional In) Environment block for new process
- std::string* pOptStdOut, ///< (Optional Out) Standard Output text
- std::string* pOptStdErr, ///< (Optional Out) Standard Error text
- const std::string* pOptStdIn) ///< (Optional In) Standard Input text
+int SWR_API ExecCmd(const std::string& cmd, ///< (In) Command line string
+ const char* pOptEnvStrings, ///< (Optional In) Environment block for new process
+ std::string* pOptStdOut, ///< (Optional Out) Standard Output text
+ std::string* pOptStdErr, ///< (Optional Out) Standard Error text
+ const std::string* pOptStdIn) ///< (Optional In) Standard Input text
{
int rvalue = -1;
};
std::array<WinPipe, 3> hPipes = {};
- SECURITY_ATTRIBUTES saAttr = { sizeof(SECURITY_ATTRIBUTES) };
- saAttr.bInheritHandle = TRUE; //Pipe handles are inherited by child process.
+ SECURITY_ATTRIBUTES saAttr = {sizeof(SECURITY_ATTRIBUTES)};
+ saAttr.bInheritHandle = TRUE; // Pipe handles are inherited by child process.
saAttr.lpSecurityDescriptor = NULL;
{
}
STARTUPINFOA StartupInfo{};
- StartupInfo.cb = sizeof(STARTUPINFOA);
+ StartupInfo.cb = sizeof(STARTUPINFOA);
StartupInfo.dwFlags = STARTF_USESTDHANDLES;
StartupInfo.dwFlags |= STARTF_USESHOWWINDOW;
StartupInfo.wShowWindow = SW_HIDE;
StartupInfo.hStdInput = hPipes[0].hRead;
}
StartupInfo.hStdOutput = hPipes[1].hWrite;
- StartupInfo.hStdError = hPipes[2].hWrite;
+ StartupInfo.hStdError = hPipes[2].hWrite;
PROCESS_INFORMATION procInfo{};
// CreateProcess can modify the string
std::string local_cmd = cmd;
- BOOL ProcessValue = CreateProcessA(
- NULL,
- (LPSTR)local_cmd.c_str(),
- NULL,
- NULL,
- TRUE,
- 0,
- (LPVOID)pOptEnvStrings,
- NULL,
- &StartupInfo,
- &procInfo);
+ BOOL ProcessValue = CreateProcessA(NULL,
+ (LPSTR)local_cmd.c_str(),
+ NULL,
+ NULL,
+ TRUE,
+ 0,
+ (LPVOID)pOptEnvStrings,
+ NULL,
+ &StartupInfo,
+ &procInfo);
if (ProcessValue && procInfo.hProcess)
{
- auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr)
- {
- char buf[1024];
- DWORD dwRead = 0;
+ auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr) {
+ char buf[1024];
+ DWORD dwRead = 0;
DWORD dwAvail = 0;
while (true)
{
break;
}
- if (!::ReadFile(hPipe, buf, std::min<size_t>(sizeof(buf) - 1, size_t(dwAvail)), &dwRead, NULL) || !dwRead)
+ if (!::ReadFile(hPipe,
+ buf,
+ std::min<size_t>(sizeof(buf) - 1, size_t(dwAvail)),
+ &dwRead,
+ NULL) ||
+ !dwRead)
{
                // error, the child process might have ended
break;
}
}
};
- bool bProcessEnded = false;
- size_t bytesWritten = 0;
+ bool bProcessEnded = false;
+ size_t bytesWritten = 0;
do
{
if (pOptStdIn && (pOptStdIn->size() > bytesWritten))
{
DWORD bytesToWrite = static_cast<DWORD>(pOptStdIn->size()) - bytesWritten;
- if (!::WriteFile(
- hPipes[0].hWrite,
- pOptStdIn->data() + bytesWritten,
- bytesToWrite, &bytesToWrite, nullptr))
+ if (!::WriteFile(hPipes[0].hWrite,
+ pOptStdIn->data() + bytesWritten,
+ bytesToWrite,
+ &bytesToWrite,
+ nullptr))
{
// Failed to write to pipe
break;
ReadFromPipe(hPipes[1].hRead, pOptStdOut);
ReadFromPipe(hPipes[2].hRead, pOptStdErr);
- }
- while (!bProcessEnded);
+ } while (!bProcessEnded);
DWORD exitVal = 0;
if (!GetExitCodeProcess(procInfo.hProcess, &exitVal))
/****************************************************************************
-* Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#ifndef __SWR_OS_H__
#define __SWR_OS_H__
#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
#define SWR_API __cdecl
-#define SWR_VISIBLE __declspec(dllexport)
+#define SWR_VISIBLE __declspec(dllexport)
#ifndef NOMINMAX
#define NOMINMAX
#define DEBUGBREAK __debugbreak()
#define PRAGMA_WARNING_PUSH_DISABLE(...) \
- __pragma(warning(push));\
- __pragma(warning(disable:__VA_ARGS__));
+ __pragma(warning(push)); \
+ __pragma(warning(disable : __VA_ARGS__));
#define PRAGMA_WARNING_POP() __pragma(warning(pop))
-static inline void *AlignedMalloc(size_t _Size, size_t _Alignment)
+static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
{
return _aligned_malloc(_Size, _Alignment);
}
#include <stdio.h>
#include <limits.h>
-typedef void VOID;
-typedef void* LPVOID;
-typedef int INT;
-typedef unsigned int UINT;
-typedef void* HANDLE;
-typedef int LONG;
-typedef unsigned int DWORD;
+typedef void VOID;
+typedef void* LPVOID;
+typedef int INT;
+typedef unsigned int UINT;
+typedef void* HANDLE;
+typedef int LONG;
+typedef unsigned int DWORD;
#undef FALSE
#define FALSE 0
#ifndef INLINE
#define INLINE __inline
#endif
-#define DEBUGBREAK asm ("int $3")
+#define DEBUGBREAK asm("int $3")
#if !defined(__CYGWIN__)
#endif
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
- #define __declspec(x) __declspec_##x
- #define __declspec_align(y) __attribute__((aligned(y)))
- #define __declspec_deprecated __attribute__((deprecated))
- #define __declspec_dllexport
- #define __declspec_dllimport
- #define __declspec_noinline __attribute__((__noinline__))
- #define __declspec_nothrow __attribute__((nothrow))
- #define __declspec_novtable
- #define __declspec_thread __thread
+#define __declspec(x) __declspec_##x
+#define __declspec_align(y) __attribute__((aligned(y)))
+#define __declspec_deprecated __attribute__((deprecated))
+#define __declspec_dllexport
+#define __declspec_dllimport
+#define __declspec_noinline __attribute__((__noinline__))
+#define __declspec_nothrow __attribute__((nothrow))
+#define __declspec_novtable
+#define __declspec_thread __thread
#else
- #define __declspec(X)
+#define __declspec(X)
#endif
#endif
-#define GCC_VERSION (__GNUC__ * 10000 \
- + __GNUC_MINOR__ * 100 \
- + __GNUC_PATCHLEVEL__)
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
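// A quick worked example of the encoding, for the version check that follows:
// GCC 4.5.0 yields 4 * 10000 + 5 * 100 + 0 = 40500, which is exactly the
// GCC_VERSION threshold tested below before providing the __rdtsc fallback.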
#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
-inline
-uint64_t __rdtsc()
+inline uint64_t __rdtsc()
{
long low, high;
asm volatile("rdtsc" : "=a"(low), "=d"(high));
}
#endif
-#if !defined( __clang__) && !defined(__INTEL_COMPILER)
+#if !defined(__clang__) && !defined(__INTEL_COMPILER)
// Intrinsic not defined in gcc
-static INLINE
-void _mm256_storeu2_m128i(__m128i *hi, __m128i *lo, __m256i a)
+static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
{
_mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
_mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
#endif
#endif
-inline
-unsigned char _BitScanForward(unsigned long *Index, unsigned long Mask)
+inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
{
    *Index = __builtin_ctzl(Mask); // 'l' variant matches the unsigned long parameter
return (Mask != 0);
}
-inline
-unsigned char _BitScanForward(unsigned int *Index, unsigned int Mask)
+inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
{
*Index = __builtin_ctz(Mask);
return (Mask != 0);
}
-inline
-unsigned char _BitScanReverse(unsigned long *Index, unsigned long Mask)
+inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
{
    *Index = (8 * sizeof(Mask) - 1) - __builtin_clzl(Mask); // bit index from LSB, per MSVC semantics
return (Mask != 0);
}
-inline
-unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
+inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
{
    *Index = 31 - __builtin_clz(Mask); // bit index from LSB, per MSVC semantics
return (Mask != 0);
#define _BitScanForward64 _BitScanForward
#define _BitScanReverse64 _BitScanReverse
-inline
-void *AlignedMalloc(size_t size, size_t alignment)
+inline void* AlignedMalloc(size_t size, size_t alignment)
{
- void *ret;
+ void* ret;
if (posix_memalign(&ret, alignment, size))
{
return NULL;
return ret;
}
-static inline
-void AlignedFree(void* p)
+static inline void AlignedFree(void* p)
{
free(p);
}
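// A minimal usage sketch (hypothetical call site, not part of this patch);
// assumes an alignment that posix_memalign accepts, i.e. a power of two and
// a multiple of sizeof(void*).
void* pBuf = AlignedMalloc(4096, 64); // 64-byte alignment for cache-line use
if (pBuf)
{
    // ... fill buffer ...
    AlignedFree(pBuf); // pairs with AlignedMalloc on both OS paths
}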
-#define _countof(a) (sizeof(a)/sizeof(*(a)))
+#define _countof(a) (sizeof(a) / sizeof(*(a)))
#define sprintf_s sprintf
-#define strcpy_s(dst,size,src) strncpy(dst,src,size)
+#define strcpy_s(dst, size, src) strncpy(dst, src, size)
#define GetCurrentProcessId getpid
-#define InterlockedCompareExchange(Dest, Exchange, Comparand) __sync_val_compare_and_swap(Dest, Comparand, Exchange)
+#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
+ __sync_val_compare_and_swap(Dest, Comparand, Exchange)
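// One detail worth calling out in the mapping above: the argument order is
// deliberately swapped. Win32 InterlockedCompareExchange takes
// (Destination, Exchange, Comparand), while GCC's __sync_val_compare_and_swap
// takes (ptr, oldval, newval); the comparand comes before the new value.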
#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
#define THREAD thread_local
// Universal types
-typedef uint8_t KILOBYTE[1024];
-typedef KILOBYTE MEGABYTE[1024];
-typedef MEGABYTE GIGABYTE[1024];
+typedef uint8_t KILOBYTE[1024];
+typedef KILOBYTE MEGABYTE[1024];
+typedef MEGABYTE GIGABYTE[1024];
#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
#define ATTR_UNUSED
#endif
-#define SWR_FUNC(_retType, _funcName, /* args */...) \
- typedef _retType (SWR_API * PFN##_funcName)(__VA_ARGS__); \
- _retType SWR_API _funcName(__VA_ARGS__);
+#define SWR_FUNC(_retType, _funcName, /* args */...) \
+ typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
+ _retType SWR_API _funcName(__VA_ARGS__);
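// To make the macro's effect concrete, a hypothetical instantiation
// (ExampleFn is illustrative, not a real SWR entry point):
//   SWR_FUNC(void, ExampleFn, int x) expands to
//     typedef void(SWR_API* PFNExampleFn)(int x); // matching function-pointer type
//     void SWR_API ExampleFn(int x);              // forward declaration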
// Defined in os.cpp
void SWR_API SetCurrentThreadName(const char* pThreadName);
/// Execute Command (block until finished)
/// @returns process exit value
-int SWR_API ExecCmd(
- const std::string& cmd, ///< (In) Command line string
- const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
- std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text
- std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
- const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
-
-#endif//__SWR_OS_H__
+int SWR_API
+ ExecCmd(const std::string& cmd, ///< (In) Command line string
+ const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
+ std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text
+ std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text
+ const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
+
+#endif //__SWR_OS_H__
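// A minimal usage sketch of the ExecCmd declaration above (the command string
// is illustrative, not part of this patch): run a child process, block until
// it exits, and capture its output streams through the optional out-parameters.
std::string stdOut, stdErr;
int exitCode = ExecCmd("my_tool --version", nullptr, &stdOut, &stdErr);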
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rdtsc_buckets.cpp
-*
-* @brief implementation of rdtsc buckets.
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets.cpp
+ *
+ * @brief Implementation of rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "rdtsc_buckets.h"
#include <inttypes.h>
BUCKET_THREAD newThread;
newThread.name = name;
newThread.root.children.reserve(mBuckets.size());
- newThread.root.id = 0;
+ newThread.root.id = 0;
newThread.root.pParent = nullptr;
- newThread.pCurrent = &newThread.root;
+ newThread.pCurrent = &newThread.root;
mThreadMutex.lock();
    // assign a unique thread id for this thread
- size_t id = mThreads.size();
+ size_t id = mThreads.size();
newThread.id = (UINT)id;
- tlsThreadId = (UINT)id;
+ tlsThreadId = (UINT)id;
// store new thread
mThreads.push_back(newThread);
return (UINT)id;
}
-void BucketManager::PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket)
+void BucketManager::PrintBucket(
+ FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket)
{
- const char *arrows[] = {
+ const char* arrows[] = {
"",
"|-> ",
" |-> ",
// compute average cycle count per invocation
uint64_t CPE = bucket.elapsed / bucket.count;
- BUCKET_DESC &desc = mBuckets[bucket.id];
+ BUCKET_DESC& desc = mBuckets[bucket.id];
// construct hierarchy visualization
char hier[80];
strcat(hier, desc.name.c_str());
// print out
- fprintf(f, "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n",
- percentTotal,
- percentParent,
- bucket.elapsed,
- CPE,
- bucket.count,
- (unsigned long)0,
- (uint32_t)0,
- hier
- );
+ fprintf(f,
+ "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n",
+ percentTotal,
+ percentParent,
+ bucket.elapsed,
+ CPE,
+ bucket.count,
+ (unsigned long)0,
+ (uint32_t)0,
+ hier);
// dump all children of this bucket
for (const BUCKET& child : bucket.children)
fprintf(f, " %%Tot %%Par Cycles CPE NumEvent CPE2 NumEvent2 Bucket\n");
// compute thread level total cycle counts across all buckets from root
- const BUCKET& root = thread.root;
- uint64_t totalCycles = 0;
+ const BUCKET& root = thread.root;
+ uint64_t totalCycles = 0;
for (const BUCKET& child : root.children)
{
totalCycles += child.elapsed;
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rdtsc_buckets.h
-*
-* @brief declaration for rdtsc buckets.
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets.h
+ *
+ * @brief Declaration for rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "os.h"
class BucketManager
{
public:
- BucketManager() { }
+ BucketManager() {}
~BucketManager();
// removes all registered thread data
// @param id generated by RegisterBucket
INLINE void StartBucket(UINT id)
{
- if (!mCapturing) return;
+ if (!mCapturing)
+ return;
SWR_ASSERT(tlsThreadId < mThreads.size());
{
bt.pCurrent->children.resize(mBuckets.size());
}
- BUCKET &child = bt.pCurrent->children[id];
+ BUCKET& child = bt.pCurrent->children[id];
child.pParent = bt.pCurrent;
- child.id = id;
- child.start = tsc;
+ child.id = id;
+ child.start = tsc;
// update thread's currently executing bucket
bt.pCurrent = &child;
INLINE void StopBucket(UINT id)
{
SWR_ASSERT(tlsThreadId < mThreads.size());
- BUCKET_THREAD &bt = mThreads[tlsThreadId];
+ BUCKET_THREAD& bt = mThreads[tlsThreadId];
if (bt.level == 0)
{
uint64_t tsc = __rdtsc();
{
- if (bt.pCurrent->start == 0) return;
+ if (bt.pCurrent->start == 0)
+ return;
SWR_ASSERT(bt.pCurrent->id == id, "Mismatched buckets detected");
bt.pCurrent->elapsed += (tsc - bt.pCurrent->start);
INLINE void AddEvent(uint32_t id, uint32_t count)
{
- if (!mCapturing) return;
+ if (!mCapturing)
+ return;
SWR_ASSERT(tlsThreadId < mThreads.size());
{
bt.pCurrent->children.resize(mBuckets.size());
}
- BUCKET &child = bt.pCurrent->children[id];
+ BUCKET& child = bt.pCurrent->children[id];
child.pParent = bt.pCurrent;
- child.id = id;
+ child.id = id;
child.count += count;
}
}
private:
- void PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket);
+ void PrintBucket(
+ FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket);
void PrintThread(FILE* f, const BUCKET_THREAD& thread);
    // list of buckets registered with this manager
std::vector<BUCKET_DESC> mBuckets;
// is capturing currently enabled
- volatile bool mCapturing{ false };
+ volatile bool mCapturing{false};
// has capturing completed
- volatile bool mDoneCapturing{ false };
+ volatile bool mDoneCapturing{false};
std::mutex mThreadMutex;
};
-
// C helpers for jitter
void BucketManager_StartBucket(BucketManager* pBucketMgr, uint32_t id);
void BucketManager_StopBucket(BucketManager* pBucketMgr, uint32_t id);
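// A minimal usage sketch of the manager above (hypothetical call site, not
// part of this patch). The bucket id is assumed to come from RegisterBucket,
// which is referenced in the comments but not shown in this hunk.
BucketManager mgr;
mgr.RegisterThread("worker"); // assigns this thread a slot and its tls id
UINT id = 0;                  // assume: id previously returned by RegisterBucket
mgr.StartBucket(id);          // snapshots __rdtsc() and descends the bucket tree
// ... timed region ...
mgr.StopBucket(id);           // accumulates elapsed cycles and pops back up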
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rdtsc_buckets.h
-*
-* @brief declaration for rdtsc buckets.
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets.h
+ *
+ * @brief Declaration for rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include <vector>
struct BUCKET
{
- uint32_t id{ 0 };
- uint64_t start{ 0 };
- uint64_t elapsed{ 0 };
- uint32_t count{ 0 };
+ uint32_t id{0};
+ uint64_t start{0};
+ uint64_t elapsed{0};
+ uint32_t count{0};
- BUCKET* pParent{ nullptr };
+ BUCKET* pParent{nullptr};
std::vector<BUCKET> children;
};
std::string name;
// id for this thread, assigned by the thread manager
- uint32_t id{ 0 };
+ uint32_t id{0};
// root of the bucket hierarchy for this thread
BUCKET root;
// currently executing bucket somewhere in the hierarchy
- BUCKET* pCurrent{ nullptr };
+ BUCKET* pCurrent{nullptr};
// currently executing hierarchy level
- uint32_t level{ 0 };
+ uint32_t level{0};
// threadviz file object
- FILE* vizFile{ nullptr };
+ FILE* vizFile{nullptr};
BUCKET_THREAD() {}
BUCKET_THREAD(const BUCKET_THREAD& that)
{
- name = that.name;
- id = that.id;
- root = that.root;
+ name = that.name;
+ id = that.id;
+ root = that.root;
pCurrent = &root;
- vizFile = that.vizFile;
+ vizFile = that.vizFile;
}
};
struct VIZ_START_DATA
{
- uint8_t type;
+ uint8_t type;
uint32_t bucketId;
uint64_t timestamp;
};
struct VIZ_STOP_DATA
{
- uint8_t type;
+ uint8_t type;
uint64_t timestamp;
};
inline void Deserialize(FILE* f, std::string& string)
{
- char cstr[256];
+ char cstr[256];
uint8_t length;
fread(&length, sizeof(length), 1, f);
fread(cstr, length, 1, f);
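// Deserialize above implies a simple length-prefixed wire format: a 1-byte
// length followed by the raw characters. A sketch of the symmetric writer
// under that format (hypothetical, not shown in this patch), assuming names
// shorter than 256 characters:
inline void SerializeExample(FILE* f, const std::string& string)
{
    uint8_t length = (uint8_t)string.size(); // assumes string.size() < 256
    fwrite(&length, sizeof(length), 1, f);
    fwrite(string.c_str(), length, 1, f);
}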
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#ifndef __SWR_SIMD16INTRIN_H__
#define __SWR_SIMD16INTRIN_H__
#if ENABLE_AVX512_SIMD16
#if KNOB_SIMD16_WIDTH == 16
-typedef SIMD512 SIMD16;
+typedef SIMD512 SIMD16;
#else
#error Unsupported vector width
-#endif//KNOB_SIMD16_WIDTH == 16
+#endif // KNOB_SIMD16_WIDTH == 16
-#define _simd16_setzero_ps SIMD16::setzero_ps
-#define _simd16_setzero_si SIMD16::setzero_si
-#define _simd16_set1_ps SIMD16::set1_ps
-#define _simd16_set1_epi8 SIMD16::set1_epi8
-#define _simd16_set1_epi32 SIMD16::set1_epi32
-#define _simd16_set_ps SIMD16::set_ps
-#define _simd16_set_epi32 SIMD16::set_epi32
-#define _simd16_load_ps SIMD16::load_ps
-#define _simd16_loadu_ps SIMD16::loadu_ps
-#if 1
-#define _simd16_load1_ps SIMD16::broadcast_ss
-#endif
-#define _simd16_load_si SIMD16::load_si
-#define _simd16_loadu_si SIMD16::loadu_si
-#define _simd16_broadcast_ss(m) SIMD16::broadcast_ss((float const*)m)
-#define _simd16_store_ps SIMD16::store_ps
-#define _simd16_store_si SIMD16::store_si
-#define _simd16_extract_ps(a, imm8) SIMD16::extract_ps<imm8>(a)
-#define _simd16_extract_si(a, imm8) SIMD16::extract_si<imm8>(a)
-#define _simd16_insert_ps(a, b, imm8) SIMD16::insert_ps<imm8>(a, b)
-#define _simd16_insert_si(a, b, imm8) SIMD16::insert_si<imm8>(a, b)
-#define _simd16_maskstore_ps SIMD16::maskstore_ps
-#define _simd16_blend_ps(a, b, mask) SIMD16::blend_ps<mask>(a, b)
-#define _simd16_blendv_ps SIMD16::blendv_ps
-#define _simd16_blendv_epi32 SIMD16::blendv_epi32
-#define _simd16_mul_ps SIMD16::mul_ps
-#define _simd16_div_ps SIMD16::div_ps
-#define _simd16_add_ps SIMD16::add_ps
-#define _simd16_sub_ps SIMD16::sub_ps
-#define _simd16_rsqrt_ps SIMD16::rsqrt_ps
-#define _simd16_min_ps SIMD16::min_ps
-#define _simd16_max_ps SIMD16::max_ps
-#define _simd16_movemask_ps SIMD16::movemask_ps
-#define _simd16_movemask_pd SIMD16::movemask_pd
-#define _simd16_cvtps_epi32 SIMD16::cvtps_epi32
-#define _simd16_cvttps_epi32 SIMD16::cvttps_epi32
-#define _simd16_cvtepi32_ps SIMD16::cvtepi32_ps
-#define _simd16_cmp_ps(a, b, comp) SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
-#define _simd16_cmplt_ps SIMD16::cmplt_ps
-#define _simd16_cmpgt_ps SIMD16::cmpgt_ps
-#define _simd16_cmpneq_ps SIMD16::cmpneq_ps
-#define _simd16_cmpeq_ps SIMD16::cmpeq_ps
-#define _simd16_cmpge_ps SIMD16::cmpge_ps
-#define _simd16_cmple_ps SIMD16::cmple_ps
-#define _simd16_castsi_ps SIMD16::castsi_ps
-#define _simd16_castps_si SIMD16::castps_si
-#define _simd16_castsi_pd SIMD16::castsi_pd
-#define _simd16_castpd_si SIMD16::castpd_si
-#define _simd16_castpd_ps SIMD16::castpd_ps
-#define _simd16_castps_pd SIMD16::castps_pd
-#define _simd16_and_ps SIMD16::and_ps
-#define _simd16_andnot_ps SIMD16::andnot_ps
-#define _simd16_or_ps SIMD16::or_ps
-#define _simd16_xor_ps SIMD16::xor_ps
-#define _simd16_round_ps(a, mode) SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
-#define _simd16_mul_epi32 SIMD16::mul_epi32
-#define _simd16_mullo_epi32 SIMD16::mullo_epi32
-#define _simd16_sub_epi32 SIMD16::sub_epi32
-#define _simd16_sub_epi64 SIMD16::sub_epi64
-#define _simd16_min_epi32 SIMD16::min_epi32
-#define _simd16_max_epi32 SIMD16::max_epi32
-#define _simd16_min_epu32 SIMD16::min_epu32
-#define _simd16_max_epu32 SIMD16::max_epu32
-#define _simd16_add_epi32 SIMD16::add_epi32
-#define _simd16_and_si SIMD16::and_si
-#define _simd16_andnot_si SIMD16::andnot_si
-#define _simd16_or_si SIMD16::or_si
-#define _simd16_xor_si SIMD16::xor_si
-#define _simd16_cmpeq_epi32 SIMD16::cmpeq_epi32
-#define _simd16_cmpgt_epi32 SIMD16::cmpgt_epi32
-#define _simd16_cmplt_epi32 SIMD16::cmplt_epi32
-#define _simd16_testz_ps SIMD16::testz_ps
-#define _simd16_unpacklo_ps SIMD16::unpacklo_ps
-#define _simd16_unpackhi_ps SIMD16::unpackhi_ps
-#define _simd16_unpacklo_pd SIMD16::unpacklo_pd
-#define _simd16_unpackhi_pd SIMD16::unpackhi_pd
-#define _simd16_unpacklo_epi8 SIMD16::unpacklo_epi8
-#define _simd16_unpackhi_epi8 SIMD16::unpackhi_epi8
-#define _simd16_unpacklo_epi16 SIMD16::unpacklo_epi16
-#define _simd16_unpackhi_epi16 SIMD16::unpackhi_epi16
-#define _simd16_unpacklo_epi32 SIMD16::unpacklo_epi32
-#define _simd16_unpackhi_epi32 SIMD16::unpackhi_epi32
-#define _simd16_unpacklo_epi64 SIMD16::unpacklo_epi64
-#define _simd16_unpackhi_epi64 SIMD16::unpackhi_epi64
-#define _simd16_slli_epi32(a, i) SIMD16::slli_epi32<i>(a)
-#define _simd16_srli_epi32(a, i) SIMD16::srli_epi32<i>(a)
-#define _simd16_srai_epi32(a, i) SIMD16::srai_epi32<i>(a)
-#define _simd16_fmadd_ps SIMD16::fmadd_ps
-#define _simd16_fmsub_ps SIMD16::fmsub_ps
-#define _simd16_adds_epu8 SIMD16::adds_epu8
-#define _simd16_subs_epu8 SIMD16::subs_epu8
-#define _simd16_add_epi8 SIMD16::add_epi8
-#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
+#define _simd16_setzero_ps SIMD16::setzero_ps
+#define _simd16_setzero_si SIMD16::setzero_si
+#define _simd16_set1_ps SIMD16::set1_ps
+#define _simd16_set1_epi8 SIMD16::set1_epi8
+#define _simd16_set1_epi32 SIMD16::set1_epi32
+#define _simd16_set_ps SIMD16::set_ps
+#define _simd16_set_epi32 SIMD16::set_epi32
+#define _simd16_load_ps SIMD16::load_ps
+#define _simd16_loadu_ps SIMD16::loadu_ps
+#if 1
+#define _simd16_load1_ps SIMD16::broadcast_ss
+#endif
+#define _simd16_load_si SIMD16::load_si
+#define _simd16_loadu_si SIMD16::loadu_si
+#define _simd16_broadcast_ss(m) SIMD16::broadcast_ss((float const*)m)
+#define _simd16_store_ps SIMD16::store_ps
+#define _simd16_store_si SIMD16::store_si
+#define _simd16_extract_ps(a, imm8) SIMD16::extract_ps<imm8>(a)
+#define _simd16_extract_si(a, imm8) SIMD16::extract_si<imm8>(a)
+#define _simd16_insert_ps(a, b, imm8) SIMD16::insert_ps<imm8>(a, b)
+#define _simd16_insert_si(a, b, imm8) SIMD16::insert_si<imm8>(a, b)
+#define _simd16_maskstore_ps SIMD16::maskstore_ps
+#define _simd16_blend_ps(a, b, mask) SIMD16::blend_ps<mask>(a, b)
+#define _simd16_blendv_ps SIMD16::blendv_ps
+#define _simd16_blendv_epi32 SIMD16::blendv_epi32
+#define _simd16_mul_ps SIMD16::mul_ps
+#define _simd16_div_ps SIMD16::div_ps
+#define _simd16_add_ps SIMD16::add_ps
+#define _simd16_sub_ps SIMD16::sub_ps
+#define _simd16_rsqrt_ps SIMD16::rsqrt_ps
+#define _simd16_min_ps SIMD16::min_ps
+#define _simd16_max_ps SIMD16::max_ps
+#define _simd16_movemask_ps SIMD16::movemask_ps
+#define _simd16_movemask_pd SIMD16::movemask_pd
+#define _simd16_cvtps_epi32 SIMD16::cvtps_epi32
+#define _simd16_cvttps_epi32 SIMD16::cvttps_epi32
+#define _simd16_cvtepi32_ps SIMD16::cvtepi32_ps
+#define _simd16_cmp_ps(a, b, comp) SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
+#define _simd16_cmplt_ps SIMD16::cmplt_ps
+#define _simd16_cmpgt_ps SIMD16::cmpgt_ps
+#define _simd16_cmpneq_ps SIMD16::cmpneq_ps
+#define _simd16_cmpeq_ps SIMD16::cmpeq_ps
+#define _simd16_cmpge_ps SIMD16::cmpge_ps
+#define _simd16_cmple_ps SIMD16::cmple_ps
+#define _simd16_castsi_ps SIMD16::castsi_ps
+#define _simd16_castps_si SIMD16::castps_si
+#define _simd16_castsi_pd SIMD16::castsi_pd
+#define _simd16_castpd_si SIMD16::castpd_si
+#define _simd16_castpd_ps SIMD16::castpd_ps
+#define _simd16_castps_pd SIMD16::castps_pd
+#define _simd16_and_ps SIMD16::and_ps
+#define _simd16_andnot_ps SIMD16::andnot_ps
+#define _simd16_or_ps SIMD16::or_ps
+#define _simd16_xor_ps SIMD16::xor_ps
+#define _simd16_round_ps(a, mode) SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
+#define _simd16_mul_epi32 SIMD16::mul_epi32
+#define _simd16_mullo_epi32 SIMD16::mullo_epi32
+#define _simd16_sub_epi32 SIMD16::sub_epi32
+#define _simd16_sub_epi64 SIMD16::sub_epi64
+#define _simd16_min_epi32 SIMD16::min_epi32
+#define _simd16_max_epi32 SIMD16::max_epi32
+#define _simd16_min_epu32 SIMD16::min_epu32
+#define _simd16_max_epu32 SIMD16::max_epu32
+#define _simd16_add_epi32 SIMD16::add_epi32
+#define _simd16_and_si SIMD16::and_si
+#define _simd16_andnot_si SIMD16::andnot_si
+#define _simd16_or_si SIMD16::or_si
+#define _simd16_xor_si SIMD16::xor_si
+#define _simd16_cmpeq_epi32 SIMD16::cmpeq_epi32
+#define _simd16_cmpgt_epi32 SIMD16::cmpgt_epi32
+#define _simd16_cmplt_epi32 SIMD16::cmplt_epi32
+#define _simd16_testz_ps SIMD16::testz_ps
+#define _simd16_unpacklo_ps SIMD16::unpacklo_ps
+#define _simd16_unpackhi_ps SIMD16::unpackhi_ps
+#define _simd16_unpacklo_pd SIMD16::unpacklo_pd
+#define _simd16_unpackhi_pd SIMD16::unpackhi_pd
+#define _simd16_unpacklo_epi8 SIMD16::unpacklo_epi8
+#define _simd16_unpackhi_epi8 SIMD16::unpackhi_epi8
+#define _simd16_unpacklo_epi16 SIMD16::unpacklo_epi16
+#define _simd16_unpackhi_epi16 SIMD16::unpackhi_epi16
+#define _simd16_unpacklo_epi32 SIMD16::unpacklo_epi32
+#define _simd16_unpackhi_epi32 SIMD16::unpackhi_epi32
+#define _simd16_unpacklo_epi64 SIMD16::unpacklo_epi64
+#define _simd16_unpackhi_epi64 SIMD16::unpackhi_epi64
+#define _simd16_slli_epi32(a, i) SIMD16::slli_epi32<i>(a)
+#define _simd16_srli_epi32(a, i) SIMD16::srli_epi32<i>(a)
+#define _simd16_srai_epi32(a, i) SIMD16::srai_epi32<i>(a)
+#define _simd16_fmadd_ps SIMD16::fmadd_ps
+#define _simd16_fmsub_ps SIMD16::fmsub_ps
+#define _simd16_adds_epu8 SIMD16::adds_epu8
+#define _simd16_subs_epu8 SIMD16::subs_epu8
+#define _simd16_add_epi8 SIMD16::add_epi8
+#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
-#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
-#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
+#define _simd16_i32gather_ps(m, index, scale) \
+ SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
+#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) \
+ SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
-#define _simd16_abs_epi32 SIMD16::abs_epi32
+#define _simd16_abs_epi32 SIMD16::abs_epi32
-#define _simd16_cmpeq_epi64 SIMD16::cmpeq_epi64
-#define _simd16_cmpgt_epi64 SIMD16::cmpgt_epi64
-#define _simd16_cmpeq_epi16 SIMD16::cmpeq_epi16
-#define _simd16_cmpgt_epi16 SIMD16::cmpgt_epi16
-#define _simd16_cmpeq_epi8 SIMD16::cmpeq_epi8
-#define _simd16_cmpgt_epi8 SIMD16::cmpgt_epi8
+#define _simd16_cmpeq_epi64 SIMD16::cmpeq_epi64
+#define _simd16_cmpgt_epi64 SIMD16::cmpgt_epi64
+#define _simd16_cmpeq_epi16 SIMD16::cmpeq_epi16
+#define _simd16_cmpgt_epi16 SIMD16::cmpgt_epi16
+#define _simd16_cmpeq_epi8 SIMD16::cmpeq_epi8
+#define _simd16_cmpgt_epi8 SIMD16::cmpgt_epi8
-#define _simd16_permute_ps_i(a, i) SIMD16::permute_ps<i>(a)
-#define _simd16_permute_ps SIMD16::permute_ps
-#define _simd16_permute_epi32 SIMD16::permute_epi32
-#define _simd16_sllv_epi32 SIMD16::sllv_epi32
-#define _simd16_srlv_epi32 SIMD16::sllv_epi32
-#define _simd16_permute2f128_ps(a, b, i) SIMD16::permute2f128_ps<i>(a, b)
-#define _simd16_permute2f128_pd(a, b, i) SIMD16::permute2f128_pd<i>(a, b)
-#define _simd16_permute2f128_si(a, b, i) SIMD16::permute2f128_si<i>(a, b)
-#define _simd16_shuffle_ps(a, b, i) SIMD16::shuffle_ps<i>(a, b)
-#define _simd16_shuffle_pd(a, b, i) SIMD16::shuffle_pd<i>(a, b)
-#define _simd16_shuffle_epi32(a, b, imm8) SIMD16::shuffle_epi32<imm8>(a, b)
-#define _simd16_shuffle_epi64(a, b, imm8) SIMD16::shuffle_epi64<imm8>(a, b)
-#define _simd16_cvtepu8_epi16 SIMD16::cvtepu8_epi16
-#define _simd16_cvtepu8_epi32 SIMD16::cvtepu8_epi32
-#define _simd16_cvtepu16_epi32 SIMD16::cvtepu16_epi32
-#define _simd16_cvtepu16_epi64 SIMD16::cvtepu16_epi64
-#define _simd16_cvtepu32_epi64 SIMD16::cvtepu32_epi64
-#define _simd16_packus_epi16 SIMD16::packus_epi16
-#define _simd16_packs_epi16 SIMD16::packs_epi16
-#define _simd16_packus_epi32 SIMD16::packus_epi32
-#define _simd16_packs_epi32 SIMD16::packs_epi32
-#define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
-#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
-#define _simd16_int2mask(mask) simd16mask(mask)
-#define _simd16_mask2int(mask) int(mask)
-#define _simd16_vmask_ps SIMD16::vmask_ps
+#define _simd16_permute_ps_i(a, i) SIMD16::permute_ps<i>(a)
+#define _simd16_permute_ps SIMD16::permute_ps
+#define _simd16_permute_epi32 SIMD16::permute_epi32
+#define _simd16_sllv_epi32 SIMD16::sllv_epi32
+#define _simd16_srlv_epi32 SIMD16::srlv_epi32
+#define _simd16_permute2f128_ps(a, b, i) SIMD16::permute2f128_ps<i>(a, b)
+#define _simd16_permute2f128_pd(a, b, i) SIMD16::permute2f128_pd<i>(a, b)
+#define _simd16_permute2f128_si(a, b, i) SIMD16::permute2f128_si<i>(a, b)
+#define _simd16_shuffle_ps(a, b, i) SIMD16::shuffle_ps<i>(a, b)
+#define _simd16_shuffle_pd(a, b, i) SIMD16::shuffle_pd<i>(a, b)
+#define _simd16_shuffle_epi32(a, b, imm8) SIMD16::shuffle_epi32<imm8>(a, b)
+#define _simd16_shuffle_epi64(a, b, imm8) SIMD16::shuffle_epi64<imm8>(a, b)
+#define _simd16_cvtepu8_epi16 SIMD16::cvtepu8_epi16
+#define _simd16_cvtepu8_epi32 SIMD16::cvtepu8_epi32
+#define _simd16_cvtepu16_epi32 SIMD16::cvtepu16_epi32
+#define _simd16_cvtepu16_epi64 SIMD16::cvtepu16_epi64
+#define _simd16_cvtepu32_epi64 SIMD16::cvtepu32_epi64
+#define _simd16_packus_epi16 SIMD16::packus_epi16
+#define _simd16_packs_epi16 SIMD16::packs_epi16
+#define _simd16_packus_epi32 SIMD16::packus_epi32
+#define _simd16_packs_epi32 SIMD16::packs_epi32
+#define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
+#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
+#define _simd16_int2mask(mask) simd16mask(mask)
+#define _simd16_mask2int(mask) int(mask)
+#define _simd16_vmask_ps SIMD16::vmask_ps
-#endif//ENABLE_AVX512_SIMD16
+#endif // ENABLE_AVX512_SIMD16
-#endif//__SWR_SIMD16INTRIN_H_
+#endif // __SWR_SIMD16INTRIN_H__
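// One property of these wrappers worth noting: the immediate-operand macros
// turn their last argument into a template parameter, e.g.
//   _simd16_slli_epi32(a, 4)  expands to  SIMD16::slli_epi32<4>(a)
// so shift counts, blend masks, and lane indices must be compile-time
// constants at every call site.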
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#ifndef __SWR_SIMDINTRIN_H__
#define __SWR_SIMDINTRIN_H__
#include "common/simdlib.hpp"
#if KNOB_SIMD_WIDTH == 8
-typedef SIMD256 SIMD;
+typedef SIMD256 SIMD;
#else
#error Unsupported vector width
-#endif//KNOB_SIMD16_WIDTH == 16
-
-
-#define _simd128_maskstore_ps SIMD128::maskstore_ps
-#define _simd128_fmadd_ps SIMD128::fmadd_ps
-
-#define _simd_load_ps SIMD::load_ps
-#define _simd_load1_ps SIMD::broadcast_ss
-#define _simd_loadu_ps SIMD::loadu_ps
-#define _simd_setzero_ps SIMD::setzero_ps
-#define _simd_set1_ps SIMD::set1_ps
-#define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)
-#define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)
-#define _simd_blendv_ps SIMD::blendv_ps
-#define _simd_store_ps SIMD::store_ps
-#define _simd_mul_ps SIMD::mul_ps
-#define _simd_add_ps SIMD::add_ps
-#define _simd_sub_ps SIMD::sub_ps
-#define _simd_rsqrt_ps SIMD::rsqrt_ps
-#define _simd_min_ps SIMD::min_ps
-#define _simd_max_ps SIMD::max_ps
-#define _simd_movemask_ps SIMD::movemask_ps
-#define _simd_cvtps_epi32 SIMD::cvtps_epi32
-#define _simd_cvttps_epi32 SIMD::cvttps_epi32
-#define _simd_cvtepi32_ps SIMD::cvtepi32_ps
-#define _simd_cmplt_ps SIMD::cmplt_ps
-#define _simd_cmpgt_ps SIMD::cmpgt_ps
-#define _simd_cmpneq_ps SIMD::cmpneq_ps
-#define _simd_cmpeq_ps SIMD::cmpeq_ps
-#define _simd_cmpge_ps SIMD::cmpge_ps
-#define _simd_cmple_ps SIMD::cmple_ps
-#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
-#define _simd_and_ps SIMD::and_ps
-#define _simd_or_ps SIMD::or_ps
-#define _simd_rcp_ps SIMD::rcp_ps
-#define _simd_div_ps SIMD::div_ps
-#define _simd_castsi_ps SIMD::castsi_ps
-#define _simd_castps_pd SIMD::castps_pd
-#define _simd_castpd_ps SIMD::castpd_ps
-#define _simd_andnot_ps SIMD::andnot_ps
-#define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)
-#define _simd_castpd_ps SIMD::castpd_ps
-#define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const *)(a))
-#define _simd_stream_ps SIMD::stream_ps
-
-#define _simd_movemask_pd SIMD::movemask_pd
-#define _simd_castsi_pd SIMD::castsi_pd
-
-#define _simd_mul_epi32 SIMD::mul_epi32
-#define _simd_mullo_epi32 SIMD::mullo_epi32
-#define _simd_sub_epi32 SIMD::sub_epi32
-#define _simd_sub_epi64 SIMD::sub_epi64
-#define _simd_min_epi32 SIMD::min_epi32
-#define _simd_min_epu32 SIMD::min_epu32
-#define _simd_max_epi32 SIMD::max_epi32
-#define _simd_max_epu32 SIMD::max_epu32
-#define _simd_add_epi32 SIMD::add_epi32
-#define _simd_and_si SIMD::and_si
-#define _simd_andnot_si SIMD::andnot_si
-#define _simd_cmpeq_epi32 SIMD::cmpeq_epi32
-#define _simd_cmplt_epi32 SIMD::cmplt_epi32
-#define _simd_cmpgt_epi32 SIMD::cmpgt_epi32
-#define _simd_or_si SIMD::or_si
-#define _simd_xor_si SIMD::xor_si
-#define _simd_castps_si SIMD::castps_si
-#define _simd_adds_epu8 SIMD::adds_epu8
-#define _simd_subs_epu8 SIMD::subs_epu8
-#define _simd_add_epi8 SIMD::add_epi8
-#define _simd_cmpeq_epi64 SIMD::cmpeq_epi64
-#define _simd_cmpgt_epi64 SIMD::cmpgt_epi64
-#define _simd_cmpgt_epi8 SIMD::cmpgt_epi8
-#define _simd_cmpeq_epi8 SIMD::cmpeq_epi8
-#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
-#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
-#define _simd_movemask_epi8 SIMD::movemask_epi8
-#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
-#define _simd_permute_ps SIMD::permute_ps
-#define _simd_permute_epi32 SIMD::permute_epi32
-#define _simd_srlv_epi32 SIMD::srlv_epi32
-#define _simd_sllv_epi32 SIMD::sllv_epi32
-
-#define _simd_unpacklo_epi8 SIMD::unpacklo_epi8
-#define _simd_unpackhi_epi8 SIMD::unpackhi_epi8
-#define _simd_unpacklo_epi16 SIMD::unpacklo_epi16
-#define _simd_unpackhi_epi16 SIMD::unpackhi_epi16
-#define _simd_unpacklo_epi32 SIMD::unpacklo_epi32
-#define _simd_unpackhi_epi32 SIMD::unpackhi_epi32
-#define _simd_unpacklo_epi64 SIMD::unpacklo_epi64
-#define _simd_unpackhi_epi64 SIMD::unpackhi_epi64
-
-#define _simd_slli_epi32(a,i) SIMD::slli_epi32<i>(a)
-#define _simd_srai_epi32(a,i) SIMD::srai_epi32<i>(a)
-#define _simd_srli_epi32(a,i) SIMD::srli_epi32<i>(a)
-#define _simd_srlisi_ps(a,i) SIMD::srlisi_ps<i>(a)
-
-#define _simd_fmadd_ps SIMD::fmadd_ps
-#define _simd_fmsub_ps SIMD::fmsub_ps
-#define _simd_shuffle_epi8 SIMD::shuffle_epi8
-
-#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
-#define _simd_mask_i32gather_ps(r, p, o, m, s) SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
-#define _simd_abs_epi32 SIMD::abs_epi32
-
-#define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16
-#define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32
-#define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32
-#define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64
-#define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64
-
-#define _simd_packus_epi16 SIMD::packus_epi16
-#define _simd_packs_epi16 SIMD::packs_epi16
-#define _simd_packus_epi32 SIMD::packus_epi32
-#define _simd_packs_epi32 SIMD::packs_epi32
-
-#define _simd_unpacklo_ps SIMD::unpacklo_ps
-#define _simd_unpackhi_ps SIMD::unpackhi_ps
-#define _simd_unpacklo_pd SIMD::unpacklo_pd
-#define _simd_unpackhi_pd SIMD::unpackhi_pd
-#define _simd_insertf128_ps SIMD::insertf128_ps
-#define _simd_insertf128_pd SIMD::insertf128_pd
-#define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)
-#define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)
-#define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)
-#define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)
-#define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)
-#define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)
-#define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)
-#define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)
-#define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)
-#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
-#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)
-#define _simd_set1_epi32 SIMD::set1_epi32
-#define _simd_set_epi32 SIMD::set_epi32
-#define _simd_set_ps SIMD::set_ps
-#define _simd_set1_epi8 SIMD::set1_epi8
-#define _simd_setzero_si SIMD::setzero_si
-#define _simd_cvttps_epi32 SIMD::cvttps_epi32
-#define _simd_store_si SIMD::store_si
-#define _simd_broadcast_ss SIMD::broadcast_ss
-#define _simd_maskstore_ps SIMD::maskstore_ps
-#define _simd_load_si SIMD::load_si
-#define _simd_loadu_si SIMD::loadu_si
-#define _simd_sub_ps SIMD::sub_ps
-#define _simd_testz_ps SIMD::testz_ps
-#define _simd_testz_si SIMD::testz_si
-#define _simd_xor_ps SIMD::xor_ps
-
-#define _simd_loadu2_si SIMD::loadu2_si
-#define _simd_storeu2_si SIMD::storeu2_si
-
-#define _simd_blendv_epi32 SIMD::blendv_epi32
-#define _simd_vmask_ps SIMD::vmask_ps
-
-template<int mask> SIMDINLINE
-SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const &a, SIMD128::Integer const &b)
+#endif // KNOB_SIMD_WIDTH == 8
+
+#define _simd128_maskstore_ps SIMD128::maskstore_ps
+#define _simd128_fmadd_ps SIMD128::fmadd_ps
+
+#define _simd_load_ps SIMD::load_ps
+#define _simd_load1_ps SIMD::broadcast_ss
+#define _simd_loadu_ps SIMD::loadu_ps
+#define _simd_setzero_ps SIMD::setzero_ps
+#define _simd_set1_ps SIMD::set1_ps
+#define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)
+#define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)
+#define _simd_blendv_ps SIMD::blendv_ps
+#define _simd_store_ps SIMD::store_ps
+#define _simd_mul_ps SIMD::mul_ps
+#define _simd_add_ps SIMD::add_ps
+#define _simd_sub_ps SIMD::sub_ps
+#define _simd_rsqrt_ps SIMD::rsqrt_ps
+#define _simd_min_ps SIMD::min_ps
+#define _simd_max_ps SIMD::max_ps
+#define _simd_movemask_ps SIMD::movemask_ps
+#define _simd_cvtps_epi32 SIMD::cvtps_epi32
+#define _simd_cvttps_epi32 SIMD::cvttps_epi32
+#define _simd_cvtepi32_ps SIMD::cvtepi32_ps
+#define _simd_cmplt_ps SIMD::cmplt_ps
+#define _simd_cmpgt_ps SIMD::cmpgt_ps
+#define _simd_cmpneq_ps SIMD::cmpneq_ps
+#define _simd_cmpeq_ps SIMD::cmpeq_ps
+#define _simd_cmpge_ps SIMD::cmpge_ps
+#define _simd_cmple_ps SIMD::cmple_ps
+#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
+#define _simd_and_ps SIMD::and_ps
+#define _simd_or_ps SIMD::or_ps
+#define _simd_rcp_ps SIMD::rcp_ps
+#define _simd_div_ps SIMD::div_ps
+#define _simd_castsi_ps SIMD::castsi_ps
+#define _simd_castps_pd SIMD::castps_pd
+#define _simd_castpd_ps SIMD::castpd_ps
+#define _simd_andnot_ps SIMD::andnot_ps
+#define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)
+#define _simd_castpd_ps SIMD::castpd_ps
+#define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const*)(a))
+#define _simd_stream_ps SIMD::stream_ps
+
+#define _simd_movemask_pd SIMD::movemask_pd
+#define _simd_castsi_pd SIMD::castsi_pd
+
+#define _simd_mul_epi32 SIMD::mul_epi32
+#define _simd_mullo_epi32 SIMD::mullo_epi32
+#define _simd_sub_epi32 SIMD::sub_epi32
+#define _simd_sub_epi64 SIMD::sub_epi64
+#define _simd_min_epi32 SIMD::min_epi32
+#define _simd_min_epu32 SIMD::min_epu32
+#define _simd_max_epi32 SIMD::max_epi32
+#define _simd_max_epu32 SIMD::max_epu32
+#define _simd_add_epi32 SIMD::add_epi32
+#define _simd_and_si SIMD::and_si
+#define _simd_andnot_si SIMD::andnot_si
+#define _simd_cmpeq_epi32 SIMD::cmpeq_epi32
+#define _simd_cmplt_epi32 SIMD::cmplt_epi32
+#define _simd_cmpgt_epi32 SIMD::cmpgt_epi32
+#define _simd_or_si SIMD::or_si
+#define _simd_xor_si SIMD::xor_si
+#define _simd_castps_si SIMD::castps_si
+#define _simd_adds_epu8 SIMD::adds_epu8
+#define _simd_subs_epu8 SIMD::subs_epu8
+#define _simd_add_epi8 SIMD::add_epi8
+#define _simd_cmpeq_epi64 SIMD::cmpeq_epi64
+#define _simd_cmpgt_epi64 SIMD::cmpgt_epi64
+#define _simd_cmpgt_epi8 SIMD::cmpgt_epi8
+#define _simd_cmpeq_epi8 SIMD::cmpeq_epi8
+#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
+#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
+#define _simd_movemask_epi8 SIMD::movemask_epi8
+#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
+#define _simd_permute_ps SIMD::permute_ps
+#define _simd_permute_epi32 SIMD::permute_epi32
+#define _simd_srlv_epi32 SIMD::srlv_epi32
+#define _simd_sllv_epi32 SIMD::sllv_epi32
+
+#define _simd_unpacklo_epi8 SIMD::unpacklo_epi8
+#define _simd_unpackhi_epi8 SIMD::unpackhi_epi8
+#define _simd_unpacklo_epi16 SIMD::unpacklo_epi16
+#define _simd_unpackhi_epi16 SIMD::unpackhi_epi16
+#define _simd_unpacklo_epi32 SIMD::unpacklo_epi32
+#define _simd_unpackhi_epi32 SIMD::unpackhi_epi32
+#define _simd_unpacklo_epi64 SIMD::unpacklo_epi64
+#define _simd_unpackhi_epi64 SIMD::unpackhi_epi64
+
+#define _simd_slli_epi32(a, i) SIMD::slli_epi32<i>(a)
+#define _simd_srai_epi32(a, i) SIMD::srai_epi32<i>(a)
+#define _simd_srli_epi32(a, i) SIMD::srli_epi32<i>(a)
+#define _simd_srlisi_ps(a, i) SIMD::srlisi_ps<i>(a)
+
+#define _simd_fmadd_ps SIMD::fmadd_ps
+#define _simd_fmsub_ps SIMD::fmsub_ps
+#define _simd_shuffle_epi8 SIMD::shuffle_epi8
+
+#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
+#define _simd_mask_i32gather_ps(r, p, o, m, s) \
+ SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
+#define _simd_abs_epi32 SIMD::abs_epi32
+
+#define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16
+#define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32
+#define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32
+#define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64
+#define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64
+
+#define _simd_packus_epi16 SIMD::packus_epi16
+#define _simd_packs_epi16 SIMD::packs_epi16
+#define _simd_packus_epi32 SIMD::packus_epi32
+#define _simd_packs_epi32 SIMD::packs_epi32
+
+#define _simd_unpacklo_ps SIMD::unpacklo_ps
+#define _simd_unpackhi_ps SIMD::unpackhi_ps
+#define _simd_unpacklo_pd SIMD::unpacklo_pd
+#define _simd_unpackhi_pd SIMD::unpackhi_pd
+#define _simd_insertf128_ps SIMD::insertf128_ps
+#define _simd_insertf128_pd SIMD::insertf128_pd
+#define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)
+#define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)
+#define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)
+#define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)
+#define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)
+#define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)
+#define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)
+#define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)
+#define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)
+#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
+#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)
+#define _simd_set1_epi32 SIMD::set1_epi32
+#define _simd_set_epi32 SIMD::set_epi32
+#define _simd_set_ps SIMD::set_ps
+#define _simd_set1_epi8 SIMD::set1_epi8
+#define _simd_setzero_si SIMD::setzero_si
+#define _simd_cvttps_epi32 SIMD::cvttps_epi32
+#define _simd_store_si SIMD::store_si
+#define _simd_broadcast_ss SIMD::broadcast_ss
+#define _simd_maskstore_ps SIMD::maskstore_ps
+#define _simd_load_si SIMD::load_si
+#define _simd_loadu_si SIMD::loadu_si
+#define _simd_sub_ps SIMD::sub_ps
+#define _simd_testz_ps SIMD::testz_ps
+#define _simd_testz_si SIMD::testz_si
+#define _simd_xor_ps SIMD::xor_ps
+
+#define _simd_loadu2_si SIMD::loadu2_si
+#define _simd_storeu2_si SIMD::storeu2_si
+
+#define _simd_blendv_epi32 SIMD::blendv_epi32
+#define _simd_vmask_ps SIMD::vmask_ps
+
+template <int mask>
+SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)
{
- return SIMD128::castps_si(SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
+ return SIMD128::castps_si(
+ SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
}
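// A usage sketch for the helper above (hypothetical values a and b, not part
// of this patch). Lane selection follows _mm_blend_ps: mask bit i picks lane
// i from b when set, from a otherwise; the cast round-trip through float is
// bitwise-exact, so no integer data is disturbed.
SIMD128::Integer r = _simd_blend4_epi32<0x5>(a, b); // lanes 0,2 from b; 1,3 from a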
SIMDINLINE
-void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
+void _simd_mov(simdscalar& r, unsigned int rlane, simdscalar& s, unsigned int slane)
{
OSALIGNSIMD(float) rArray[KNOB_SIMD_WIDTH], sArray[KNOB_SIMD_WIDTH];
SIMD256::store_ps(rArray, r);
SIMD256::store_ps(sArray, s);
rArray[rlane] = sArray[slane];
- r = SIMD256::load_ps(rArray);
+ r = SIMD256::load_ps(rArray);
}
// Populates a simdvector from a vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
#endif
-#define _simdvec_dp3_ps SIMD::vec4_dp3_ps
-#define _simdvec_dp4_ps SIMD::vec4_dp4_ps
-#define _simdvec_rcp_length_ps SIMD::vec4_rcp_length_ps
-#define _simdvec_normalize_ps SIMD::vec4_normalize_ps
-#define _simdvec_mul_ps SIMD::vec4_mul_ps
-#define _simdvec_add_ps SIMD::vec4_add_ps
-#define _simdvec_min_ps SIMD::vec4_min_ps
-#define _simdvec_max_ps SIMD::vec4_max_ps
-#define _simd_mat4x4_vec4_multiply SIMD::mat4x4_vec4_multiply
-#define _simd_mat3x3_vec3_w0_multiply SIMD::mat3x3_vec3_w0_multiply
-#define _simd_mat4x4_vec3_w1_multiply SIMD::mat4x4_vec3_w1_multiply
-#define _simd_mat4x3_vec3_w1_multiply SIMD::mat4x3_vec3_w1_multiply
+#define _simdvec_dp3_ps SIMD::vec4_dp3_ps
+#define _simdvec_dp4_ps SIMD::vec4_dp4_ps
+#define _simdvec_rcp_length_ps SIMD::vec4_rcp_length_ps
+#define _simdvec_normalize_ps SIMD::vec4_normalize_ps
+#define _simdvec_mul_ps SIMD::vec4_mul_ps
+#define _simdvec_add_ps SIMD::vec4_add_ps
+#define _simdvec_min_ps SIMD::vec4_min_ps
+#define _simdvec_max_ps SIMD::vec4_max_ps
+#define _simd_mat4x4_vec4_multiply SIMD::mat4x4_vec4_multiply
+#define _simd_mat3x3_vec3_w0_multiply SIMD::mat3x3_vec3_w0_multiply
+#define _simd_mat4x4_vec3_w1_multiply SIMD::mat4x4_vec3_w1_multiply
+#define _simd_mat4x3_vec3_w1_multiply SIMD::mat4x3_vec3_w1_multiply
//////////////////////////////////////////////////////////////////////////
/// @brief Compute plane equation vA * vX + vB * vY + vC
-SIMDINLINE simdscalar vplaneps(simdscalar const &vA, simdscalar const &vB, simdscalar const &vC, simdscalar const &vX, simdscalar const &vY)
+SIMDINLINE simdscalar vplaneps(simdscalar const& vA,
+ simdscalar const& vB,
+ simdscalar const& vC,
+ simdscalar const& vX,
+ simdscalar const& vY)
{
simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);
- vOut = _simd_fmadd_ps(vB, vY, vOut);
+ vOut = _simd_fmadd_ps(vB, vY, vOut);
return vOut;
}
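// Minimal standalone sketch of the same plane evaluation with raw AVX
// (illustrative only): out = vA*vX + vB*vY + vC, spelled as two mul+add steps
// exactly like the fmadd_ps fallback used when FMA hardware is unavailable.
#include <immintrin.h>
static inline __m256 vplaneps_sketch(__m256 vA, __m256 vB, __m256 vC,
                                     __m256 vX, __m256 vY)
{
    __m256 out = _mm256_add_ps(_mm256_mul_ps(vA, vX), vC); // vA*vX + vC
    return _mm256_add_ps(_mm256_mul_ps(vB, vY), out);      // + vB*vY
}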
//////////////////////////////////////////////////////////////////////////
/// @brief Compute plane equation vA * vX + vB * vY + vC
-SIMDINLINE simd4scalar vplaneps(simd4scalar const &vA, simd4scalar const &vB, simd4scalar const &vC, simd4scalar const &vX, simd4scalar const &vY)
+SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,
+ simd4scalar const& vB,
+ simd4scalar const& vC,
+ simd4scalar const& vX,
+ simd4scalar const& vY)
{
simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC);
- vOut = _simd128_fmadd_ps(vB, vY, vOut);
+ vOut = _simd128_fmadd_ps(vB, vY, vOut);
return vOut;
}
/// @param vI - barycentric I
/// @param vJ - barycentric J
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponent(simdscalar const &vI, simdscalar const &vJ, const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,
+ simdscalar const& vJ,
+ const float* pInterpBuffer)
{
- const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
- const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
- const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
+ const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+ const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+ const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
simdscalar vA = _simd_broadcast_ss(pInterpA);
simdscalar vB = _simd_broadcast_ss(pInterpB);
simdscalar vC = _simd_broadcast_ss(pInterpC);
simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
- vC = _simd_mul_ps(vk, vC);
-
+ vC = _simd_mul_ps(vk, vC);
+
return vplaneps(vA, vB, vC, vI, vJ);
}
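// Scalar reference for one lane of the interpolation above: with barycentric
// weights i and j, the third weight is k = 1 - i - j, and the attribute value
// is a*i + b*j + c*k, where a, b, c are the three per-vertex coefficients
// fetched from pInterpBuffer. (Hypothetical helper, for illustration only.)
static inline float InterpolateScalarRef(float a, float b, float c, float i, float j)
{
    const float k = 1.0f - i - j;
    return a * i + b * j + c * k;
}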
//////////////////////////////////////////////////////////////////////////
/// @brief Interpolates a single component (flat shade).
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponentFlat(const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)
{
- const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+ const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
simdscalar vA = _simd_broadcast_ss(pInterpA);
/// @param vI - barycentric I
/// @param vJ - barycentric J
/// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const &vI, simd4scalar const &vJ, const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,
+ simd4scalar const& vJ,
+ const float* pInterpBuffer)
{
- const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
- const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
- const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
+ const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+ const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+ const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
simd4scalar vA = SIMD128::broadcast_ss(pInterpA);
simd4scalar vB = SIMD128::broadcast_ss(pInterpB);
simd4scalar vC = SIMD128::broadcast_ss(pInterpC);
simd4scalar vk = SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);
- vC = SIMD128::mul_ps(vk, vC);
+ vC = SIMD128::mul_ps(vk, vC);
return vplaneps(vA, vB, vC, vI, vJ);
}
-static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const &a)
+static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)
{
simd4scalari ai = SIMD128::castps_si(a);
return SIMD128::castsi_ps(SIMD128::and_si(ai, SIMD128::set1_epi32(0x7fffffff)));
}
-static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const &a)
+static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)
{
simdscalari ai = _simd_castps_si(a);
return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));
}
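// Standalone sketch of the sign-bit trick above with raw SSE2 (illustrative):
// IEEE-754 floats keep the sign in bit 31, so ANDing each lane with 0x7fffffff
// computes |a| with no branches, compares, or conversions.
#include <emmintrin.h>
static inline __m128 abs_ps_sketch(__m128 a)
{
    const __m128i kSignMask = _mm_set1_epi32(0x7fffffff); // clear bit 31
    return _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(a), kSignMask));
}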
-
#if ENABLE_AVX512_SIMD16
#include "simd16intrin.h"
-#endif//ENABLE_AVX512_SIMD16
+#endif // ENABLE_AVX512_SIMD16
-#endif//__SWR_SIMDINTRIN_H__
+#endif //__SWR_SIMDINTRIN_H__
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#pragma once
#include "simdlib_types.hpp"
#include "simdlib_128_avx.inl"
#undef __SIMD_LIB_AVX_HPP__
}; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
#if SIMD_ARCH >= SIMD_ARCH_AVX2
struct AVX2Impl : AVXImpl
#include "simdlib_128_avx2.inl"
#undef __SIMD_LIB_AVX2_HPP__
}; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
#if SIMD_ARCH >= SIMD_ARCH_AVX512
struct AVX512Impl : AVX2Impl
#include "simdlib_128_avx512_core.inl"
#endif // defined(SIMD_ARCH_KNIGHTS)
#undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_128_AVX512
+#endif // SIMD_OPT_128_AVX512
}; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
struct Traits : SIMDImpl::Traits
{
#error Invalid value for SIMD_ARCH
#endif
- using Float = SIMD128Impl::Float;
- using Double = SIMD128Impl::Double;
- using Integer = SIMD128Impl::Integer;
- using Vec4 = SIMD128Impl::Vec4;
- using Mask = SIMD128Impl::Mask;
+ using Float = SIMD128Impl::Float;
+ using Double = SIMD128Impl::Double;
+ using Integer = SIMD128Impl::Integer;
+ using Vec4 = SIMD128Impl::Vec4;
+ using Mask = SIMD128Impl::Mask;
};
- } // ns SIMD128Impl
+ } // namespace SIMD128Impl
namespace SIMD256Impl
{
#include "simdlib_256_avx.inl"
#undef __SIMD_LIB_AVX_HPP__
}; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
#if SIMD_ARCH >= SIMD_ARCH_AVX2
struct AVX2Impl : AVXImpl
#include "simdlib_256_avx2.inl"
#undef __SIMD_LIB_AVX2_HPP__
}; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
#if SIMD_ARCH >= SIMD_ARCH_AVX512
struct AVX512Impl : AVX2Impl
#include "simdlib_256_avx512_core.inl"
#endif // defined(SIMD_ARCH_KNIGHTS)
#undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_256_AVX512
+#endif // SIMD_OPT_256_AVX512
}; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
struct Traits : SIMDImpl::Traits
{
#error Invalid value for SIMD_ARCH
#endif
- using Float = SIMD256Impl::Float;
- using Double = SIMD256Impl::Double;
- using Integer = SIMD256Impl::Integer;
- using Vec4 = SIMD256Impl::Vec4;
- using Mask = SIMD256Impl::Mask;
+ using Float = SIMD256Impl::Float;
+ using Double = SIMD256Impl::Double;
+ using Integer = SIMD256Impl::Integer;
+ using Vec4 = SIMD256Impl::Vec4;
+ using Mask = SIMD256Impl::Mask;
};
- } // ns SIMD256Impl
+ } // namespace SIMD256Impl
namespace SIMD512Impl
{
#if SIMD_ARCH >= SIMD_ARCH_AVX
- template<typename SIMD256T>
+ template <typename SIMD256T>
struct AVXImplBase
{
#define __SIMD_LIB_AVX_HPP__
using AVXImpl = AVXImplBase<SIMD256Impl::AVXImpl>;
#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
#if SIMD_ARCH >= SIMD_ARCH_AVX2
using AVX2Impl = AVXImplBase<SIMD256Impl::AVX2Impl>;
#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
-
#if SIMD_ARCH >= SIMD_ARCH_AVX512
struct AVX512Impl : AVXImplBase<SIMD256Impl::AVX512Impl>
{
#endif // defined(SIMD_ARCH_KNIGHTS)
#undef __SIMD_LIB_AVX512_HPP__
}; // struct AVX512ImplBase
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
struct Traits : SIMDImpl::Traits
{
#error Invalid value for SIMD_ARCH
#endif
- using Float = SIMD512Impl::Float;
- using Double = SIMD512Impl::Double;
- using Integer = SIMD512Impl::Integer;
- using Vec4 = SIMD512Impl::Vec4;
- using Mask = SIMD512Impl::Mask;
+ using Float = SIMD512Impl::Float;
+ using Double = SIMD512Impl::Double;
+ using Integer = SIMD512Impl::Integer;
+ using Vec4 = SIMD512Impl::Vec4;
+ using Mask = SIMD512Impl::Mask;
};
- } // ns SIMD512Impl
-} // ns SIMDImpl
+ } // namespace SIMD512Impl
+} // namespace SIMDImpl
template <typename Traits>
struct SIMDBase : Traits::IsaImpl
{
- using CompareType = typename Traits::CompareType;
- using ScaleFactor = typename Traits::ScaleFactor;
- using RoundMode = typename Traits::RoundMode;
- using SIMD = typename Traits::IsaImpl;
- using Float = typename Traits::Float;
- using Double = typename Traits::Double;
- using Integer = typename Traits::Integer;
- using Vec4 = typename Traits::Vec4;
- using Mask = typename Traits::Mask;
+ using CompareType = typename Traits::CompareType;
+ using ScaleFactor = typename Traits::ScaleFactor;
+ using RoundMode = typename Traits::RoundMode;
+ using SIMD = typename Traits::IsaImpl;
+ using Float = typename Traits::Float;
+ using Double = typename Traits::Double;
+ using Integer = typename Traits::Integer;
+ using Vec4 = typename Traits::Vec4;
+ using Mask = typename Traits::Mask;
static const size_t VECTOR_BYTES = sizeof(Float);
// Populates a SIMD Vec4 from a non-simd vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
- static SIMDINLINE
- void vec4_load1_ps(Vec4& r, const float *p)
+ static SIMDINLINE void vec4_load1_ps(Vec4& r, const float* p)
{
r[0] = SIMD::set1_ps(p[0]);
r[1] = SIMD::set1_ps(p[1]);
r[3] = SIMD::set1_ps(p[3]);
}
- static SIMDINLINE
- void vec4_set1_vps(Vec4& r, Float const &s)
+ static SIMDINLINE void vec4_set1_vps(Vec4& r, Float const& s)
{
r[0] = s;
r[1] = s;
r[3] = s;
}
- static SIMDINLINE
- Float vec4_dp3_ps(const Vec4& v0, const Vec4& v1)
+ static SIMDINLINE Float vec4_dp3_ps(const Vec4& v0, const Vec4& v1)
{
Float tmp, r;
- r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
+ r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
- tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
- r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y)
+ tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
+ r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y)
- tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
- r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+ tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
+ r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
return r;
}
- static SIMDINLINE
- Float vec4_dp4_ps(const Vec4& v0, const Vec4& v1)
+ static SIMDINLINE Float vec4_dp4_ps(const Vec4& v0, const Vec4& v1)
{
Float tmp, r;
- r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
+ r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
- tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
- r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y)
+ tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
+ r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y)
- tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
- r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+ tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
+ r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
- tmp = SIMD::mul_ps(v0[3], v1[3]); // (v0.w*v1.w)
- r = SIMD::add_ps(r, tmp); // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+        tmp = SIMD::mul_ps(v0[3], v1[3]); // (v0.w*v1.w)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z) + (v0.w*v1.w)
return r;
}
- static SIMDINLINE
- Float vec4_rcp_length_ps(const Vec4& v)
+ static SIMDINLINE Float vec4_rcp_length_ps(const Vec4& v)
{
Float length = vec4_dp4_ps(v, v);
return SIMD::rsqrt_ps(length);
}
- static SIMDINLINE
- void vec4_normalize_ps(Vec4& r, const Vec4& v)
+ static SIMDINLINE void vec4_normalize_ps(Vec4& r, const Vec4& v)
{
Float rcpLength = vec4_rcp_length_ps(v);
r[3] = SIMD::mul_ps(v[3], rcpLength);
}
- static SIMDINLINE
- void vec4_mul_ps(Vec4& r, const Vec4& v, Float const &s)
+ static SIMDINLINE void vec4_mul_ps(Vec4& r, const Vec4& v, Float const& s)
{
r[0] = SIMD::mul_ps(v[0], s);
r[1] = SIMD::mul_ps(v[1], s);
r[3] = SIMD::mul_ps(v[3], s);
}
- static SIMDINLINE
- void vec4_mul_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
+ static SIMDINLINE void vec4_mul_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
{
r[0] = SIMD::mul_ps(v0[0], v1[0]);
r[1] = SIMD::mul_ps(v0[1], v1[1]);
r[3] = SIMD::mul_ps(v0[3], v1[3]);
}
- static SIMDINLINE
- void vec4_add_ps(Vec4& r, const Vec4& v0, Float const &s)
+ static SIMDINLINE void vec4_add_ps(Vec4& r, const Vec4& v0, Float const& s)
{
r[0] = SIMD::add_ps(v0[0], s);
r[1] = SIMD::add_ps(v0[1], s);
r[3] = SIMD::add_ps(v0[3], s);
}
- static SIMDINLINE
- void vec4_add_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
+ static SIMDINLINE void vec4_add_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
{
r[0] = SIMD::add_ps(v0[0], v1[0]);
r[1] = SIMD::add_ps(v0[1], v1[1]);
r[3] = SIMD::add_ps(v0[3], v1[3]);
}
- static SIMDINLINE
- void vec4_min_ps(Vec4& r, const Vec4& v0, Float const &s)
+ static SIMDINLINE void vec4_min_ps(Vec4& r, const Vec4& v0, Float const& s)
{
r[0] = SIMD::min_ps(v0[0], s);
r[1] = SIMD::min_ps(v0[1], s);
r[3] = SIMD::min_ps(v0[3], s);
}
- static SIMDINLINE
- void vec4_max_ps(Vec4& r, const Vec4& v0, Float const &s)
+ static SIMDINLINE void vec4_max_ps(Vec4& r, const Vec4& v0, Float const& s)
{
r[0] = SIMD::max_ps(v0[0], s);
r[1] = SIMD::max_ps(v0[1], s);
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * v.w)
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * v.w)
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * v.w)
- static SIMDINLINE
- void SIMDCALL mat4x4_vec4_multiply(
- Vec4& result,
- const float *pMatrix,
- const Vec4& v)
+ static SIMDINLINE void SIMDCALL mat4x4_vec4_multiply(Vec4& result,
+ const float* pMatrix,
+ const Vec4& v)
{
Float m;
Float r0;
Float r1;
- m = SIMD::load1_ps(pMatrix + 0*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 0*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 0*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 0*4 + 3); // m[row][3]
- r1 = SIMD::mul_ps(m, v[3]); // (m3 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r1 = SIMD::mul_ps(m, v[3]);              // (m3 * v.w)
+        r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
result[0] = r0;
- m = SIMD::load1_ps(pMatrix + 1*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 1*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 1*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 1*4 + 3); // m[row][3]
- r1 = SIMD::mul_ps(m, v[3]); // (m3 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r1 = SIMD::mul_ps(m, v[3]);              // (m3 * v.w)
+        r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
result[1] = r0;
- m = SIMD::load1_ps(pMatrix + 2*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 2*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 2*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 2*4 + 3); // m[row][3]
- r1 = SIMD::mul_ps(m, v[3]); // (m3 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r1 = SIMD::mul_ps(m, v[3]);              // (m3 * v.w)
+        r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
result[2] = r0;
- m = SIMD::load1_ps(pMatrix + 3*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 3*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 3*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 3*4 + 3); // m[row][3]
- r1 = SIMD::mul_ps(m, v[3]); // (m3 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 3); // m[row][3]
+        r1 = SIMD::mul_ps(m, v[3]);              // (m3 * v.w)
+        r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
result[3] = r0;
}
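// Scalar reference for the transform above, assuming the same row-major
// float[16] layout (element [row][col] at pMatrix[row * 4 + col]); each SIMD
// lane computes this independently for its own vertex. Illustrative only.
static inline void Mat4x4Vec4RefScalar(float out[4], const float m[16], const float v[4])
{
    for (int row = 0; row < 4; ++row)
    {
        out[row] = m[row * 4 + 0] * v[0] + m[row * 4 + 1] * v[1] +
                   m[row * 4 + 2] * v[2] + m[row * 4 + 3] * v[3];
    }
}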
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 0)
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 0)
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 0)
- static SIMDINLINE
- void SIMDCALL mat3x3_vec3_w0_multiply(
- Vec4& result,
- const float *pMatrix,
- const Vec4& v)
+ static SIMDINLINE void SIMDCALL mat3x3_vec3_w0_multiply(Vec4& result,
+ const float* pMatrix,
+ const Vec4& v)
{
Float m;
Float r0;
Float r1;
- m = SIMD::load1_ps(pMatrix + 0*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 0*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 0*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
result[0] = r0;
- m = SIMD::load1_ps(pMatrix + 1*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 1*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 1*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
result[1] = r0;
- m = SIMD::load1_ps(pMatrix + 2*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 2*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 2*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
result[2] = r0;
result[3] = SIMD::setzero_ps();
// outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 1)
// outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 1)
// outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 1)
- static SIMDINLINE
- void SIMDCALL mat4x4_vec3_w1_multiply(
- Vec4& result,
- const float *pMatrix,
- const Vec4& v)
+ static SIMDINLINE void SIMDCALL mat4x4_vec3_w1_multiply(Vec4& result,
+ const float* pMatrix,
+ const Vec4& v)
{
Float m;
Float r0;
Float r1;
- m = SIMD::load1_ps(pMatrix + 0*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 0*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 0*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 0*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[0] = r0;
- m = SIMD::load1_ps(pMatrix + 1*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 1*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 1*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 1*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[1] = r0;
- m = SIMD::load1_ps(pMatrix + 2*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 2*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 2*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 2*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[2] = r0;
- m = SIMD::load1_ps(pMatrix + 3*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 3*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 3*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 3*4 + 3); // m[row][3]
- result[3] = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 3 * 4 + 3); // m[row][3]
+        result[3] = SIMD::add_ps(r0, m);         // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
}
- static SIMDINLINE
- void SIMDCALL mat4x3_vec3_w1_multiply(
- Vec4& result,
- const float *pMatrix,
- const Vec4& v)
+ static SIMDINLINE void SIMDCALL mat4x3_vec3_w1_multiply(Vec4& result,
+ const float* pMatrix,
+ const Vec4& v)
{
Float m;
Float r0;
Float r1;
- m = SIMD::load1_ps(pMatrix + 0*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 0*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 0*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 0*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[0] = r0;
- m = SIMD::load1_ps(pMatrix + 1*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 1*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 1*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 1*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[1] = r0;
- m = SIMD::load1_ps(pMatrix + 2*4 + 0); // m[row][0]
- r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
- m = SIMD::load1_ps(pMatrix + 2*4 + 1); // m[row][1]
- r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
- m = SIMD::load1_ps(pMatrix + 2*4 + 2); // m[row][2]
- r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
- r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
- m = SIMD::load1_ps(pMatrix + 2*4 + 3); // m[row][3]
- r0 = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+ r0 = SIMD::mul_ps(m, v[0]); // (m00 * v.x)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+ r1 = SIMD::mul_ps(m, v[1]); // (m1 * v.y)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+ r1 = SIMD::mul_ps(m, v[2]); // (m2 * v.z)
+ r0 = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+ m = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r0 = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
result[2] = r0;
result[3] = SIMD::set1_ps(1.0f);
}
using SIMD256 = SIMDBase<SIMDImpl::SIMD256Impl::Traits>;
using SIMD512 = SIMDBase<SIMDImpl::SIMD512Impl::Traits>;
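// Usage sketch for the width-parameterized aliases above (hypothetical helper,
// for illustration): code templated on SIMD_T picks up the matching lane width
// and register types from Traits, so one function body serves both widths.
template <typename SIMD_T>
static typename SIMD_T::Float Add3Sketch(typename SIMD_T::Float a,
                                         typename SIMD_T::Float b,
                                         typename SIMD_T::Float c)
{
    return SIMD_T::add_ps(SIMD_T::add_ps(a, b), c); // (a + b) + c at SIMD_T's width
}
// e.g. Add3Sketch<SIMD256>(x, y, z) runs on 8 lanes, Add3Sketch<SIMD512> on 16.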
-template <typename SIMD_T> using CompareType = typename SIMD_T::CompareType;
-template <typename SIMD_T> using ScaleFactor = typename SIMD_T::ScaleFactor;
-template <typename SIMD_T> using RoundMode = typename SIMD_T::RoundMode;
-template <typename SIMD_T> using Float = typename SIMD_T::Float;
-template <typename SIMD_T> using Double = typename SIMD_T::Double;
-template <typename SIMD_T> using Integer = typename SIMD_T::Integer;
-template <typename SIMD_T> using Vec4 = typename SIMD_T::Vec4;
-template <typename SIMD_T> using Mask = typename SIMD_T::Mask;
+template <typename SIMD_T>
+using CompareType = typename SIMD_T::CompareType;
+template <typename SIMD_T>
+using ScaleFactor = typename SIMD_T::ScaleFactor;
+template <typename SIMD_T>
+using RoundMode = typename SIMD_T::RoundMode;
+template <typename SIMD_T>
+using Float = typename SIMD_T::Float;
+template <typename SIMD_T>
+using Double = typename SIMD_T::Double;
+template <typename SIMD_T>
+using Integer = typename SIMD_T::Integer;
+template <typename SIMD_T>
+using Vec4 = typename SIMD_T::Vec4;
+template <typename SIMD_T>
+using Mask = typename SIMD_T::Mask;
template <typename SIMD_T>
struct SIMDVecEqual
{
- INLINE bool operator () (Integer<SIMD_T> a, Integer<SIMD_T> b) const
+ INLINE bool operator()(Integer<SIMD_T> a, Integer<SIMD_T> b) const
{
Integer<SIMD_T> c = SIMD_T::xor_si(a, b);
return SIMD_T::testz_si(c, c);
}
- INLINE bool operator () (Float<SIMD_T> a, Float<SIMD_T> b) const
+ INLINE bool operator()(Float<SIMD_T> a, Float<SIMD_T> b) const
{
return this->operator()(SIMD_T::castps_si(a), SIMD_T::castps_si(b));
}
- INLINE bool operator () (Double<SIMD_T> a, Double<SIMD_T> b) const
+ INLINE bool operator()(Double<SIMD_T> a, Double<SIMD_T> b) const
{
return this->operator()(SIMD_T::castpd_si(a), SIMD_T::castpd_si(b));
}
template <typename SIMD_T>
struct SIMDVecHash
{
- INLINE uint32_t operator ()(Integer<SIMD_T> val) const
+ INLINE uint32_t operator()(Integer<SIMD_T> val) const
{
#if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is always 64-bit
static_assert(sizeof(void*) == 8, "This path only meant for 64-bit code");
- uint64_t crc32 = 0;
- const uint64_t *pData = reinterpret_cast<const uint64_t*>(&val);
+ uint64_t crc32 = 0;
+ const uint64_t* pData = reinterpret_cast<const uint64_t*>(&val);
static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
static_assert(sizeof(void*) == 4, "This path only meant for 32-bit code");
uint32_t crc32 = 0;
- const uint32_t *pData = reinterpret_cast<const uint32_t*>(&val);
+ const uint32_t* pData = reinterpret_cast<const uint32_t*>(&val);
static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
#endif
};
- INLINE uint32_t operator ()(Float<SIMD_T> val) const
+ INLINE uint32_t operator()(Float<SIMD_T> val) const
{
return operator()(SIMD_T::castps_si(val));
};
- INLINE uint32_t operator ()(Double<SIMD_T> val) const
+ INLINE uint32_t operator()(Double<SIMD_T> val) const
{
return operator()(SIMD_T::castpd_si(val));
}
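// Usage sketch (hypothetical): together these functors let a whole SIMD
// register key a standard container, e.g.
//   std::unordered_map<Integer<SIMD256>, uint32_t,
//                      SIMDVecHash<SIMD256>, SIMDVecEqual<SIMD256>> cache;
// SIMDVecEqual compares via xor + testz; SIMDVecHash folds the register's
// words through the crc32 accumulator above.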
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// SIMD128 AVX (1) implementation
//============================================================================
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return _mm_##op(a);\
- }
+#define SIMD_WRAPPER_1(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a) { return _mm_##op(a); }
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm_##op(a, b);\
- }
+#define SIMD_WRAPPER_2(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm_##op(a, b); }
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm_##op(a, b);\
- }
+#define SIMD_DWRAPPER_2(op) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm_##op(a, b); }
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm_##op(a, b, ImmT);\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm_##op(a, b, ImmT); \
}
-#define SIMD_DWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm_##op(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return _mm_##op(a, b, ImmT); \
}
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return _mm_##op(a, b, c);\
- }
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); }
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return _mm_##op(a);\
- }
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm_##op(a); }
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return intrin(a, ImmT); \
}
#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm_##op)
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return intrin(a, b);\
- }
+#define SIMD_IWRAPPER_2_(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return intrin(a, b); }
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm_##op(a, b);\
- }
+#define SIMD_IWRAPPER_2(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm_##op(a, b); }
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return castps_si( intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return castps_si(intrin(castsi_ps(a), castsi_ps(b))); \
}
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return _mm_##op(a, b, ImmT); \
}
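// Expansion sketch: SIMD_IWRAPPER_2(add_epi32), for instance, generates
//   static SIMDINLINE Integer SIMDCALL add_epi32(Integer a, Integer b)
//   {
//       return _mm_add_epi32(a, b);
//   }
// Each wrapper macro stamps out one thin, inlined forwarder to the matching
// _mm_* intrinsic at this (128-bit) width.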
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c
{
return add_ps(mul_ps(a, b), c);
}
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c) // return (a * b) - c
+static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c) // return (a * b) - c
{
return sub_ps(mul_ps(a, b), c);
}
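// Note: these AVX(1) fallbacks round twice (after the multiply and again
// after the add/sub), while a hardware FMA rounds once; results can differ
// from the fused AVX2 versions in the last ulp.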
return _mm_round_ps(a, static_cast<int>(RMT));
}
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
+{
+ return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float a)
+{
+ return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
//-----------------------------------------------------------------------
// Logical operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_IWRAPPER_2_(and_si, _mm_and_si128); // return a & b (int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_IWRAPPER_2_(andnot_si, _mm_andnot_si128); // return (~a) & b (int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_IWRAPPER_2_(or_si, _mm_or_si128); // return a | b (int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-SIMD_IWRAPPER_2_(xor_si, _mm_xor_si128); // return a ^ b (int)
-
+SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
+SIMD_IWRAPPER_2_(and_si, _mm_and_si128); // return a & b (int)
+SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
+SIMD_IWRAPPER_2_(andnot_si, _mm_andnot_si128); // return (~a) & b (int)
+SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
+SIMD_IWRAPPER_2_(or_si, _mm_or_si128); // return a | b (int)
+SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
+SIMD_IWRAPPER_2_(xor_si, _mm_xor_si128); // return a ^ b (int)
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_1I(slli_epi64); // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi64); // return a << ImmT
static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB) // return a << b (uint32)
{
int32_t a, count;
- a = _mm_extract_epi32(vA, 0);
+ a = _mm_extract_epi32(vA, 0);
count = _mm_extract_epi32(vB, 0);
a <<= count;
vA = _mm_insert_epi32(vA, a, 0);
- a = _mm_extract_epi32(vA, 1);
+ a = _mm_extract_epi32(vA, 1);
count = _mm_extract_epi32(vB, 1);
a <<= count;
vA = _mm_insert_epi32(vA, a, 1);
- a = _mm_extract_epi32(vA, 2);
+ a = _mm_extract_epi32(vA, 2);
count = _mm_extract_epi32(vB, 2);
a <<= count;
vA = _mm_insert_epi32(vA, a, 2);
- a = _mm_extract_epi32(vA, 3);
+ a = _mm_extract_epi32(vA, 3);
count = _mm_extract_epi32(vB, 3);
a <<= count;
vA = _mm_insert_epi32(vA, a, 3);
return _mm_srl_epi64(a, n);
}
-template<int ImmT> // same as srli_si, but with Float cast to int
+template <int ImmT> // same as srli_si, but with Float cast to int
static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
{
return castsi_ps(srli_si<ImmT>(castps_si(a)));
static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB) // return a >> b (uint32)
{
int32_t a, count;
- a = _mm_extract_epi32(vA, 0);
+ a = _mm_extract_epi32(vA, 0);
count = _mm_extract_epi32(vB, 0);
a >>= count;
vA = _mm_insert_epi32(vA, a, 0);
- a = _mm_extract_epi32(vA, 1);
+ a = _mm_extract_epi32(vA, 1);
count = _mm_extract_epi32(vB, 1);
a >>= count;
vA = _mm_insert_epi32(vA, a, 1);
- a = _mm_extract_epi32(vA, 2);
+ a = _mm_extract_epi32(vA, 2);
count = _mm_extract_epi32(vB, 2);
a >>= count;
vA = _mm_insert_epi32(vA, a, 2);
- a = _mm_extract_epi32(vA, 3);
+ a = _mm_extract_epi32(vA, 3);
count = _mm_extract_epi32(vB, 3);
a >>= count;
vA = _mm_insert_epi32(vA, a, 3);
return vA;
}
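// Standalone sketch of the same per-lane variable shift (illustrative): AVX1
// has no variable-count vector shifts -- _mm_sllv_epi32/_mm_srlv_epi32 arrive
// with AVX2 -- so the fallback above shifts lane by lane. A memory round trip
// expresses the same idea without the extract/insert immediates:
#include <cstdint>
#include <emmintrin.h>
static inline __m128i srlv_epi32_sketch(__m128i v, __m128i counts)
{
    alignas(16) uint32_t a[4], n[4];
    _mm_store_si128(reinterpret_cast<__m128i*>(a), v);
    _mm_store_si128(reinterpret_cast<__m128i*>(n), counts);
    for (int lane = 0; lane < 4; ++lane)
        a[lane] >>= n[lane]; // per-lane logical right shift
    return _mm_load_si128(reinterpret_cast<const __m128i*>(a));
}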
-
-
//-----------------------------------------------------------------------
// Conversion operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
{
return _mm_castpd_ps(a);
}
-static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
{
return _mm_castps_si128(a);
}
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
{
return _mm_castsi128_pd(a);
}
-static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
{
return _mm_castps_pd(a);
}
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
{
return _mm_castsi128_ps(a);
}
return _mm_cvtsi32_si128(n);
}
-SIMD_IWRAPPER_1(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
+SIMD_IWRAPPER_1(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
+SIMD_IWRAPPER_1(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
+SIMD_IWRAPPER_1(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
+SIMD_IWRAPPER_1(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
+SIMD_IWRAPPER_1(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
+static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
{
return _mm_cvtps_epi32(a);
}
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
{
return _mm_cvttps_epi32(a);
}
//-----------------------------------------------------------------------
// Comparison operations
//-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
{
return _mm_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::LE_OQ>(a, b);
+}
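// Note: all of the predicates above use the ordered, quiet (_OQ) encodings:
// if either operand is NaN the comparison yields false (an all-zero lane)
// without raising an invalid-operation exception -- including NEQ_OQ, which
// is "ordered and not equal".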
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL testz_ps(Float a,
+ Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
{
- return 0 != _mm_testz_ps(a, b);
+ return 0 != _mm_testz_ps(a, b);
}
-static SIMDINLINE bool SIMDCALL testz_si(Integer a, Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer a,
+ Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
{
- return 0 != _mm_testz_si128(a, b);
+ return 0 != _mm_testz_si128(a, b);
}
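// Typical early-out use (sketch; 'threshold' is illustrative): testz returns
// true only when every lane of (a & b) is zero, so testing a mask against
// itself asks "did no lane pass?".
//   Float mask = cmpgt_ps(x, threshold);
//   if (testz_ps(mask, mask)) { /* all lanes failed the compare; skip work */ }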
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
+SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
+SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+ Integer b,
+ Float mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
}
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+ Integer b,
+ Integer mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
}
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p) // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+ broadcast_ss(float const* p) // return *p (all elements in vector get same value)
{
return _mm_broadcast_ss(p);
}
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm_packus_epi32 and _mm512_packus_epi32
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Integer SIMDCALL
+    permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
{
return castps_si(_mm_permutevar_ps(castsi_ps(a), swiz));
}
-static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+ permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
return _mm_permutevar_ps(a, swiz);
}
SIMD_IWRAPPER_1I(shuffle_epi32);
-template<int ImmT>
+template <int ImmT>
static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b) = delete;
SIMD_IWRAPPER_2(shuffle_epi8);
SIMD_WRAPPER_2I(shuffle_ps);
SIMD_IWRAPPER_2(unpackhi_epi16);
-//SIMD_IFWRAPPER_2(unpackhi_epi32, _mm_unpackhi_ps);
+// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm_unpackhi_ps);
static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
{
return castps_si(_mm_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
- uint32_t *pOffsets = (uint32_t*)&idx;
- Float vResult;
- float* pResult = (float*)&vResult;
+ uint32_t* pOffsets = (uint32_t*)&idx;
+ Float vResult;
+ float* pResult = (float*)&vResult;
for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
{
uint32_t offset = pOffsets[i];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[i] = *(float const*)(((uint8_t const*)p + offset));
+ offset = offset * static_cast<uint32_t>(ScaleT);
+ pResult[i] = *(float const*)(((uint8_t const*)p + offset));
}
return vResult;
}
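// Editorial usage sketch (hypothetical values; the ScaleFactor enumerator name
// is assumed, so an explicit cast is shown): with a scale equal to
// sizeof(float), idx holds element indices into the table.
//
//   float table[4] = {10.f, 11.f, 12.f, 13.f};
//   Integer idx    = set_epi32(3, 0, 2, 1); // Intel order: lane 0 gets 1
//   Float   v      = i32gather_ps<ScaleFactor(4)>(table, idx);
//   // v (lane 0 first) = {11.f, 12.f, 10.f, 13.f}, i.e. lane i = table[idx[i]]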
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+ load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
{
return broadcast_ss(p);
}
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
return _mm_load_ps(p);
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
return _mm_load_si128(&p->v);
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
return _mm_loadu_ps(p);
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
return _mm_lddqu_si128(&p->v);
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
- uint32_t *pOffsets = (uint32_t*)&idx;
- Float vResult = old;
- float* pResult = (float*)&vResult;
- DWORD index;
- uint32_t umask = movemask_ps(mask);
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+{
+ uint32_t* pOffsets = (uint32_t*)&idx;
+ Float vResult = old;
+ float* pResult = (float*)&vResult;
+ DWORD index;
+ uint32_t umask = movemask_ps(mask);
while (_BitScanForward(&index, umask))
{
umask &= ~(1 << index);
uint32_t offset = pOffsets[index];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[index] = *(float const *)(((uint8_t const *)p + offset));
+ offset = offset * static_cast<uint32_t>(ScaleT);
+ pResult[index] = *(float const*)(((uint8_t const*)p + offset));
}
return vResult;
}
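// Editorial note: only lanes whose mask sign bit is set are gathered; the rest
// keep their values from 'old'. E.g. with mask sign bits {1,0,0,1} (lane 0
// first), lanes 0 and 3 load from p while lanes 1 and 2 pass through 'old'.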
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
{
_mm_maskstore_ps(p, mask, src);
}
return _mm_set1_epi8(i);
}
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
{
return _mm_set1_ps(f);
}
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
{
return _mm_setzero_ps();
}
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
{
return _mm_setzero_si128();
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
{
_mm_store_ps(p, a);
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
{
_mm_store_si128(&p->v, a);
}
-static SIMDINLINE void SIMDCALL storeu_si(Integer *p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
+static SIMDINLINE void SIMDCALL
+ storeu_si(Integer* p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
{
_mm_storeu_si128(&p->v, a);
}
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+ stream_ps(float* p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
{
_mm_stream_ps(p, a);
}
static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
{
- Integer vec = set1_epi32(mask);
- const Integer bit = set_epi32(
- 0x08, 0x04, 0x02, 0x01);
- vec = and_si(vec, bit);
- vec = cmplt_epi32(setzero_si(), vec);
+ Integer vec = set1_epi32(mask);
+ const Integer bit = set_epi32(0x08, 0x04, 0x02, 0x01);
+ vec = and_si(vec, bit);
+ vec = cmplt_epi32(setzero_si(), vec);
return castsi_ps(vec);
}
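// Editorial worked example (not part of the original source): vmask_ps expands
// the low 4 bits of a scalar mask into full-width per-lane masks (lane i is
// all-ones when bit i is set), e.g.
//
//   Float m = vmask_ps(0x5);      // lanes (0..3): ~0, 0, ~0, 0
//   Float r = blendv_ps(a, b, m); // selects b in lanes 0 and 2, a elsewhere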
#undef SIMD_IWRAPPER_2
#undef SIMD_IWRAPPER_2_
#undef SIMD_IWRAPPER_2I
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX2_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// Only 2 shifts and 2 gathers were introduced with AVX2.
// AVX2 also adds native support for FMA operations.
//============================================================================
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return _mm_##op(a, b, c);\
- }
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); }
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB) // return a << b (uint32)
{
    return _mm_sllv_epi32(vA, vB);
}
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
return _mm_i32gather_ps(p, idx, static_cast<const int>(ScaleT));
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
{
return _mm_mask_i32gather_ps(old, p, idx, mask, static_cast<const int>(ScaleT));
}
#undef SIMD_WRAPPER_3
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
//============================================================================
private:
- static SIMDINLINE __m512 __conv(Float r) { return _mm512_castps128_ps512(r.v); }
- static SIMDINLINE __m512d __conv(Double r) { return _mm512_castpd128_pd512(r.v); }
- static SIMDINLINE __m512i __conv(Integer r) { return _mm512_castsi128_si512(r.v); }
- static SIMDINLINE Float __conv(__m512 r) { return _mm512_castps512_ps128(r); }
- static SIMDINLINE Double __conv(__m512d r) { return _mm512_castpd512_pd128(r); }
- static SIMDINLINE Integer __conv(__m512i r) { return _mm512_castsi512_si128(r); }
-public:
+static SIMDINLINE __m512 __conv(Float r)
+{
+ return _mm512_castps128_ps512(r.v);
+}
+static SIMDINLINE __m512d __conv(Double r)
+{
+ return _mm512_castpd128_pd512(r.v);
+}
+static SIMDINLINE __m512i __conv(Integer r)
+{
+ return _mm512_castsi128_si512(r.v);
+}
+static SIMDINLINE Float __conv(__m512 r)
+{
+ return _mm512_castps512_ps128(r);
+}
+static SIMDINLINE Double __conv(__m512d r)
+{
+ return _mm512_castpd512_pd128(r);
+}
+static SIMDINLINE Integer __conv(__m512i r)
+{
+ return _mm512_castsi512_si128(r);
+}
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+public:
+#define SIMD_WRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
}
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
-#define SIMD_DWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xf))
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf))
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xf))
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
}
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xf)); // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xf)); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xf)); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
+SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
+SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
+SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
// return (a * b) & 0xFFFFFFFF
//
// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
// and store the low 32 bits of the intermediate integers in dst.
SIMD_IWRAPPER_2_32(mullo_epi32);
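// Editorial worked example: for lane values a = 0x00010000 (65536) and
// b = 0x00010002 (65538), the full product is 0x100020000; mullo keeps only
// the low 32 bits, so the lane result is 0x00020000.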
-SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
+SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
//-----------------------------------------------------------------------
// Logical operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xf)); // return a & b (int)
+SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xf)); // return a & b (int)
SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xf)); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xf)); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xf)); // return a ^ b (int)
-
+SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xf)); // return a | b (int)
+SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xf)); // return a ^ b (int)
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32)
-SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32)
+SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32)
+SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32)
+SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32)
+SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32)
// use AVX2 version
-//SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
+// SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
//-----------------------------------------------------------------------
// Conversion operations (Use AVX2 versions)
//-----------------------------------------------------------------------
// Comparison operations (Use AVX2 versions)
//-----------------------------------------------------------------------
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,); // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8,); // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
//
-//static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
+// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
//{
// return cmpgt_epi32(b, a);
//}
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-
-//static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+// SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8   See documentation for _mm256_packs_epi16
+//                                   // and _mm512_packs_epi16
+// SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16  See documentation for _mm256_packs_epi32
+//                                   // and _mm512_packs_epi32
+// SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8 See documentation for _mm256_packus_epi16
+//                                   // and _mm512_packus_epi16
+// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for
+//                                   // _mm256_packus_epi32 and _mm512_packus_epi32
+// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
+
+// static SIMDINLINE Float SIMDCALL
+//     permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
//{
// return _mm256_permutevar8x32_ps(a, swiz);
//}
SIMD_IWRAPPER_1I_32(shuffle_epi32);
-//template<int ImmT>
-//static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
+// template<int ImmT>
+// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
//{
// return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
//}
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
SIMD_IWRAPPER_2_32(unpackhi_epi32);
SIMD_IWRAPPER_2_32(unpacklo_epi32);
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
}
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
return __conv(_mm512_mask_i32gather_ps(
- _mm512_setzero_ps(),
- __mmask16(0xf),
- __conv(idx),
- p,
- static_cast<int>(ScaleT)));
+ _mm512_setzero_ps(), __mmask16(0xf), __conv(idx), p, static_cast<int>(ScaleT)));
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
{
__mmask16 m = 0xf;
- m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
- _mm512_set1_epi32(0x80000000));
- return __conv(_mm512_mask_i32gather_ps(
- __conv(old),
- m,
- __conv(idx),
- p,
- static_cast<int>(ScaleT)));
+ m = _mm512_mask_test_epi32_mask(
+ m, _mm512_castps_si512(__conv(mask)), _mm512_set1_epi32(0x80000000));
+ return __conv(
+ _mm512_mask_i32gather_ps(__conv(old), m, __conv(idx), p, static_cast<int>(ScaleT)));
}
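// Editorial note: the test against 0x80000000 converts the float mask's
// per-lane sign bits into a __mmask16 (restricted to the low 4 lanes by the
// initial m = 0xf), matching the blendv-style sign-bit mask convention used
// by the scalar emulation above.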
// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
// _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
// }
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
{
__mmask16 m = 0xf;
- m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
+ m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
_mm512_mask_storeu_ps(p, m, __conv(src));
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
{
_mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a));
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
{
_mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a));
}
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// register set.
//============================================================================
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_WRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
}
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
-#define SIMD_DWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_DWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
-#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
-#define SIMD_DWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_DWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
-#define SIMD_DWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
}
-SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and
+ // _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and
+ // _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and
+ // _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
+ // _mm512_packus_epi32
SIMD_IWRAPPER_2_16(unpackhi_epi16);
SIMD_IWRAPPER_2_64(unpackhi_epi64);
SIMD_IWRAPPER_2_8(unpackhi_epi8);
static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
{
__mmask64 m = 0xffffull;
- return static_cast<uint32_t>(
- _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+ return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
}
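// Editorial note: testing each byte against 0x80 extracts the per-byte sign
// bits into a compact mask, reproducing SSE movemask_epi8 semantics for the
// low 16 bytes selected by m = 0xffffull.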
#undef SIMD_WRAPPER_1_
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// These use native AVX512 instructions with masking to enable a larger
// register set.
//============================================================================
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// SIMD256 AVX (1) implementation
//============================================================================
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a) \
- {\
- return _mm256_##op(a);\
- }
+#define SIMD_WRAPPER_1(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a) { return _mm256_##op(a); }
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \
- {\
- return _mm256_##op(a, b);\
+#define SIMD_WRAPPER_2(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+ { \
+ return _mm256_##op(a, b); \
}
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b) \
- {\
- return _mm256_##op(a, b);\
+#define SIMD_DWRAPPER_2(op) \
+ static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
+ { \
+ return _mm256_##op(a, b); \
}
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \
- {\
- return _mm256_##op(a, b, ImmT);\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+ { \
+ return _mm256_##op(a, b, ImmT); \
}
-#define SIMD_DWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b) \
- {\
- return _mm256_##op(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
+ { \
+ return _mm256_##op(a, b, ImmT); \
}
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c) \
- {\
- return _mm256_##op(a, b, c);\
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
+ { \
+ return _mm256_##op(a, b, c); \
}
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return _mm256_##op(a);\
- }
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##op(a, b);\
+#define SIMD_IWRAPPER_2(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##op(a, b); \
}
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return castps_si( intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return castps_si(intrin(castsi_ps(a), castsi_ps(b))); \
}
-#define SIMD_IFWRAPPER_2I(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return castps_si( intrin(castsi_ps(a), castsi_ps(b), ImmT) );\
+#define SIMD_IFWRAPPER_2I(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return castps_si(intrin(castsi_ps(a), castsi_ps(b), ImmT)); \
}
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##intrin(a, b, ImmT); \
}
-#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
+#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-#define SIMD_IWRAPPER_3(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c) \
- {\
- return _mm256_##op(a, b, c);\
+#define SIMD_IWRAPPER_3(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
+ { \
+ return _mm256_##op(a, b, c); \
}
// emulated integer SIMD
-#define SIMD_EMU_IWRAPPER_1(op) \
- static SIMDINLINE \
- Integer SIMDCALL op(Integer const &a)\
- {\
- return Integer\
- {\
- SIMD128T::op(a.v4[0]),\
- SIMD128T::op(a.v4[1]),\
- };\
+#define SIMD_EMU_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return Integer{ \
+ SIMD128T::op(a.v4[0]), \
+ SIMD128T::op(a.v4[1]), \
+ }; \
}
-#define SIMD_EMU_IWRAPPER_1L(op, shift) \
- static SIMDINLINE \
- Integer SIMDCALL op(Integer const &a)\
- {\
- return Integer \
- {\
- SIMD128T::op(a.v4[0]), \
- SIMD128T::op(SIMD128T::template srli_si<shift>(a.v4[0])), \
- };\
- }\
- static SIMDINLINE \
- Integer SIMDCALL op(SIMD128Impl::Integer const &a)\
- {\
- return Integer \
- {\
- SIMD128T::op(a), \
- SIMD128T::op(SIMD128T::template srli_si<shift>(a)), \
- };\
+#define SIMD_EMU_IWRAPPER_1L(op, shift) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return Integer{ \
+ SIMD128T::op(a.v4[0]), \
+ SIMD128T::op(SIMD128T::template srli_si<shift>(a.v4[0])), \
+ }; \
+ } \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer const& a) \
+ { \
+ return Integer{ \
+ SIMD128T::op(a), \
+ SIMD128T::op(SIMD128T::template srli_si<shift>(a)), \
+ }; \
}
-#define SIMD_EMU_IWRAPPER_1I(op) \
- template <int ImmT> static SIMDINLINE \
- Integer SIMDCALL op(Integer const &a)\
- {\
- return Integer\
- {\
- SIMD128T::template op<ImmT>(a.v4[0]),\
- SIMD128T::template op<ImmT>(a.v4[1]),\
- };\
+#define SIMD_EMU_IWRAPPER_1I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return Integer{ \
+ SIMD128T::template op<ImmT>(a.v4[0]), \
+ SIMD128T::template op<ImmT>(a.v4[1]), \
+ }; \
}
-#define SIMD_EMU_IWRAPPER_2(op) \
- static SIMDINLINE \
- Integer SIMDCALL op(Integer const &a, Integer const &b)\
- {\
- return Integer\
- {\
- SIMD128T::op(a.v4[0], b.v4[0]),\
- SIMD128T::op(a.v4[1], b.v4[1]),\
- };\
+#define SIMD_EMU_IWRAPPER_2(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+ SIMD128T::op(a.v4[0], b.v4[0]), \
+ SIMD128T::op(a.v4[1], b.v4[1]), \
+ }; \
}
-#define SIMD_EMU_IWRAPPER_2I(op) \
- template <int ImmT> static SIMDINLINE \
- Integer SIMDCALL op(Integer const &a, Integer const &b)\
- {\
- return Integer\
- {\
- SIMD128T::template op<ImmT>(a.v4[0], b.v[0]),\
- SIMD128T::template op<ImmT>(a.v4[1], b.v[1]),\
- };\
+#define SIMD_EMU_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+            SIMD128T::template op<ImmT>(a.v4[0], b.v4[0]),                                    \
+            SIMD128T::template op<ImmT>(a.v4[1], b.v4[1]),                                    \
+ }; \
}
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c) // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
+ Float const& b,
+ Float const& c) // return (a * b) + c
{
return add_ps(mul_ps(a, b), c);
}
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float const &a, Float const &b, Float const &c) // return (a * b) - c
+static SIMDINLINE Float SIMDCALL fmsub_ps(Float const& a,
+ Float const& b,
+ Float const& c) // return (a * b) - c
{
return sub_ps(mul_ps(a, b), c);
}
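// Note: these fallbacks are unfused -- mul_ps followed by add_ps/sub_ps rounds
// twice, whereas a true FMA (e.g. the _mm256_fmadd_ps used by the AVX2
// implementation later in this file) rounds once, so results can differ in the
// last ULP between targets.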
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const &a)
+static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
{
return _mm256_round_ps(a, static_cast<int>(RMT));
}
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
+{
+ return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
+{
+ return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
SIMD_EMU_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
SIMD_EMU_IWRAPPER_2(add_epi32); // return a + b (int32)
SIMD_EMU_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_EMU_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_EMU_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
SIMD_EMU_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
SIMD_EMU_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
SIMD_EMU_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
SIMD_EMU_IWRAPPER_2(xor_si); // return a ^ b (int)
-
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_EMU_IWRAPPER_1I(slli_epi32); // return a << ImmT
+SIMD_EMU_IWRAPPER_1I(slli_epi32); // return a << ImmT
-static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const &vA, Integer const &vCount) // return a << b (uint32)
+static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const& vA,
+ Integer const& vCount) // return a << b (uint32)
{
int32_t aHi, aLow, countHi, countLow;
- __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
- __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
- __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
+ __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
+ __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
+ __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
__m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
- aHi = _mm_extract_epi32(vAHi, 0);
+ aHi = _mm_extract_epi32(vAHi, 0);
countHi = _mm_extract_epi32(vCountHi, 0);
aHi <<= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 0);
- aLow = _mm_extract_epi32(vALow, 0);
+ aLow = _mm_extract_epi32(vALow, 0);
countLow = _mm_extract_epi32(vCountLow, 0);
aLow <<= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 0);
- aHi = _mm_extract_epi32(vAHi, 1);
+ aHi = _mm_extract_epi32(vAHi, 1);
countHi = _mm_extract_epi32(vCountHi, 1);
aHi <<= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 1);
- aLow = _mm_extract_epi32(vALow, 1);
+ aLow = _mm_extract_epi32(vALow, 1);
countLow = _mm_extract_epi32(vCountLow, 1);
aLow <<= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 1);
- aHi = _mm_extract_epi32(vAHi, 2);
+ aHi = _mm_extract_epi32(vAHi, 2);
countHi = _mm_extract_epi32(vCountHi, 2);
aHi <<= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 2);
- aLow = _mm_extract_epi32(vALow, 2);
+ aLow = _mm_extract_epi32(vALow, 2);
countLow = _mm_extract_epi32(vCountLow, 2);
aLow <<= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 2);
- aHi = _mm_extract_epi32(vAHi, 3);
+ aHi = _mm_extract_epi32(vAHi, 3);
countHi = _mm_extract_epi32(vCountHi, 3);
aHi <<= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 3);
- aLow = _mm_extract_epi32(vALow, 3);
+ aLow = _mm_extract_epi32(vALow, 3);
countLow = _mm_extract_epi32(vCountLow, 3);
aLow <<= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 3);
__m256i ret = _mm256_set1_epi32(0);
- ret = _mm256_insertf128_si256(ret, vAHi, 1);
- ret = _mm256_insertf128_si256(ret, vALow, 0);
+ ret = _mm256_insertf128_si256(ret, vAHi, 1);
+ ret = _mm256_insertf128_si256(ret, vALow, 0);
return ret;
}
-SIMD_EMU_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_EMU_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-SIMD_EMU_IWRAPPER_1I(srli_si); // return a >> (ImmT*8) (uint)
+SIMD_EMU_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
+SIMD_EMU_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
+SIMD_EMU_IWRAPPER_1I(srli_si); // return a >> (ImmT*8) (uint)
-template<int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a)
+template <int ImmT> // same as srli_si, but with Float cast to int
+static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
{
return castsi_ps(srli_si<ImmT>(castps_si(a)));
}
-static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const &vA, Integer const &vCount) // return a >> b (uint32)
+static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const& vA,
+ Integer const& vCount) // return a >> b (uint32)
{
int32_t aHi, aLow, countHi, countLow;
- __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
- __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
- __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
+ __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
+ __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
+ __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
__m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
- aHi = _mm_extract_epi32(vAHi, 0);
+ aHi = _mm_extract_epi32(vAHi, 0);
countHi = _mm_extract_epi32(vCountHi, 0);
aHi >>= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 0);
- aLow = _mm_extract_epi32(vALow, 0);
+ aLow = _mm_extract_epi32(vALow, 0);
countLow = _mm_extract_epi32(vCountLow, 0);
aLow >>= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 0);
- aHi = _mm_extract_epi32(vAHi, 1);
+ aHi = _mm_extract_epi32(vAHi, 1);
countHi = _mm_extract_epi32(vCountHi, 1);
aHi >>= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 1);
- aLow = _mm_extract_epi32(vALow, 1);
+ aLow = _mm_extract_epi32(vALow, 1);
countLow = _mm_extract_epi32(vCountLow, 1);
aLow >>= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 1);
- aHi = _mm_extract_epi32(vAHi, 2);
+ aHi = _mm_extract_epi32(vAHi, 2);
countHi = _mm_extract_epi32(vCountHi, 2);
aHi >>= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 2);
- aLow = _mm_extract_epi32(vALow, 2);
+ aLow = _mm_extract_epi32(vALow, 2);
countLow = _mm_extract_epi32(vCountLow, 2);
aLow >>= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 2);
- aHi = _mm_extract_epi32(vAHi, 3);
+ aHi = _mm_extract_epi32(vAHi, 3);
countHi = _mm_extract_epi32(vCountHi, 3);
aHi >>= countHi;
vAHi = _mm_insert_epi32(vAHi, aHi, 3);
- aLow = _mm_extract_epi32(vALow, 3);
+ aLow = _mm_extract_epi32(vALow, 3);
countLow = _mm_extract_epi32(vCountLow, 3);
aLow >>= countLow;
vALow = _mm_insert_epi32(vALow, aLow, 3);
__m256i ret = _mm256_set1_epi32(0);
- ret = _mm256_insertf128_si256(ret, vAHi, 1);
- ret = _mm256_insertf128_si256(ret, vALow, 0);
+ ret = _mm256_insertf128_si256(ret, vAHi, 1);
+ ret = _mm256_insertf128_si256(ret, vALow, 0);
return ret;
}
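// For illustration: AVX1 has no per-lane variable shift (vpsllvd/vpsrlvd are
// AVX2 instructions), so sllv_epi32/srlv_epi32 above are the hand-unrolled
// equivalent of this scalar loop, one extract/shift/insert per 32-bit lane:
//
//     for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
//         out[i] = a[i] << count[i]; // '>>' for srlv_epi32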
-
-
//-----------------------------------------------------------------------
// Conversion operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
{
return _mm256_castpd_ps(a);
}
-static SIMDINLINE Integer SIMDCALL castps_si(Float const &a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
{
return _mm256_castps_si256(a);
}
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
{
return _mm256_castsi256_pd(a);
}
-static SIMDINLINE Double SIMDCALL castps_pd(Float const &a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
{
return _mm256_castps_pd(a);
}
-static SIMDINLINE Integer SIMDCALL castpd_si(Double const &a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castpd_si(Double const& a) // return *(Integer*)(&a)
{
return _mm256_castpd_si256(a);
}
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
{
return _mm256_castsi256_ps(a);
}
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a) // return (float)a (int32 --> float)
+static SIMDINLINE Float SIMDCALL
+ cvtepi32_ps(Integer const& a) // return (float)a (int32 --> float)
{
return _mm256_cvtepi32_ps(a);
}
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi16, 8); // return (int16)a (uint8 --> int16)
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi32, 4); // return (int32)a (uint8 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8); // return (int32)a (uint16 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4); // return (int64)a (uint16 --> int64)
-SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8); // return (int64)a (uint32 --> int64)
+SIMD_EMU_IWRAPPER_1L(cvtepu8_epi16, 8); // return (int16)a (uint8 --> int16)
+SIMD_EMU_IWRAPPER_1L(cvtepu8_epi32, 4); // return (int32)a (uint8 --> int32)
+SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8); // return (int32)a (uint16 --> int32)
+SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4); // return (int64)a (uint16 --> int64)
+SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8); // return (int64)a (uint32 --> int64)
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a) // return (int32)a (float --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvtps_epi32(Float const& a) // return (int32)a (float --> int32)
{
return _mm256_cvtps_epi32(a);
}
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a) // return (int32)a (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvttps_epi32(Float const& a) // return (int32)a (rnd_to_zero(float) --> int32)
{
return _mm256_cvttps_epi32(a);
}
//-----------------------------------------------------------------------
// Comparison operations
//-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b
+template <CompareType CmpTypeT>
+static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
{
return _mm256_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::LE_OQ>(a, b);
+}
-SIMD_EMU_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_EMU_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_EMU_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL
+ testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
{
- return 0 != _mm256_testz_ps(a, b);
+ return 0 != _mm256_testz_ps(a, b);
}
-static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL
+ testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
{
- return 0 != _mm256_testz_si256(a, b);
+ return 0 != _mm256_testz_si256(a, b);
}
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps); // return ImmT ? b : a (int32)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
+SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
+SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps); // return ImmT ? b : a (int32)
+SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+ Integer const& b,
+ Float const& mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
}
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+ Integer const& b,
+ Integer const& mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
}
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p) // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+ broadcast_ss(float const* p) // return *p (all elements in vector get same value)
{
return _mm256_broadcast_ss(p);
}
-SIMD_EMU_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_EMU_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_EMU_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_EMU_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_EMU_IWRAPPER_2(
+ packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_EMU_IWRAPPER_2(
+ packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
{
return _mm256_permute_ps(a, ImmT);
}
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
+static SIMDINLINE Integer SIMDCALL permute_epi32(
+ Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
{
Integer result;
// Ugly slow implementation
- uint32_t const *pA = reinterpret_cast<uint32_t const*>(&a);
- uint32_t const *pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
- uint32_t *pResult = reinterpret_cast<uint32_t *>(&result);
+ uint32_t const* pA = reinterpret_cast<uint32_t const*>(&a);
+ uint32_t const* pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
+ uint32_t* pResult = reinterpret_cast<uint32_t*>(&result);
for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
{
return result;
}
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+ permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
Float result;
// Ugly slow implementation
- float const *pA = reinterpret_cast<float const*>(&a);
- uint32_t const *pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
- float *pResult = reinterpret_cast<float *>(&result);
+ float const* pA = reinterpret_cast<float const*>(&a);
+ uint32_t const* pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
+ float* pResult = reinterpret_cast<float*>(&result);
for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
{
SIMD_DWRAPPER_2I(permute2f128_pd);
SIMD_IWRAPPER_2I_(permute2f128_si, permute2f128_si256);
-
SIMD_EMU_IWRAPPER_1I(shuffle_epi32);
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
{
return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
}
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
- uint32_t *pOffsets = (uint32_t*)&idx;
- Float vResult;
- float* pResult = (float*)&vResult;
+ uint32_t* pOffsets = (uint32_t*)&idx;
+ Float vResult;
+ float* pResult = (float*)&vResult;
for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
{
uint32_t offset = pOffsets[i];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[i] = *(float const*)(((uint8_t const*)p + offset));
+ offset = offset * static_cast<uint32_t>(ScaleT);
+ pResult[i] = *(float const*)(((uint8_t const*)p + offset));
}
return vResult;
}
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+ load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
{
return broadcast_ss(p);
}
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
return _mm256_load_ps(p);
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
return _mm256_load_si256(&p->v);
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
return _mm256_loadu_ps(p);
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
return _mm256_lddqu_si256(&p->v);
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
-{
- uint32_t *pOffsets = (uint32_t*)&idx;
- Float vResult = old;
- float* pResult = (float*)&vResult;
- DWORD index;
- uint32_t umask = movemask_ps(mask);
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
+{
+ uint32_t* pOffsets = (uint32_t*)&idx;
+ Float vResult = old;
+ float* pResult = (float*)&vResult;
+ DWORD index;
+ uint32_t umask = movemask_ps(mask);
while (_BitScanForward(&index, umask))
{
umask &= ~(1 << index);
uint32_t offset = pOffsets[index];
- offset = offset * static_cast<uint32_t>(ScaleT);
- pResult[index] = *(float const *)(((uint8_t const *)p + offset));
+ offset = offset * static_cast<uint32_t>(ScaleT);
+ pResult[index] = *(float const*)(((uint8_t const*)p + offset));
}
return vResult;
}
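// For illustration, a hypothetical call site (the ScaleFactor enumerator name
// is assumed, not taken from this file): lanes whose mask sign bit is set are
// fetched from memory; the remaining lanes keep their value from 'old'.
//
//     Float v = mask_i32gather_ps<ScaleFactor::SF_4>(
//         set1_ps(0.0f), // 'old': default for inactive lanes
//         pBase,         // float base pointer
//         vIdx,          // per-lane element indices (scaled by 4 bytes)
//         vMask);        // per-lane sign bit selects active lanes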
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
{
_mm256_maskstore_ps(p, mask, src);
}
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
{
- return SIMD128T::movemask_epi8(a.v4[0]) |
- (SIMD128T::movemask_epi8(a.v4[1]) << 16);
+ return SIMD128T::movemask_epi8(a.v4[0]) | (SIMD128T::movemask_epi8(a.v4[1]) << 16);
}
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
{
return static_cast<uint32_t>(_mm256_movemask_pd(a));
}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
{
return static_cast<uint32_t>(_mm256_movemask_ps(a));
}
return _mm256_set1_epi8(i);
}
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
{
return _mm256_set1_ps(f);
}
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
{
return _mm256_setzero_ps();
}
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
{
return _mm256_setzero_si256();
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float const& a) // *p = a (stores all elements contiguously in memory)
{
_mm256_store_ps(p, a);
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
{
_mm256_store_si256(&p->v, a);
}
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a) // *p = a (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+ stream_ps(float* p, Float const& a) // *p = a (same as store_ps, but doesn't keep memory in cache)
{
_mm256_stream_ps(p, a);
}
// Legacy interface (available only in SIMD256 width)
//=======================================================================
-static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const *p)
+static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const* p)
{
return _mm256_broadcast_ps(&p->v);
}
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const& a)
{
return _mm256_extractf128_pd(a, ImmT);
}
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float const& a)
{
return _mm256_extractf128_ps(a, ImmT);
}
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const& a)
{
return _mm256_extractf128_si256(a, ImmT);
}
-template<int ImmT>
-static SIMDINLINE Double SIMDCALL insertf128_pd(Double const &a, SIMD128Impl::Double const &b)
+template <int ImmT>
+static SIMDINLINE Double SIMDCALL insertf128_pd(Double const& a, SIMD128Impl::Double const& b)
{
return _mm256_insertf128_pd(a, b, ImmT);
}
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL insertf128_ps(Float const &a, SIMD128Impl::Float const &b)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL insertf128_ps(Float const& a, SIMD128Impl::Float const& b)
{
return _mm256_insertf128_ps(a, b, ImmT);
}
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const &a, SIMD128Impl::Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const& a, SIMD128Impl::Integer const& b)
{
return _mm256_insertf128_si256(a, b, ImmT);
}
_mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
#endif
-static SIMDINLINE Integer SIMDCALL loadu2_si(SIMD128Impl::Integer const* phi, SIMD128Impl::Integer const* plo)
+static SIMDINLINE Integer SIMDCALL loadu2_si(SIMD128Impl::Integer const* phi,
+ SIMD128Impl::Integer const* plo)
{
return _mm256_loadu2_m128i(&phi->v, &plo->v);
}
-static SIMDINLINE Integer SIMDCALL set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+ set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
{
return _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
}
-static SIMDINLINE Float SIMDCALL set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+ set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
{
return _mm256_set_ps(i7, i6, i5, i4, i3, i2, i1, i0);
}
-static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer *phi, SIMD128Impl::Integer *plo, Integer const &src)
+static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer* phi,
+ SIMD128Impl::Integer* plo,
+ Integer const& src)
{
_mm256_storeu2_m128i(&phi->v, &plo->v, src);
}
static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
{
- Integer vec = set1_epi32(mask);
- const Integer bit = set_epi32(
- 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
- vec = and_si(vec, bit);
- vec = cmplt_epi32(setzero_si(), vec);
+ Integer vec = set1_epi32(mask);
+ const Integer bit = set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+ vec = and_si(vec, bit);
+ vec = cmplt_epi32(setzero_si(), vec);
return castsi_ps(vec);
}
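// For illustration: vmask_ps expands the low 8 bits of an integer mask into
// all-ones / all-zeros 32-bit lanes, which composes directly with the blend
// operations above:
//
//     Float m = vmask_ps(0x0F);     // lanes 0..3 -> ~0, lanes 4..7 -> 0
//     Float r = blendv_ps(a, b, m); // lanes 0..3 from b, lanes 4..7 from a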
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX2_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// Mostly these are integer operations that are no longer emulated with SSE
//============================================================================
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return _mm256_##op(a);\
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
+
+#define SIMD_IWRAPPER_1L(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return _mm256_##op(_mm256_castsi256_si128(a)); \
}
-#define SIMD_IWRAPPER_1L(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return _mm256_##op(_mm256_castsi256_si128(a));\
- }\
-
-#define SIMD_IWRAPPER_1I(op) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return _mm256_##op(a, ImmT);\
+#define SIMD_IWRAPPER_1I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return _mm256_##op(a, ImmT); \
}
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return _mm256_##intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return _mm256_##intrin(a, ImmT); \
}
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##intrin(a, b);\
+#define SIMD_IWRAPPER_2_(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##intrin(a, b); \
}
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##op(a, b);\
+#define SIMD_IWRAPPER_2(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##op(a, b); \
}
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##op(a, b, ImmT); \
}
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return _mm256_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return _mm256_##op(a, b, ImmT); \
}
//-----------------------------------------------------------------------
// Floating point arithmetic operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c) // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
+ Float const& b,
+ Float const& c) // return (a * b) + c
{
return _mm256_fmadd_ps(a, b, c);
}
SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
//-----------------------------------------------------------------------
// Logical operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_si256); // return a & b (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_si256); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_si256); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_si256); // return a ^ b (int)
-
+SIMD_IWRAPPER_2_(and_si, and_si256); // return a & b (int)
+SIMD_IWRAPPER_2_(andnot_si, andnot_si256); // return (~a) & b (int)
+SIMD_IWRAPPER_2_(or_si, or_si256); // return a | b (int)
+SIMD_IWRAPPER_2_(xor_si, xor_si256); // return a ^ b (int)
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
-SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
-SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
+SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
+SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
+SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
+SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
+SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
-template<int ImmT> // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a)
+template <int ImmT> // same as srli_si, but with Float cast to int
+static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
{
return castsi_ps(srli_si<ImmT>(castps_si(a)));
}
-
//-----------------------------------------------------------------------
// Conversion operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1L(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1L(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1L(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
+SIMD_IWRAPPER_1L(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
+SIMD_IWRAPPER_1L(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
+SIMD_IWRAPPER_1L(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
+SIMD_IWRAPPER_1L(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
+SIMD_IWRAPPER_1L(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
//-----------------------------------------------------------------------
// Comparison operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-
-static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const &a, Integer const &b) // return a < b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+
+static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const& a,
+ Integer const& b) // return a < b (int32)
{
return cmpgt_epi32(b, a);
}
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
+SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
{
return _mm256_permute_ps(a, ImmT);
}
SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+ permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
return _mm256_permutevar8x32_ps(a, swiz);
}
SIMD_IWRAPPER_1I(shuffle_epi32);
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
{
return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
}
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
return _mm256_i32gather_ps(p, idx, static_cast<int>(ScaleT));
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
{
- // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256()
- // Only for this intrinsic - not sure why. :(
+ // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256()
+ // Only for this intrinsic - not sure why. :(
return _mm256_mask_i32gather_ps(old.v, p, idx.v, mask.v, static_cast<int>(ScaleT));
}
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
{
return static_cast<uint32_t>(_mm256_movemask_epi8(a));
}
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
//============================================================================
private:
- static SIMDINLINE __m512 __conv(Float r) { return _mm512_castps256_ps512(r.v); }
- static SIMDINLINE __m512d __conv(Double r) { return _mm512_castpd256_pd512(r.v); }
- static SIMDINLINE __m512i __conv(Integer r) { return _mm512_castsi256_si512(r.v); }
- static SIMDINLINE Float __conv(__m512 r) { return _mm512_castps512_ps256(r); }
- static SIMDINLINE Double __conv(__m512d r) { return _mm512_castpd512_pd256(r); }
- static SIMDINLINE Integer __conv(__m512i r) { return _mm512_castsi512_si256(r); }
-public:
+static SIMDINLINE __m512 __conv(Float r)
+{
+ return _mm512_castps256_ps512(r.v);
+}
+static SIMDINLINE __m512d __conv(Double r)
+{
+ return _mm512_castpd256_pd512(r.v);
+}
+static SIMDINLINE __m512i __conv(Integer r)
+{
+ return _mm512_castsi256_si512(r.v);
+}
+static SIMDINLINE Float __conv(__m512 r)
+{
+ return _mm512_castps512_ps256(r);
+}
+static SIMDINLINE Double __conv(__m512d r)
+{
+ return _mm512_castpd512_pd256(r);
+}
+static SIMDINLINE Integer __conv(__m512i r)
+{
+ return _mm512_castsi512_si256(r);
+}
-#define SIMD_WRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+public:
+#define SIMD_WRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xff))
-#define SIMD_WRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xff))
-#define SIMD_WRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xff))
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_WRAPPER_3_(op, intrin, mask) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
}
-#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xff))
-#define SIMD_DWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
}
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xff))
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff))
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xff))
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
}
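// For illustration: this AVX512 target still carries 256-bit vectors. Each
// wrapper widens its operands to 512 bits with __conv, runs the zero-masked
// 512-bit intrinsic on the low lanes only (__mmask16(0xff) = the low eight
// 32-bit lanes), and narrows the result back. SIMD_WRAPPER_2(add_ps), for
// example, expands to roughly
//
//     static SIMDINLINE Float SIMDCALL add_ps(Float a, Float b)
//     {
//         return __conv(_mm512_maskz_add_ps(__mmask16(0xff), __conv(a), __conv(b)));
//     }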
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xff)); // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xff)); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xff)); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
+SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
+SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
+SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
// return (a * b) & 0xFFFFFFFF
//
// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
// and store the low 32 bits of the intermediate integers in dst.
SIMD_IWRAPPER_2_32(mullo_epi32);
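// For illustration: mullo_epi32(set1_epi32(0x00010000), set1_epi32(0x00010000))
// yields 0 in every lane -- the full product is 0x100000000, and only its low
// 32 bits are kept.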
-SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
+SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
//-----------------------------------------------------------------------
// Logical operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xff)); // return a & b (int)
+SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xff)); // return a & b (int)
SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xff)); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xff)); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xff)); // return a ^ b (int)
-
+SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xff)); // return a | b (int)
+SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xff)); // return a ^ b (int)
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
-SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32)
-SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32)
-SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32)
+SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_2_32(sllv_epi32); // return a << b (uint32)
+SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT (int32)
+SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT (uint32)
+SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32)
// use AVX2 version
-//SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
+// SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
//-----------------------------------------------------------------------
// Conversion operations (Use AVX2 versions)
//-----------------------------------------------------------------------
// Comparison operations (Use AVX2 versions)
//-----------------------------------------------------------------------
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,); // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8); // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16); // return a == b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi32); // return a == b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi64); // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8); // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16); // return a > b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi32); // return a > b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi64); // return a > b (int64)
//
-//static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
+// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b) // return a < b (int32)
//{
// return cmpgt_epi32(b, a);
//}
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+// SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8   See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+// SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16  See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+// SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-//static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+// static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for
+// each 32-bit lane i (float)
//{
// return _mm256_permutevar8x32_ps(a, swiz);
//}
SIMD_IWRAPPER_1I_32(shuffle_epi32);
-//template<int ImmT>
-//static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
+// template<int ImmT>
+// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
//{
// return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
//}
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
SIMD_IWRAPPER_2_32(unpackhi_epi32);
SIMD_IWRAPPER_2_32(unpacklo_epi32);
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
}
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
return __conv(_mm512_mask_i32gather_ps(
- _mm512_setzero_ps(),
- __mmask16(0xff),
- __conv(idx),
- p,
- static_cast<int>(ScaleT)));
+ _mm512_setzero_ps(), __mmask16(0xff), __conv(idx), p, static_cast<int>(ScaleT)));
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
{
__mmask16 m = 0xff;
- m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
- _mm512_set1_epi32(0x80000000));
- return __conv(_mm512_mask_i32gather_ps(
- __conv(old),
- m,
- __conv(idx),
- p,
- static_cast<int>(ScaleT)));
+ m = _mm512_mask_test_epi32_mask(
+ m, _mm512_castps_si512(__conv(mask)), _mm512_set1_epi32(0x80000000));
+ return __conv(
+ _mm512_mask_i32gather_ps(__conv(old), m, __conv(idx), p, static_cast<int>(ScaleT)));
}
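// Illustrative scalar sketch of the masked gather above, per 32-bit lane i
// (hypothetical element values; ScaleT is the byte-scaling template parameter):
//   if (mask[i] & 0x80000000)
//       dst[i] = *(float*)((int8*)p + idx[i] * ScaleT); // fetch the addressed element
//   else
//       dst[i] = old[i];                                // keep the previous value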
// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
// {
//     __mmask64 m = 0xffffffffull;
//     return static_cast<uint32_t>(
//         _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
// }
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
{
__mmask16 m = 0xff;
- m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
+ m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
_mm512_mask_storeu_ps(p, m, __conv(src));
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
{
_mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a));
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
{
_mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a));
}
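// Note: every wrapper above passes __mmask16(0xff) (or an equivalent masked
// load/store), so only the low 8 of the 16 zmm float lanes are ever touched --
// these definitions emulate a 256-bit-wide SIMD target on the AVX512 register set.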
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// These use native AVX512 instructions with masking to enable a larger
// register set.
//============================================================================
-#define SIMD_DWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_DWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
-#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Double SIMDCALL op(Double a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_DWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
-#define SIMD_DWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_DWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
-#define SIMD_IWRAPPER_1_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
}
-#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
-#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
}
-#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
-#define SIMD_IWRAPPER_2_(op, intrin, mask) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
}
-#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
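// For reference, an instantiation such as SIMD_IWRAPPER_2_8(add_epi8) below
// expands to roughly:
//   static SIMDINLINE Integer SIMDCALL add_epi8(Integer a, Integer b)
//   {
//       return __conv(_mm512_maskz_add_epi8(__mmask64(0xffffffffull), __conv(a), __conv(b)));
//   }
// i.e. the op touches only the low 32 byte lanes (the emulated 256-bit width);
// the maskz_ form zeroes all masked-off lanes.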
-
-SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and
+ // _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and
+ // _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and
+ // _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
+ // _mm512_packus_epi32
SIMD_IWRAPPER_2_16(unpackhi_epi16);
SIMD_IWRAPPER_2_64(unpackhi_epi64);
SIMD_IWRAPPER_2_8(unpackhi_epi8);
static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
{
__mmask64 m = 0xffffffffull;
- return static_cast<uint32_t>(
- _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+ return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
}
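// Illustrative scalar sketch: one result bit per byte lane, taken from the sign bit:
//   uint32_t result = 0;
//   for (int i = 0; i < 32; ++i)
//       result |= uint32_t((a[i] & 0x80) ? 1 : 0) << i;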
#undef SIMD_DWRAPPER_1_
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// These use native AVX512 instructions with masking to enable a larger
// register set.
//============================================================================
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
-#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
// gcc as of 7.1 was missing these intrinsics
#ifndef _mm512_cmpneq_ps_mask
-#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ)
+#define _mm512_cmpneq_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_NEQ_UQ)
#endif
#ifndef _mm512_cmplt_ps_mask
-#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS)
+#define _mm512_cmplt_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_LT_OS)
#endif
#ifndef _mm512_cmplt_pd_mask
-#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS)
+#define _mm512_cmplt_pd_mask(a, b) _mm512_cmp_pd_mask((a), (b), _CMP_LT_OS)
#endif
#endif
//============================================================================
static const int TARGET_SIMD_WIDTH = 16;
-using SIMD256T = SIMD256Impl::AVX2Impl;
+using SIMD256T = SIMD256Impl::AVX2Impl;
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return intrin(a);\
- }
+#define SIMD_WRAPPER_1_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-#define SIMD_WRAPPER_1(op) \
- SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b);\
- }
+#define SIMD_WRAPPER_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_castsi512_ps(_mm512_##intrin(\
- _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_castsi512_ps( \
+ _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
}
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##op(a, b);\
- }
+#define SIMD_DWRAPPER_2(op) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return _mm512_##op(a, b, c);\
- }
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return _mm512_##op(a);\
- }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1_4(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return intrin(a, ImmT); \
}
#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b);\
- }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return cmp(a, b);\
- }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
}
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
private:
- static SIMDINLINE Integer vmask(__mmask16 m)
- {
- return _mm512_maskz_set1_epi32(m, -1);
- }
+static SIMDINLINE Integer vmask(__mmask16 m)
+{
+ return _mm512_maskz_set1_epi32(m, -1);
+}
- static SIMDINLINE Integer vmask(__mmask8 m)
- {
- return _mm512_maskz_set1_epi64(m, -1LL);
- }
+static SIMDINLINE Integer vmask(__mmask8 m)
+{
+ return _mm512_maskz_set1_epi64(m, -1LL);
+}
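// vmask converts an AVX512 predicate mask into a legacy AVX2-style vector mask.
// Illustrative example: vmask(__mmask16(0x5)) yields lanes {-1, 0, -1, 0, 0, ...},
// i.e. all-ones in each lane whose mask bit is set and zero elsewhere.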
public:
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1_(rcp_ps, _mm512_rcp14_ps); // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, _mm512_rsqrt14_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_1_(rcp_ps, _mm512_rcp14_ps); // return 1.0f / a
+SIMD_WRAPPER_1_(rsqrt_ps, _mm512_rsqrt14_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
template <RoundMode RMT>
static SIMDINLINE Float SIMDCALL round_ps(Float a)
{
    return _mm512_roundscale_ps(a, static_cast<int>(RMT));
}
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
+{
+ return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float a)
+{
+ return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
//-----------------------------------------------------------------------
SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-//SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-//SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+// SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
+// SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
- // return (a * b) & 0xFFFFFFFF
- //
- // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
- // and store the low 32 bits of the intermediate integers in dst.
+// return (a * b) & 0xFFFFFFFF
+//
+// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
+// and store the low 32 bits of the intermediate integers in dst.
SIMD_IWRAPPER_2(mullo_epi32);
SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-//SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+// SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
//-----------------------------------------------------------------------
// Logical operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_si512); // return a & b (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_si512); // return (~a) & b (int)
-SIMD_IWRAPPER_2_(or_si, or_si512); // return a | b (int)
-SIMD_IWRAPPER_2_(xor_si, xor_si512); // return a ^ b (int)
+SIMD_IWRAPPER_2_(and_si, and_si512); // return a & b (int)
+SIMD_IWRAPPER_2_(andnot_si, andnot_si512); // return (~a) & b (int)
+SIMD_IWRAPPER_2_(or_si, or_si512); // return a | b (int)
+SIMD_IWRAPPER_2_(xor_si, xor_si512); // return a ^ b (int)
// SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
// SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
// SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
// SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
SIMD_IWRAPPER_2(sllv_epi32);
-SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
-SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
+SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32)
+SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32)
#if 0
SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint)
#endif
//-----------------------------------------------------------------------
// Conversion operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
{
return _mm512_castpd_ps(a);
}
-static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
{
return _mm512_castps_si512(a);
}
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
{
return _mm512_castsi512_pd(a);
}
-static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
{
return _mm512_castps_pd(a);
}
-static SIMDINLINE Integer SIMDCALL castpd_si(Double a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castpd_si(Double a) // return *(Integer*)(&a)
{
return _mm512_castpd_si512(a);
}
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
{
return _mm512_castsi512_ps(a);
}
static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a (int32 --> float)
{
    return _mm512_cvtepi32_ps(a);
}
-//SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-SIMD_IWRAPPER_1_4(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
-SIMD_IWRAPPER_1_8(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
-SIMD_IWRAPPER_1_4(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
-SIMD_IWRAPPER_1_8(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
+// SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
+SIMD_IWRAPPER_1_4(cvtepu8_epi32); // return (int32)a (uint8 --> int32)
+SIMD_IWRAPPER_1_8(cvtepu16_epi32); // return (int32)a (uint16 --> int32)
+SIMD_IWRAPPER_1_4(cvtepu16_epi64); // return (int64)a (uint16 --> int64)
+SIMD_IWRAPPER_1_8(cvtepu32_epi64); // return (int64)a (uint32 --> int64)
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
+static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a (float --> int32)
{
return _mm512_cvtps_epi32(a);
}
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvttps_epi32(Float a) // return (int32)a (rnd_to_zero(float) --> int32)
{
return _mm512_cvttps_epi32(a);
}
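// Worked example (illustrative): for a lane holding -1.7f,
//   cvtps_epi32  rounds under the current MXCSR mode (nearest-even by default) --> -2
//   cvttps_epi32 truncates toward zero                                         --> -1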
//-----------------------------------------------------------------------
// Comparison operations
//-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float a, Float b)
{
return _mm512_cmp_ps_mask(a, b, static_cast<const int>(CmpTypeT));
}
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
{
    // Legacy vector mask generator
    __mmask16 result = cmp_ps_mask<CmpTypeT>(a, b);
    return castsi_ps(vmask(result));
}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
+{
+ return cmp_ps<CompareType::LE_OQ>(a, b);
+}
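// Usage sketch (hypothetical values): the legacy-style compares above produce a
// per-lane all-ones/all-zeros Float mask that can feed blendv_ps directly:
//   Float m = cmplt_ps(a, b);     // lane = ~0 where a < b, else 0
//   Float r = blendv_ps(a, b, m); // lane = (a < b) ? b : a, i.e. max(a, b)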
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b)
{
// Legacy vector mask generator
__mmask16 result = _mm512_cmp_epi32_mask(a, b, static_cast<const int>(CmpTypeT));
return vmask(result);
}
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b)
{
    // Legacy vector mask generator
    __mmask8 result = _mm512_cmp_epi64_mask(a, b, static_cast<const int>(CmpTypeT));
    return vmask(result);
}
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>); // return a == b (int32)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>); // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>); // return a > b (int32)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>); // return a > b (int64)
-SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>); // return a < b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>); // return a == b (int32)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>); // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>); // return a > b (int32)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>); // return a > b (int64)
+SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>); // return a < b (int32)
-static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL testz_ps(Float a,
+ Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
{
return (0 == static_cast<int>(_mm512_test_epi32_mask(castps_si(a), castps_si(b))));
}
-static SIMDINLINE bool SIMDCALL testz_si(Integer a, Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer a,
+ Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
{
return (0 == static_cast<int>(_mm512_test_epi32_mask(a, b)));
}
static SIMDINLINE Float SIMDCALL blendv_ps(Float a, Float b, Float mask) // return mask ? b : a (float)
{
    return _mm512_mask_blend_ps(__mmask16(movemask_ps(mask)), a, b);
}
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+ Integer b,
+ Float mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
}
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+ Integer b,
+ Integer mask) // return mask ? b : a (int)
{
return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
}
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p) // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+ broadcast_ss(float const* p) // return *p (all elements in vector get same value)
{
return _mm512_set1_ps(*p);
}
-template<int imm>
+template <int imm>
static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float a)
{
return _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a), imm));
}
-template<int imm>
+template <int imm>
static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double a)
{
return _mm512_extractf64x4_pd(a, imm);
}
-template<int imm>
+template <int imm>
static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer a)
{
return _mm512_extracti64x4_epi64(a, imm);
}
-template<int imm>
+template <int imm>
static SIMDINLINE Float SIMDCALL insert_ps(Float a, SIMD256Impl::Float b)
{
return _mm512_castpd_ps(_mm512_insertf64x4(_mm512_castps_pd(a), _mm256_castps_pd(b), imm));
}
-template<int imm>
+template <int imm>
static SIMDINLINE Double SIMDCALL insert_pd(Double a, SIMD256Impl::Double b)
{
return _mm512_insertf64x4(a, b, imm);
}
-template<int imm>
+template <int imm>
static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
{
return _mm512_inserti64x4(a, b, imm);
}
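// Illustrative round trip: extract_si/insert_si move 256-bit halves of a 512-bit
// register, with imm selecting the lower (0) or upper (1) half:
//   SIMD256Impl::Integer lo = extract_si<0>(v); // low 8 int32 lanes of v
//   Integer w = insert_si<1>(v, lo);            // duplicate them into the upper half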
-// SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
+// SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+// SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+// SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+// SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
{
return _mm512_permute_ps(a, ImmT);
}
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Integer SIMDCALL
+ permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
return _mm512_permutexvar_epi32(swiz, a);
}
-static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+ permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
return _mm512_permutexvar_ps(swiz, a);
}
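// Illustrative example: reversing all 16 float lanes with the variable permute
// (set_epi32 takes arguments i15..i0, so lane 0 of rev holds 15):
//   Integer rev = set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//   Float   r   = permute_ps(a, rev); // r[i] = a[rev[i]], so r[0] = a[15], etc.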
SIMD_IWRAPPER_1I(shuffle_epi32);
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
SIMD_DWRAPPER_2I(shuffle_pd);
SIMD_WRAPPER_2I(shuffle_ps);
-template<int ImmT>
+template <int ImmT>
static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
{
    return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
}
SIMD_IWRAPPER_2(unpackhi_epi16);
-//SIMD_IFWRAPPER_2(unpackhi_epi32, _mm512_unpackhi_ps);
+// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm512_unpackhi_ps);
static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
{
return castps_si(_mm512_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
}
SIMD_IWRAPPER_2(unpackhi_epi64);
-//SIMD_IWRAPPER_2(unpackhi_epi8);
+// SIMD_IWRAPPER_2(unpackhi_epi8);
SIMD_DWRAPPER_2(unpackhi_pd);
SIMD_WRAPPER_2(unpackhi_ps);
-//SIMD_IWRAPPER_2(unpacklo_epi16);
+// SIMD_IWRAPPER_2(unpacklo_epi16);
SIMD_IFWRAPPER_2(unpacklo_epi32, unpacklo_ps);
SIMD_IWRAPPER_2(unpacklo_epi64);
-//SIMD_IWRAPPER_2(unpacklo_epi8);
+// SIMD_IWRAPPER_2(unpacklo_epi8);
SIMD_DWRAPPER_2(unpacklo_pd);
SIMD_WRAPPER_2(unpacklo_ps);
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT));
}
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+ load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
{
return broadcast_ss(p);
}
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
return _mm512_load_ps(p);
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
return _mm512_load_si512(&p->v);
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
return _mm512_loadu_ps(p);
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
return _mm512_loadu_si512(p);
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
{
__mmask16 k = _mm512_cmpneq_ps_mask(mask, setzero_ps());
return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT));
}
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
{
Mask m = _mm512_cmplt_epi32_mask(mask, setzero_si());
_mm512_mask_store_ps(p, m, src);
}
-//static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
+// static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
//{
// __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
// return static_cast<uint64_t>(m);
//}
static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
{
    return _mm512_set1_epi8(i);
}
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
{
return _mm512_set1_ps(f);
}
-static SIMDINLINE Double SIMDCALL setzero_pd() // return 0 (double)
+static SIMDINLINE Double SIMDCALL setzero_pd() // return 0 (double)
{
return _mm512_setzero_pd();
}
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
{
return _mm512_setzero_ps();
}
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
{
return _mm512_setzero_si512();
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float a) // *p = a (stores all elements contiguously in memory)
{
_mm512_store_ps(p, a);
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
{
_mm512_store_si512(&p->v, a);
}
-static SIMDINLINE void SIMDCALL storeu_si(Integer *p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
+static SIMDINLINE void SIMDCALL
+ storeu_si(Integer* p, Integer a) // *p = a (same as store_si but allows for unaligned mem)
{
_mm512_storeu_si512(&p->v, a);
}
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+ stream_ps(float* p, Float a) // *p = a (same as store_ps, but doesn't keep memory in cache)
{
_mm512_stream_ps(p, a);
}
-static SIMDINLINE Integer SIMDCALL set_epi32(
- int i15, int i14, int i13, int i12, int i11, int i10, int i9, int i8,
- int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
+ int i14,
+ int i13,
+ int i12,
+ int i11,
+ int i10,
+ int i9,
+ int i8,
+ int i7,
+ int i6,
+ int i5,
+ int i4,
+ int i3,
+ int i2,
+ int i1,
+ int i0)
{
- return _mm512_set_epi32(
- i15, i14, i13, i12, i11, i10, i9, i8,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return _mm512_set_epi32(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
}
-static SIMDINLINE Integer SIMDCALL set_epi32(
- int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+ set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
{
- return set_epi32(
- 0, 0, 0, 0, 0, 0, 0, 0,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
}
-static SIMDINLINE Float SIMDCALL set_ps(
- float i15, float i14, float i13, float i12, float i11, float i10, float i9, float i8,
- float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL set_ps(float i15,
+ float i14,
+ float i13,
+ float i12,
+ float i11,
+ float i10,
+ float i9,
+ float i8,
+ float i7,
+ float i6,
+ float i5,
+ float i4,
+ float i3,
+ float i2,
+ float i1,
+ float i0)
{
- return _mm512_set_ps(
- i15, i14, i13, i12, i11, i10, i9, i8,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return _mm512_set_ps(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
}
-static SIMDINLINE Float SIMDCALL set_ps(
- float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+ set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
{
- return set_ps(
- 0, 0, 0, 0, 0, 0, 0, 0,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
}
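// Note: the 8-argument set_epi32/set_ps overloads above fill only the low 8 lanes
// (the SIMD256-width subset) and zero the upper 8 lanes.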
static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
{
    return castsi_ps(vmask(__mmask16(mask)));
}
#undef SIMD_IWRAPPER_2
#undef SIMD_IWRAPPER_2_
#undef SIMD_IWRAPPER_2I
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
//
//============================================================================
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return intrin(a);\
- }
+#define SIMD_WRAPPER_1_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-#define SIMD_WRAPPER_1(op) \
- SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b);\
- }
+#define SIMD_WRAPPER_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_castsi512_ps(_mm512_##intrin(\
- _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_castsi512_ps( \
+ _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
}
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##op(a, b);\
- }
+#define SIMD_DWRAPPER_2(op) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return _mm512_##op(a, b, c);\
- }
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return _mm512_##op(a);\
- }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1_4(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return intrin(a, ImmT); \
}
#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b);\
- }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return cmp(a, b);\
- }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
}
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
private:
- static SIMDINLINE Integer vmask(__mmask32 m)
- {
- return _mm512_maskz_set1_epi16(m, -1);
- }
- static SIMDINLINE Integer vmask(__mmask64 m)
- {
- return _mm512_maskz_set1_epi8(m, -1);
- }
-public:
+static SIMDINLINE Integer vmask(__mmask32 m)
+{
+ return _mm512_maskz_set1_epi16(m, -1);
+}
+static SIMDINLINE Integer vmask(__mmask64 m)
+{
+ return _mm512_maskz_set1_epi8(m, -1);
+}
-SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+public:
+SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
-SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
-SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
-SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
+SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int)
+SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int)
+SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int)
+SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
-SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
+SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a (uint8 --> int16)
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
{
// Legacy vector mask generator
__mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
return vmask(result);
}
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
{
// Legacy vector mask generator
    __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT));
return vmask(result);
}
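// Editorial note: AVX512BW compares such as _mm512_cmp_epi8_mask return a compact
// k-register (__mmask32 / __mmask64) rather than a full vector, so vmask() above widens
// the mask back into the legacy all-ones / all-zeros per-lane form, roughly:
//     lane[i] = ((m >> i) & 1) ? ~0 : 0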
-SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>); // return a == b (int8)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>); // return a > b (int8)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
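+// Editorial sketch of pack semantics: packs_epi16 narrows int16 -> int8 with signed
+// saturation, interleaving its sources per 128-bit lane:
+//     dst.lane128[i] = { saturate8(a.lane128[i]), saturate8(b.lane128[i]) }
+// the packus_* forms saturate to the unsigned range (0..0xFF / 0..0xFFFF) instead.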
-SIMD_IWRAPPER_2(unpackhi_epi8); // See documentation for _mm512_unpackhi_epi8
-SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16
-SIMD_IWRAPPER_2(unpacklo_epi8); // See documentation for _mm512_unpacklo_epi8
+SIMD_IWRAPPER_2(unpackhi_epi8); // See documentation for _mm512_unpackhi_epi8
+SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16
+SIMD_IWRAPPER_2(unpacklo_epi8); // See documentation for _mm512_unpacklo_epi8
SIMD_IWRAPPER_2(shuffle_epi8);
static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
{
    __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
return static_cast<uint64_t>(m);
}
-
-
#undef SIMD_WRAPPER_1_
#undef SIMD_WRAPPER_1
#undef SIMD_WRAPPER_2
#undef SIMD_IWRAPPER_2
#undef SIMD_IWRAPPER_2_
#undef SIMD_IWRAPPER_2I
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
//
//============================================================================
-#define SIMD_WRAPPER_1_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a) \
- {\
- return intrin(a);\
- }
+#define SIMD_WRAPPER_1_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
-#define SIMD_WRAPPER_1(op) \
- SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
-#define SIMD_WRAPPER_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b);\
- }
+#define SIMD_WRAPPER_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
-#define SIMD_WRAPPERI_2_(op, intrin) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_castsi512_ps(_mm512_##intrin(\
- _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_castsi512_ps( \
+ _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
}
-#define SIMD_DWRAPPER_2(op) \
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##op(a, b);\
- }
+#define SIMD_DWRAPPER_2(op) \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
-#define SIMD_WRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
-#define SIMD_DWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
-#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \
- {\
- return _mm512_##op(a, b, c);\
- }
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return _mm512_##op(a);\
- }
-#define SIMD_IWRAPPER_1_8(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1_4(op) \
- static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \
- {\
- return _mm512_##op(a);\
- }
+#define SIMD_IWRAPPER_1_4(op) \
+ static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
-#define SIMD_IWRAPPER_1I_(op, intrin) \
- template<int ImmT> \
- static SIMDINLINE Integer SIMDCALL op(Integer a) \
- {\
- return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a) \
+ { \
+ return intrin(a, ImmT); \
}
#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
-#define SIMD_IWRAPPER_2_(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b);\
- }
-#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
-#define SIMD_IWRAPPER_2_CMP(op, cmp) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return cmp(a, b);\
- }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
-#define SIMD_IFWRAPPER_2(op, intrin) \
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin) \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
}
-#define SIMD_IWRAPPER_2I_(op, intrin) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
- {\
- return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+ { \
+ return _mm512_##intrin(a, b, ImmT); \
}
#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
-SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int)
-SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int)
-SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int)
-SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int)
+SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int)
+SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int)
+SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int)
+SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int)
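+// Editorial note: this variant routes the float bitwise ops through the *_epi32
+// intrinsics with casts, most likely because _mm512_and_ps and friends require
+// AVX512DQ while the integer forms are plain AVX512F; SIMD_WRAPPERI_2_(and_ps,
+// and_epi32), for example, expands to
+//     static SIMDINLINE Float SIMDCALL and_ps(Float a, Float b)
+//     {
+//         return _mm512_castsi512_ps(
+//             _mm512_and_epi32(_mm512_castps_si512(a), _mm512_castps_si512(b)));
+//     }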
#undef SIMD_WRAPPER_1_
#undef SIMD_WRAPPER_1
#undef SIMD_IWRAPPER_2
#undef SIMD_IWRAPPER_2_
#undef SIMD_IWRAPPER_2I
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX512_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
//============================================================================
static const int TARGET_SIMD_WIDTH = 8;
-using SIMD128T = SIMD128Impl::AVXImpl;
-
-#define SIMD_WRAPPER_1(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a) \
- {\
- return Float\
- {\
- SIMD256T::op(a.v8[0]),\
- SIMD256T::op(a.v8[1]),\
- };\
+using SIMD128T = SIMD128Impl::AVXImpl;
+
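+// Editorial sketch: this target emulates a 16-wide (512-bit) SIMD on AVX/AVX2 hardware
+// by storing each vector as two 256-bit halves (v8[0], v8[1]) and forwarding every op
+// to SIMD256T per half; SIMD_WRAPPER_1(rcp_ps) below, for example, expands to
+//     static SIMDINLINE Float SIMDCALL rcp_ps(Float const& a)
+//     {
+//         return Float{SIMD256T::rcp_ps(a.v8[0]), SIMD256T::rcp_ps(a.v8[1])};
+//     }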
+#define SIMD_WRAPPER_1(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a) \
+ { \
+ return Float{ \
+ SIMD256T::op(a.v8[0]), \
+ SIMD256T::op(a.v8[1]), \
+ }; \
}
-#define SIMD_WRAPPER_2(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \
- {\
- return Float\
- {\
- SIMD256T::op(a.v8[0], b.v8[0]),\
- SIMD256T::op(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_WRAPPER_2(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+ { \
+ return Float{ \
+ SIMD256T::op(a.v8[0], b.v8[0]), \
+ SIMD256T::op(a.v8[1], b.v8[1]), \
+ }; \
}
-#define SIMD_WRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \
- {\
- return Float\
- {\
- SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),\
- SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_WRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+ { \
+ return Float{ \
+ SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]), \
+ SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
+ }; \
}
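// Editorial note: for immediates that carry one control bit per lane (e.g. blend_ps),
// SIMD_WRAPPER_2I feeds the low byte of ImmT to lanes 0..7 and the byte shifted down
// by TARGET_SIMD_WIDTH (8) to lanes 8..15, e.g.
//     blend_ps<0x0F0F>(a, b);  // lanes 0-3 and 8-11 take b, the rest keep a
// SIMD_WRAPPER_2I_1 below instead passes the identical immediate to both halves.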
-#define SIMD_WRAPPER_2I_1(op) \
- template<int ImmT>\
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b) \
- {\
- return Float\
- {\
- SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),\
- SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_WRAPPER_2I_1(op) \
+ template <int ImmT> \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+ { \
+ return Float{ \
+ SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]), \
+ SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]), \
+ }; \
}
-#define SIMD_WRAPPER_3(op) \
- static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c) \
- {\
- return Float\
- {\
- SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),\
- SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),\
- };\
- }
-
-#define SIMD_IWRAPPER_1(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a) \
- {\
- return Integer\
- {\
- SIMD256T::op(a.v8[0]),\
- SIMD256T::op(a.v8[1]),\
- };\
+#define SIMD_WRAPPER_3(op) \
+ static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
+ { \
+ return Float{ \
+ SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]), \
+ SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]), \
+ }; \
}
-#define SIMD_IWRAPPER_2(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return Integer\
- {\
- SIMD256T::op(a.v8[0], b.v8[0]),\
- SIMD256T::op(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_IWRAPPER_1(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+ { \
+ return Integer{ \
+ SIMD256T::op(a.v8[0]), \
+ SIMD256T::op(a.v8[1]), \
+ }; \
}
-#define SIMD_IWRAPPER_2I(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return Integer\
- {\
- SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),\
- SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_IWRAPPER_2(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+ SIMD256T::op(a.v8[0], b.v8[0]), \
+ SIMD256T::op(a.v8[1], b.v8[1]), \
+ }; \
}
-#define SIMD_IWRAPPER_2I_1(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return Integer\
- {\
- SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),\
- SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_IWRAPPER_2I(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+ SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]), \
+ SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
+ }; \
}
-#define SIMD_IWRAPPER_2I_2(op) \
- template<int ImmT>\
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b) \
- {\
- return Integer\
- {\
- SIMD256T::template op<0xF & ImmT>(a.v8[0], b.v8[0]),\
- SIMD256T::template op<0xF & (ImmT >> 4)>(a.v8[1], b.v8[1]),\
- };\
+#define SIMD_IWRAPPER_2I_1(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+ SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]), \
+ SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]), \
+ }; \
}
-#define SIMD_IWRAPPER_3(op) \
- static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c) \
- {\
- return Integer\
- {\
- SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),\
- SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),\
- };\
+#define SIMD_IWRAPPER_2I_2(op) \
+ template <int ImmT> \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+ { \
+ return Integer{ \
+ SIMD256T::template op<0xF & ImmT>(a.v8[0], b.v8[0]), \
+ SIMD256T::template op<0xF & (ImmT >> 4)>(a.v8[1], b.v8[1]), \
+ }; \
+ }
+
+#define SIMD_IWRAPPER_3(op) \
+ static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
+ { \
+ return Integer{ \
+ SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]), \
+ SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]), \
+ }; \
}
//-----------------------------------------------------------------------
// Single precision floating point arithmetic operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps); // return a + b
-SIMD_WRAPPER_2(div_ps); // return a / b
-SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
-SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps); // return a * b
-SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps); // return a - b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_2(max_ps); // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps); // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps); // return a * b
+SIMD_WRAPPER_1(rcp_ps); // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps); // return a - b
template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const &a)
+static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
{
- return Float
- {
+ return Float{
SIMD256T::template round_ps<RMT>(a.v8[0]),
SIMD256T::template round_ps<RMT>(a.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
+{
+ return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
+{
+ return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
//-----------------------------------------------------------------------
// Integer (various width) arithmetic operations
SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
SIMD_IWRAPPER_2(add_epi8); // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int)
SIMD_IWRAPPER_2(xor_si); // return a ^ b (int)
-
//-----------------------------------------------------------------------
// Shift operations
//-----------------------------------------------------------------------
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const &a) // return a << ImmT
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const& a) // return a << ImmT
{
- return Integer
- {
+ return Integer{
SIMD256T::template slli_epi32<ImmT>(a.v8[0]),
SIMD256T::template slli_epi32<ImmT>(a.v8[1]),
};
}
-SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
+SIMD_IWRAPPER_2(sllv_epi32); // return a << b (uint32)
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const &a) // return a >> ImmT (int32)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const& a) // return a >> ImmT (int32)
{
- return Integer
- {
+ return Integer{
SIMD256T::template srai_epi32<ImmT>(a.v8[0]),
SIMD256T::template srai_epi32<ImmT>(a.v8[1]),
};
}
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const &a) // return a >> ImmT (uint32)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const& a) // return a >> ImmT (uint32)
{
- return Integer
- {
+ return Integer{
SIMD256T::template srli_epi32<ImmT>(a.v8[0]),
SIMD256T::template srli_epi32<ImmT>(a.v8[1]),
};
}
-template<int ImmT> // for each 128-bit lane:
-static SIMDINLINE Integer SIMDCALL srli_si(Integer const &a) // return a >> (ImmT*8) (uint)
+template <int ImmT> // for each 128-bit lane:
+static SIMDINLINE Integer SIMDCALL srli_si(Integer const& a) // return a >> (ImmT*8) (uint)
{
- return Integer
- {
+ return Integer{
SIMD256T::template srli_si<ImmT>(a.v8[0]),
SIMD256T::template srli_si<ImmT>(a.v8[1]),
};
}
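// Editorial note: srli_si is a byte-granularity shift (ImmT counts bytes, hence the
// "a >> (ImmT*8)" comment) and operates independently within each 128-bit lane, so
// e.g. srli_si<4> shifts every 128-bit lane right by one 32-bit element, filling with
// zeros: {e3, e2, e1, e0} --> {0, e3, e2, e1}.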
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a) // same as srli_si, but with Float cast to int
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL
+ srlisi_ps(Float const& a) // same as srli_si, but with Float cast to int
{
- return Float
- {
+ return Float{
SIMD256T::template srlisi_ps<ImmT>(a.v8[0]),
SIMD256T::template srlisi_ps<ImmT>(a.v8[1]),
};
}
-SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
+SIMD_IWRAPPER_2(srlv_epi32); // return a >> b (uint32)
//-----------------------------------------------------------------------
// Conversion operations
//-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
{
- return Float
- {
+ return Float{
SIMD256T::castpd_ps(a.v8[0]),
SIMD256T::castpd_ps(a.v8[1]),
};
}
-static SIMDINLINE Integer SIMDCALL castps_si(Float const &a) // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
{
- return Integer
- {
+ return Integer{
SIMD256T::castps_si(a.v8[0]),
SIMD256T::castps_si(a.v8[1]),
};
}
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
{
- return Double
- {
+ return Double{
SIMD256T::castsi_pd(a.v8[0]),
SIMD256T::castsi_pd(a.v8[1]),
};
}
-static SIMDINLINE Double SIMDCALL castps_pd(Float const &a) // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
{
- return Double
- {
+ return Double{
SIMD256T::castps_pd(a.v8[0]),
SIMD256T::castps_pd(a.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a) // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
{
- return Float
- {
+ return Float{
SIMD256T::castsi_ps(a.v8[0]),
SIMD256T::castsi_ps(a.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a) // return (float)a (int32 --> float)
+static SIMDINLINE Float SIMDCALL
+ cvtepi32_ps(Integer const& a) // return (float)a (int32 --> float)
{
- return Float
- {
+ return Float{
SIMD256T::cvtepi32_ps(a.v8[0]),
SIMD256T::cvtepi32_ps(a.v8[1]),
};
}
-static SIMDINLINE Integer SIMDCALL cvtepu8_epi16(SIMD256Impl::Integer const &a) // return (int16)a (uint8 --> int16)
+static SIMDINLINE Integer SIMDCALL
+ cvtepu8_epi16(SIMD256Impl::Integer const& a) // return (int16)a (uint8 --> int16)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtepu8_epi16(a.v4[0]),
SIMD256T::cvtepu8_epi16(a.v4[1]),
};
}
-static SIMDINLINE Integer SIMDCALL cvtepu8_epi32(SIMD256Impl::Integer const &a) // return (int32)a (uint8 --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvtepu8_epi32(SIMD256Impl::Integer const& a) // return (int32)a (uint8 --> int32)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtepu8_epi32(a.v4[0]),
SIMD256T::cvtepu8_epi32(SIMD128T::template srli_si<8>(a.v4[0])),
- };
+ };
}
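// Editorial note: all 16 source bytes live in the low 128-bit lane (a.v4[0]), so the
// upper result half widens bytes 8..15, exposed by byte-shifting that lane down with
// SIMD128T::srli_si<8>, rather than reading a.v4[1]. cvtepu16_epi64 below uses the
// same trick for its upper four words.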
-static SIMDINLINE Integer SIMDCALL cvtepu16_epi32(SIMD256Impl::Integer const &a) // return (int32)a (uint16 --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvtepu16_epi32(SIMD256Impl::Integer const& a) // return (int32)a (uint16 --> int32)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtepu16_epi32(a.v4[0]),
SIMD256T::cvtepu16_epi32(a.v4[1]),
};
}
-static SIMDINLINE Integer SIMDCALL cvtepu16_epi64(SIMD256Impl::Integer const &a) // return (int64)a (uint16 --> int64)
+static SIMDINLINE Integer SIMDCALL
+ cvtepu16_epi64(SIMD256Impl::Integer const& a) // return (int64)a (uint16 --> int64)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtepu16_epi64(a.v4[0]),
SIMD256T::cvtepu16_epi64(SIMD128T::template srli_si<8>(a.v4[0])),
};
}
-static SIMDINLINE Integer SIMDCALL cvtepu32_epi64(SIMD256Impl::Integer const &a) // return (int64)a (uint32 --> int64)
+static SIMDINLINE Integer SIMDCALL
+ cvtepu32_epi64(SIMD256Impl::Integer const& a) // return (int64)a (uint32 --> int64)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtepu32_epi64(a.v4[0]),
SIMD256T::cvtepu32_epi64(a.v4[1]),
};
}
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a) // return (int32)a (float --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvtps_epi32(Float const& a) // return (int32)a (float --> int32)
{
- return Integer
- {
+ return Integer{
SIMD256T::cvtps_epi32(a.v8[0]),
SIMD256T::cvtps_epi32(a.v8[1]),
};
}
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a) // return (int32)a (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+ cvttps_epi32(Float const& a) // return (int32)a (rnd_to_zero(float) --> int32)
{
- return Integer
- {
+ return Integer{
        SIMD256T::cvttps_epi32(a.v8[0]),
        SIMD256T::cvttps_epi32(a.v8[1]),
    };
}
//-----------------------------------------------------------------------
// Comparison operations
//-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b
+template <CompareType CmpTypeT>
+static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
{
- return Float
- {
+ return Float{
SIMD256T::template cmp_ps<CmpTypeT>(a.v8[0], b.v8[0]),
SIMD256T::template cmp_ps<CmpTypeT>(a.v8[1], b.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
+{
+ return cmp_ps<CompareType::LE_OQ>(a, b);
+}
-template<CompareType CmpTypeT>
-static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const &a, Float const &b)
+template <CompareType CmpTypeT>
+static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const& a, Float const& b)
{
return static_cast<Mask>(movemask_ps(cmp_ps<CmpTypeT>(a, b)));
}
+SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi8); // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8); // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL
+ testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
{
- return 0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) &
- SIMD256T::testz_ps(a.v8[1], b.v8[1]));
+ return 0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) & SIMD256T::testz_ps(a.v8[1], b.v8[1]));
}
-static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL
+ testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
{
- return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) &
- SIMD256T::testz_si(a.v8[1], b.v8[1]));
+ return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) & SIMD256T::testz_si(a.v8[1], b.v8[1]));
}
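// Editorial note: SIMD256T::testz_* yields 1 only when (a & b) is all zeroes within
// that half, so ANDing the two per-half results is nonzero exactly when both halves
// test zero.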
//-----------------------------------------------------------------------
// Blend / shuffle / permute operations
//-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
-SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
-SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int)
-{
- return Integer
- {
+SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a (float)
+SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a (int32)
+SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a (float)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+ Integer const& b,
+ Float const& mask) // return mask ? b : a (int)
+{
+ return Integer{
SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
};
}
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+ Integer const& b,
+ Integer const& mask) // return mask ? b : a (int)
{
- return Integer
- {
+ return Integer{
SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p) // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+ broadcast_ss(float const* p) // return *p (all elements in vector get same value)
{
float f = *p;
- return Float
- {
+ return Float{
SIMD256T::set1_ps(f),
SIMD256T::set1_ps(f),
};
}
-template<int imm>
-static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const& a)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
return a.v8[imm];
}
-template<int imm>
-static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const& a)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
return a.v8[imm];
}
-template<int imm>
-static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const& a)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
return a.v8[imm];
}
-template<int imm>
-static SIMDINLINE Float SIMDCALL insert_ps(Float const &a, SIMD256Impl::Float const &b)
+template <int imm>
+static SIMDINLINE Float SIMDCALL insert_ps(Float const& a, SIMD256Impl::Float const& b)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- Float r = a;
+    Float r   = a;
r.v8[imm] = b;
return r;
}
-template<int imm>
-static SIMDINLINE Double SIMDCALL insert_pd(Double const &a, SIMD256Impl::Double const &b)
+template <int imm>
+static SIMDINLINE Double SIMDCALL insert_pd(Double const& a, SIMD256Impl::Double const& b)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
- Double r = a;
+    Double r  = a;
r.v8[imm] = b;
return r;
}
-template<int imm>
-static SIMDINLINE Integer SIMDCALL insert_si(Integer const &a, SIMD256Impl::Integer const &b)
+template <int imm>
+static SIMDINLINE Integer SIMDCALL insert_si(Integer const& a, SIMD256Impl::Integer const& b)
{
SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
Integer r = a;
    r.v8[imm] = b;
return r;
}
-SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
{
- return Float
- {
+ return Float{
SIMD256T::template permute_ps<ImmT>(a.v8[0]),
SIMD256T::template permute_ps<ImmT>(a.v8[1]),
};
}
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
+static SIMDINLINE Integer SIMDCALL permute_epi32(
+ Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
{
return castps_si(permute_ps(castsi_ps(a), swiz));
}
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+ permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
{
const auto mask = SIMD256T::set1_epi32(7);
    auto lolo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[0], mask));
    auto lohi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[0], mask));
auto hilo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[1], mask));
auto hihi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[1], mask));
- return Float
- {
- SIMD256T::blendv_ps(lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))),
- SIMD256T::blendv_ps(hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))),
+ return Float{
+ SIMD256T::blendv_ps(
+ lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))),
+ SIMD256T::blendv_ps(
+ hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))),
};
}
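// Editorial sketch of the cross-half permute above: an AVX2 permute_ps can only index
// within its own 256-bit register, so all four half-to-half combinations are formed
// with the index masked to 0..7 (lolo/lohi/hilo/hihi), and cmpgt_epi32(swiz, 7) then
// picks, per lane, whether the element comes from the low or the high source half:
//     out.v8[0][i] = (swiz.v8[0][i] > 7) ? a.v8[1][swiz & 7] : a.v8[0][swiz & 7]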
// ESAC
// RETURN tmp[127:0]
// }
-//
+//
// dst[127:0] : = SELECT4(a[511:0], imm8[1:0])
// dst[255:128] : = SELECT4(a[511:0], imm8[3:2])
// dst[383:256] : = SELECT4(b[511:0], imm8[5:4])
// AVX instructions for emulation.
//
template <int shuf>
-static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const &a, Float const &b)
+static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const& a, Float const& b)
{
- return Float
- {
- SIMD256T::template permute2f128_ps<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
- SIMD256T::template permute2f128_ps<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+ return Float{
+ SIMD256T::template permute2f128_ps<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+ a.v8[1]),
+ SIMD256T::template permute2f128_ps<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+ b.v8[1]),
};
}
template <int shuf>
-static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const &a, Double const &b)
+static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const& a, Double const& b)
{
- return Double
- {
- SIMD256T::template permute2f128_pd<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
- SIMD256T::template permute2f128_pd<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+ return Double{
+ SIMD256T::template permute2f128_pd<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+ a.v8[1]),
+ SIMD256T::template permute2f128_pd<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+ b.v8[1]),
};
}
template <int shuf>
-static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const &a, Integer const &b)
+static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const& a, Integer const& b)
{
- return Integer
- {
- SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
- SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+ return Integer{
+ SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+ a.v8[1]),
+ SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+ b.v8[1]),
};
}
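// Editorial note on the immediate remap above: the 512-bit op selects four 128-bit
// lanes with the four 2-bit fields of `shuf`, while AVX's permute2f128 reads its two
// selectors from imm bits [1:0] and [5:4]. For the low result half, the fields at
// shuf[1:0] and shuf[3:2] are repositioned via ((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2);
// the high half does the same after shifting shuf[5:4] and shuf[7:6] down.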
//-----------------------------------------------------------------------
// Load / store operations
//-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
{
- return Float
- {
+ return Float{
SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[0]),
SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p) // return *p (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+ load1_ps(float const* p) // return *p (broadcast 1 value to all elements)
{
return broadcast_ss(p);
}
-static SIMDINLINE Float SIMDCALL load_ps(float const *p) // return *p (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+ load_ps(float const* p) // return *p (loads SIMD width elements from memory)
{
- return Float
- {
- SIMD256T::load_ps(p),
- SIMD256T::load_ps(p + TARGET_SIMD_WIDTH)
- };
+ return Float{SIMD256T::load_ps(p), SIMD256T::load_ps(p + TARGET_SIMD_WIDTH)};
}
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p) // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
{
- return Integer
- {
+ return Integer{
SIMD256T::load_si(&p->v8[0]),
SIMD256T::load_si(&p->v8[1]),
};
}
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p) // return *p (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+ loadu_ps(float const* p) // return *p (same as load_ps but allows for unaligned mem)
{
- return Float
- {
- SIMD256T::loadu_ps(p),
- SIMD256T::loadu_ps(p + TARGET_SIMD_WIDTH)
- };
+ return Float{SIMD256T::loadu_ps(p), SIMD256T::loadu_ps(p + TARGET_SIMD_WIDTH)};
}
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+ loadu_si(Integer const* p) // return *p (same as load_si but allows for unaligned mem)
{
- return Integer
- {
+ return Integer{
SIMD256T::loadu_si(&p->v8[0]),
SIMD256T::loadu_si(&p->v8[1]),
};
}
// for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+ mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
{
- return Float
- {
+ return Float{
SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[0], p, idx.v8[0], mask.v8[0]),
SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[1], p, idx.v8[1], mask.v8[1]),
};
}
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
{
SIMD256T::maskstore_ps(p, mask.v8[0], src.v8[0]);
SIMD256T::maskstore_ps(p + TARGET_SIMD_WIDTH, mask.v8[1], src.v8[1]);
}
-static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const& a)
{
uint64_t mask = static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[0]));
- mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4);
+ mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4);
return mask;
}
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
{
uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[0]));
- mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2);
+ mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2);
return mask;
}
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
{
uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[0]));
- mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH;
+ mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH;
return mask;
}
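// Editorial note: each 256-bit movemask yields one bit per element, so the two halves
// are OR-merged with the high half shifted by a half's element count: TARGET_SIMD_WIDTH
// (8) for floats, TARGET_SIMD_WIDTH / 2 (4) for doubles, and TARGET_SIMD_WIDTH * 4 (32)
// for bytes.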
static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
{
- return Integer
- {
- SIMD256T::set1_epi32(i),
- SIMD256T::set1_epi32(i)
- };
+ return Integer{SIMD256T::set1_epi32(i), SIMD256T::set1_epi32(i)};
}
static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
{
- return Integer
- {
- SIMD256T::set1_epi8(i),
- SIMD256T::set1_epi8(i)
- };
+ return Integer{SIMD256T::set1_epi8(i), SIMD256T::set1_epi8(i)};
}
-static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
{
- return Float
- {
- SIMD256T::set1_ps(f),
- SIMD256T::set1_ps(f)
- };
+ return Float{SIMD256T::set1_ps(f), SIMD256T::set1_ps(f)};
}
-static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
{
- return Float
- {
- SIMD256T::setzero_ps(),
- SIMD256T::setzero_ps()
- };
+ return Float{SIMD256T::setzero_ps(), SIMD256T::setzero_ps()};
}
-static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
{
- return Integer
- {
- SIMD256T::setzero_si(),
- SIMD256T::setzero_si()
- };
+ return Integer{SIMD256T::setzero_si(), SIMD256T::setzero_si()};
}
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a) // *p = a (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+ store_ps(float* p, Float const& a) // *p = a (stores all elements contiguously in memory)
{
SIMD256T::store_ps(p, a.v8[0]);
SIMD256T::store_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
}
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a) // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
{
SIMD256T::store_si(&p->v8[0], a.v8[0]);
SIMD256T::store_si(&p->v8[1], a.v8[1]);
}
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a) // *p = a (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+ stream_ps(float* p, Float const& a) // *p = a (same as store_ps, but doesn't keep memory in cache)
{
SIMD256T::stream_ps(p, a.v8[0]);
SIMD256T::stream_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
}
-static SIMDINLINE Integer SIMDCALL set_epi32(
- int i15, int i14, int i13, int i12, int i11, int i10, int i9, int i8,
- int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
+ int i14,
+ int i13,
+ int i12,
+ int i11,
+ int i10,
+ int i9,
+ int i8,
+ int i7,
+ int i6,
+ int i5,
+ int i4,
+ int i3,
+ int i2,
+ int i1,
+ int i0)
{
- return Integer
- {
- SIMD256T::set_epi32(
- i7, i6, i5, i4, i3, i2, i1, i0),
- SIMD256T::set_epi32(
- i15, i14, i13, i12, i11, i10, i9, i8)
- };
+ return Integer{SIMD256T::set_epi32(i7, i6, i5, i4, i3, i2, i1, i0),
+ SIMD256T::set_epi32(i15, i14, i13, i12, i11, i10, i9, i8)};
}
-static SIMDINLINE Integer SIMDCALL set_epi32(
- int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+ set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
{
- return set_epi32(
- 0, 0, 0, 0, 0, 0, 0, 0,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
}
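// Lane-order note for the set_epi32 overloads above: argument i0 lands in
// lane 0, so set_epi32(7, 6, 5, 4, 3, 2, 1, 0) yields lanes {0, 1, ..., 7}
// in the low half, and the 8-argument form zero-fills lanes 8..15.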
-static SIMDINLINE Float SIMDCALL set_ps(
- float i15, float i14, float i13, float i12, float i11, float i10, float i9, float i8,
- float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL set_ps(float i15,
+ float i14,
+ float i13,
+ float i12,
+ float i11,
+ float i10,
+ float i9,
+ float i8,
+ float i7,
+ float i6,
+ float i5,
+ float i4,
+ float i3,
+ float i2,
+ float i1,
+ float i0)
{
- return Float
- {
- SIMD256T::set_ps(
- i7, i6, i5, i4, i3, i2, i1, i0),
- SIMD256T::set_ps(
- i15, i14, i13, i12, i11, i10, i9, i8)
- };
+ return Float{SIMD256T::set_ps(i7, i6, i5, i4, i3, i2, i1, i0),
+ SIMD256T::set_ps(i15, i14, i13, i12, i11, i10, i9, i8)};
}
-static SIMDINLINE Float SIMDCALL set_ps(
- float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+ set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
{
- return set_ps(
- 0, 0, 0, 0, 0, 0, 0, 0,
- i7, i6, i5, i4, i3, i2, i1, i0);
+ return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
}
static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
{
- return Float
- {
- SIMD256T::vmask_ps(mask),
- SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)
- };
+ return Float{SIMD256T::vmask_ps(mask), SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)};
}
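// vmask_ps expands bit i of 'mask' into an all-ones or all-zeros float lane i,
// with the upper 256-bit half consuming the next TARGET_SIMD_WIDTH bits; e.g.
// with TARGET_SIMD_WIDTH == 8, vmask_ps(0x00FF) sets lanes 0..7 and clears
// lanes 8..15.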
#undef SIMD_WRAPPER_1
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#if !defined(__SIMD_LIB_AVX_HPP__)
#error Do not include this file directly, use "simdlib.hpp" instead.
#endif
// no backwards compatibility for simd mask-enabled functions
-
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#pragma once
#if 0
//===========================================================================
/****************************************************************************
-* Copyright (C) 2017 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#pragma once
#if !defined(__cplusplus)
#include <inttypes.h>
#include <stdint.h>
-#define SIMD_ARCH_AVX 0
-#define SIMD_ARCH_AVX2 1
-#define SIMD_ARCH_AVX512 2
+#define SIMD_ARCH_AVX 0
+#define SIMD_ARCH_AVX2 1
+#define SIMD_ARCH_AVX512 2
#if !defined(SIMD_ARCH)
#define SIMD_ARCH SIMD_ARCH_AVX
{
enum class CompareType
{
- EQ_OQ = 0x00, // Equal (ordered, nonsignaling)
- LT_OS = 0x01, // Less-than (ordered, signaling)
- LE_OS = 0x02, // Less-than-or-equal (ordered, signaling)
- UNORD_Q = 0x03, // Unordered (nonsignaling)
- NEQ_UQ = 0x04, // Not-equal (unordered, nonsignaling)
- NLT_US = 0x05, // Not-less-than (unordered, signaling)
- NLE_US = 0x06, // Not-less-than-or-equal (unordered, signaling)
- ORD_Q = 0x07, // Ordered (nonsignaling)
- EQ_UQ = 0x08, // Equal (unordered, non-signaling)
- NGE_US = 0x09, // Not-greater-than-or-equal (unordered, signaling)
- NGT_US = 0x0A, // Not-greater-than (unordered, signaling)
- FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
- NEQ_OQ = 0x0C, // Not-equal (ordered, non-signaling)
- GE_OS = 0x0D, // Greater-than-or-equal (ordered, signaling)
- GT_OS = 0x0E, // Greater-than (ordered, signaling)
- TRUE_UQ = 0x0F, // True (unordered, non-signaling)
- EQ_OS = 0x10, // Equal (ordered, signaling)
- LT_OQ = 0x11, // Less-than (ordered, nonsignaling)
- LE_OQ = 0x12, // Less-than-or-equal (ordered, nonsignaling)
- UNORD_S = 0x13, // Unordered (signaling)
- NEQ_US = 0x14, // Not-equal (unordered, signaling)
- NLT_UQ = 0x15, // Not-less-than (unordered, nonsignaling)
- NLE_UQ = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
- ORD_S = 0x17, // Ordered (signaling)
- EQ_US = 0x18, // Equal (unordered, signaling)
- NGE_UQ = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
- NGT_UQ = 0x1A, // Not-greater-than (unordered, nonsignaling)
- FALSE_OS = 0x1B, // False (ordered, signaling)
- NEQ_OS = 0x1C, // Not-equal (ordered, signaling)
- GE_OQ = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
- GT_OQ = 0x1E, // Greater-than (ordered, nonsignaling)
- TRUE_US = 0x1F, // True (unordered, signaling)
+ EQ_OQ = 0x00, // Equal (ordered, nonsignaling)
+ LT_OS = 0x01, // Less-than (ordered, signaling)
+ LE_OS = 0x02, // Less-than-or-equal (ordered, signaling)
+ UNORD_Q = 0x03, // Unordered (nonsignaling)
+ NEQ_UQ = 0x04, // Not-equal (unordered, nonsignaling)
+ NLT_US = 0x05, // Not-less-than (unordered, signaling)
+ NLE_US = 0x06, // Not-less-than-or-equal (unordered, signaling)
+ ORD_Q = 0x07, // Ordered (nonsignaling)
+ EQ_UQ = 0x08, // Equal (unordered, non-signaling)
+ NGE_US = 0x09, // Not-greater-than-or-equal (unordered, signaling)
+ NGT_US = 0x0A, // Not-greater-than (unordered, signaling)
+ FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
+ NEQ_OQ = 0x0C, // Not-equal (ordered, non-signaling)
+ GE_OS = 0x0D, // Greater-than-or-equal (ordered, signaling)
+ GT_OS = 0x0E, // Greater-than (ordered, signaling)
+ TRUE_UQ = 0x0F, // True (unordered, non-signaling)
+ EQ_OS = 0x10, // Equal (ordered, signaling)
+ LT_OQ = 0x11, // Less-than (ordered, nonsignaling)
+ LE_OQ = 0x12, // Less-than-or-equal (ordered, nonsignaling)
+ UNORD_S = 0x13, // Unordered (signaling)
+ NEQ_US = 0x14, // Not-equal (unordered, signaling)
+ NLT_UQ = 0x15, // Not-less-than (unordered, nonsignaling)
+ NLE_UQ = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
+ ORD_S = 0x17, // Ordered (signaling)
+ EQ_US = 0x18, // Equal (unordered, signaling)
+ NGE_UQ = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
+ NGT_UQ = 0x1A, // Not-greater-than (unordered, nonsignaling)
+ FALSE_OS = 0x1B, // False (ordered, signaling)
+ NEQ_OS = 0x1C, // Not-equal (ordered, signaling)
+ GE_OQ = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
+ GT_OQ = 0x1E, // Greater-than (ordered, nonsignaling)
+ TRUE_US = 0x1F, // True (unordered, signaling)
};
#if SIMD_ARCH >= SIMD_ARCH_AVX512
enum class CompareTypeInt
{
- EQ = _MM_CMPINT_EQ, // Equal
- LT = _MM_CMPINT_LT, // Less than
- LE = _MM_CMPINT_LE, // Less than or Equal
- NE = _MM_CMPINT_NE, // Not Equal
- GE = _MM_CMPINT_GE, // Greater than or Equal
- GT = _MM_CMPINT_GT, // Greater than
+ EQ = _MM_CMPINT_EQ, // Equal
+ LT = _MM_CMPINT_LT, // Less than
+ LE = _MM_CMPINT_LE, // Less than or Equal
+ NE = _MM_CMPINT_NE, // Not Equal
+ GE = _MM_CMPINT_GE, // Greater than or Equal
+ GT = _MM_CMPINT_GT, // Greater than
};
#endif // SIMD_ARCH >= SIMD_ARCH_AVX512
enum class ScaleFactor
{
- SF_1 = 1, // No scaling
- SF_2 = 2, // Scale offset by 2
- SF_4 = 4, // Scale offset by 4
- SF_8 = 8, // Scale offset by 8
+ SF_1 = 1, // No scaling
+ SF_2 = 2, // Scale offset by 2
+ SF_4 = 4, // Scale offset by 4
+ SF_8 = 8, // Scale offset by 8
};
enum class RoundMode
{
- TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
- TO_NEG_INF = 0x01, // Round to negative infinity
- TO_POS_INF = 0x02, // Round to positive infinity
- TO_ZERO = 0x03, // Round to 0 a.k.a. truncate
- CUR_DIRECTION = 0x04, // Round in direction set in MXCSR register
-
- RAISE_EXC = 0x00, // Raise exception on overflow
- NO_EXC = 0x08, // Suppress exceptions
-
- NINT = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
- NINT_NOEXC = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
- FLOOR = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
- FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
- CEIL = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
- CEIL_NOEXC = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
- TRUNC = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
- TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
- RINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
- NEARBYINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
+ TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
+ TO_NEG_INF = 0x01, // Round to negative infinity
+ TO_POS_INF = 0x02, // Round to positive infinity
+ TO_ZERO = 0x03, // Round to 0 a.k.a. truncate
+ CUR_DIRECTION = 0x04, // Round in direction set in MXCSR register
+
+ RAISE_EXC = 0x00, // Raise exception on overflow
+ NO_EXC = 0x08, // Suppress exceptions
+
+ NINT = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
+ NINT_NOEXC = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
+ FLOOR = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
+ FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
+ CEIL = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
+ CEIL_NOEXC = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
+ TRUNC = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
+ TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
+ RINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
+ NEARBYINT = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
};
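// The composite values above are just a mode OR'd with an exception-policy
// bit; two compile-time checks of that arithmetic (a sketch, valid wherever
// RoundMode is visible):
static_assert(static_cast<int>(RoundMode::FLOOR_NOEXC) == 0x09,
              "FLOOR_NOEXC == TO_NEG_INF (0x01) | NO_EXC (0x08)");
static_assert(static_cast<int>(RoundMode::NINT) == 0x00,
              "NINT == TO_NEAREST_INT (0x00) | RAISE_EXC (0x00)");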
struct Traits
};
// Attribute, 4-dimensional attribute in SIMD SOA layout
- template<typename Float, typename Integer, typename Double>
+ template <typename Float, typename Integer, typename Double>
union Vec4
{
Float v[4];
Double vd[4];
struct
{
- Float x;
- Float y;
- Float z;
- Float w;
+ Float x;
+ Float y;
+ Float z;
+ Float w;
};
- SIMDINLINE Float& SIMDCALL operator[] (const int i) { return v[i]; }
- SIMDINLINE Float const & SIMDCALL operator[] (const int i) const { return v[i]; }
- SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const & in)
+ SIMDINLINE Float& SIMDCALL operator[](const int i) { return v[i]; }
+ SIMDINLINE Float const& SIMDCALL operator[](const int i) const { return v[i]; }
+ SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const& in)
{
v[0] = in.v[0];
v[1] = in.v[1];
{
SIMDINLINE Float() = default;
SIMDINLINE Float(__m128 in) : v(in) {}
- SIMDINLINE Float& SIMDCALL operator=(__m128 in) { v = in; return *this; }
- SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; }
+ SIMDINLINE Float& SIMDCALL operator=(__m128 in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Float& SIMDCALL operator=(Float const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m128() const { return v; }
SIMDALIGN(__m128, 16) v;
{
SIMDINLINE Integer() = default;
SIMDINLINE Integer(__m128i in) : v(in) {}
- SIMDINLINE Integer& SIMDCALL operator=(__m128i in) { v = in; return *this; }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; }
+ SIMDINLINE Integer& SIMDCALL operator=(__m128i in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m128i() const { return v; }
SIMDALIGN(__m128i, 16) v;
{
SIMDINLINE Double() = default;
SIMDINLINE Double(__m128d in) : v(in) {}
- SIMDINLINE Double& SIMDCALL operator=(__m128d in) { v = in; return *this; }
- SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; }
+ SIMDINLINE Double& SIMDCALL operator=(__m128d in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Double& SIMDCALL operator=(Double const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m128d() const { return v; }
SIMDALIGN(__m128d, 16) v;
using Mask = uint8_t;
static const uint32_t SIMD_WIDTH = 4;
- } // ns SIMD128Impl
+ } // namespace SIMD128Impl
namespace SIMD256Impl
{
{
SIMDINLINE Float() = default;
SIMDINLINE Float(__m256 in) : v(in) {}
- SIMDINLINE Float(SIMD128Impl::Float const &in_lo, SIMD128Impl::Float const &in_hi = _mm_setzero_ps())
+ SIMDINLINE Float(SIMD128Impl::Float const& in_lo,
+ SIMD128Impl::Float const& in_hi = _mm_setzero_ps())
{
v = _mm256_insertf128_ps(_mm256_castps128_ps256(in_lo), in_hi, 0x1);
}
- SIMDINLINE Float& SIMDCALL operator=(__m256 in) { v = in; return *this; }
- SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; }
+ SIMDINLINE Float& SIMDCALL operator=(__m256 in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Float& SIMDCALL operator=(Float const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m256() const { return v; }
SIMDALIGN(__m256, 32) v;
{
SIMDINLINE Integer() = default;
SIMDINLINE Integer(__m256i in) : v(in) {}
- SIMDINLINE Integer(SIMD128Impl::Integer const &in_lo, SIMD128Impl::Integer const &in_hi = _mm_setzero_si128())
+ SIMDINLINE Integer(SIMD128Impl::Integer const& in_lo,
+ SIMD128Impl::Integer const& in_hi = _mm_setzero_si128())
{
v = _mm256_insertf128_si256(_mm256_castsi128_si256(in_lo), in_hi, 0x1);
}
- SIMDINLINE Integer& SIMDCALL operator=(__m256i in) { v = in; return *this; }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; }
+ SIMDINLINE Integer& SIMDCALL operator=(__m256i in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m256i() const { return v; }
SIMDALIGN(__m256i, 32) v;
union Double
{
SIMDINLINE Double() = default;
- SIMDINLINE Double(__m256d const &in) : v(in) {}
- SIMDINLINE Double(SIMD128Impl::Double const &in_lo, SIMD128Impl::Double const &in_hi = _mm_setzero_pd())
+ SIMDINLINE Double(__m256d const& in) : v(in) {}
+ SIMDINLINE Double(SIMD128Impl::Double const& in_lo,
+ SIMD128Impl::Double const& in_hi = _mm_setzero_pd())
{
v = _mm256_insertf128_pd(_mm256_castpd128_pd256(in_lo), in_hi, 0x1);
}
- SIMDINLINE Double& SIMDCALL operator=(__m256d in) { v = in; return *this; }
- SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; }
+ SIMDINLINE Double& SIMDCALL operator=(__m256d in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Double& SIMDCALL operator=(Double const& in)
+ {
+ v = in.v;
+ return *this;
+ }
SIMDINLINE SIMDCALL operator __m256d() const { return v; }
SIMDALIGN(__m256d, 32) v;
using Mask = uint8_t;
static const uint32_t SIMD_WIDTH = 8;
- } // ns SIMD256Impl
+ } // namespace SIMD256Impl
namespace SIMD512Impl
{
union __m512i
{
private:
- int8_t m512i_i8[64];
- int16_t m512i_i16[32];
- int32_t m512i_i32[16];
- int64_t m512i_i64[8];
- uint8_t m512i_u8[64];
- uint16_t m512i_u16[32];
- uint32_t m512i_u32[16];
- uint64_t m512i_u64[8];
+ int8_t m512i_i8[64];
+ int16_t m512i_i16[32];
+ int32_t m512i_i32[16];
+ int64_t m512i_i64[8];
+ uint8_t m512i_u8[64];
+ uint16_t m512i_u16[32];
+ uint32_t m512i_u32[16];
+ uint64_t m512i_u64[8];
};
using __mmask16 = uint16_t;
{
SIMDINLINE Float() = default;
SIMDINLINE Float(__m512 in) : v(in) {}
- SIMDINLINE Float(SIMD256Impl::Float const &in_lo, SIMD256Impl::Float const &in_hi = _mm256_setzero_ps()) { v8[0] = in_lo; v8[1] = in_hi; }
- SIMDINLINE Float& SIMDCALL operator=(__m512 in) { v = in; return *this; }
- SIMDINLINE Float& SIMDCALL operator=(Float const & in)
+ SIMDINLINE Float(SIMD256Impl::Float const& in_lo,
+ SIMD256Impl::Float const& in_hi = _mm256_setzero_ps())
+ {
+ v8[0] = in_lo;
+ v8[1] = in_hi;
+ }
+ SIMDINLINE Float& SIMDCALL operator=(__m512 in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Float& SIMDCALL operator=(Float const& in)
{
#if SIMD_ARCH >= SIMD_ARCH_AVX512
v = in.v;
{
SIMDINLINE Integer() = default;
SIMDINLINE Integer(__m512i in) : v(in) {}
- SIMDINLINE Integer(SIMD256Impl::Integer const &in_lo, SIMD256Impl::Integer const &in_hi = _mm256_setzero_si256()) { v8[0] = in_lo; v8[1] = in_hi; }
- SIMDINLINE Integer& SIMDCALL operator=(__m512i in) { v = in; return *this; }
- SIMDINLINE Integer& SIMDCALL operator=(Integer const & in)
+ SIMDINLINE Integer(SIMD256Impl::Integer const& in_lo,
+ SIMD256Impl::Integer const& in_hi = _mm256_setzero_si256())
+ {
+ v8[0] = in_lo;
+ v8[1] = in_hi;
+ }
+ SIMDINLINE Integer& SIMDCALL operator=(__m512i in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
{
#if SIMD_ARCH >= SIMD_ARCH_AVX512
v = in.v;
{
SIMDINLINE Double() = default;
SIMDINLINE Double(__m512d in) : v(in) {}
- SIMDINLINE Double(SIMD256Impl::Double const &in_lo, SIMD256Impl::Double const &in_hi = _mm256_setzero_pd()) { v8[0] = in_lo; v8[1] = in_hi; }
- SIMDINLINE Double& SIMDCALL operator=(__m512d in) { v = in; return *this; }
- SIMDINLINE Double& SIMDCALL operator=(Double const & in)
+ SIMDINLINE Double(SIMD256Impl::Double const& in_lo,
+ SIMD256Impl::Double const& in_hi = _mm256_setzero_pd())
+ {
+ v8[0] = in_lo;
+ v8[1] = in_hi;
+ }
+ SIMDINLINE Double& SIMDCALL operator=(__m512d in)
+ {
+ v = in;
+ return *this;
+ }
+ SIMDINLINE Double& SIMDCALL operator=(Double const& in)
{
#if SIMD_ARCH >= SIMD_ARCH_AVX512
v = in.v;
static const uint32_t SIMD_WIDTH = 16;
#undef SIMD_ALIGNMENT_BYTES
- } // ns SIMD512Impl
-} // ns SIMDImpl
+ } // namespace SIMD512Impl
+} // namespace SIMDImpl
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#include "common/os.h"
#include <stdarg.h>
{
enum class TextColor
{
- BLACK = 0,
+ BLACK = 0,
#if defined(_WIN32)
- RED = 4,
- GREEN = 2,
- BLUE = 1,
+ RED = 4,
+ GREEN = 2,
+ BLUE = 1,
#else
- RED = 1,
- GREEN = 2,
- BLUE = 4,
+ RED = 1,
+ GREEN = 2,
+ BLUE = 4,
#endif // _WIN32
- PURPLE = static_cast<uint32_t>(RED) | static_cast<uint32_t>(BLUE),
- CYAN = static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
- YELLOW = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN),
- WHITE = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
+ PURPLE = static_cast<uint32_t>(RED) | static_cast<uint32_t>(BLUE),
+ CYAN = static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
+ YELLOW = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN),
+ WHITE =
+ static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
};
enum class TextStyle
{
- NORMAL = 0,
- INTENSITY = 1,
+ NORMAL = 0,
+ INTENSITY = 1,
};
- void SetTextColor(FILE* stream, TextColor color = TextColor::WHITE, TextStyle style = TextStyle::NORMAL)
+ void SetTextColor(FILE* stream,
+ TextColor color = TextColor::WHITE,
+ TextStyle style = TextStyle::NORMAL)
{
#if defined(_WIN32)
#else // !_WIN32
// Print ANSI codes
- uint32_t cc = 30 + ((style == TextStyle::INTENSITY) ? 60 : 0) + static_cast<uint32_t>(color);
+ uint32_t cc =
+ 30 + ((style == TextStyle::INTENSITY) ? 60 : 0) + static_cast<uint32_t>(color);
fprintf(stream, "\033[0m\033[%d;%dm", static_cast<uint32_t>(style), cc);
#endif
}
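// Worked example for the ANSI path above: intense green is
// style == TextStyle::INTENSITY (1) and color == TextColor::GREEN (2), so
// cc = 30 + 60 + 2 = 92 and the emitted sequence is "\033[0m\033[1;92m";
// the NORMAL white default is cc = 30 + 0 + 7 = 37, i.e. "\033[0m\033[0;37m".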
static std::mutex g_stderrMutex;
-} // ns ConsoleUtils
-
-bool SwrAssert(
- bool chkDebugger,
- bool& enabled,
- const char* pExpression,
- const char* pFileName,
- uint32_t lineNum,
- const char* pFunction,
- const char* pFmtString,
- ...)
+} // namespace ConsoleUtils
+
+bool SwrAssert(bool chkDebugger,
+ bool& enabled,
+ const char* pExpression,
+ const char* pFileName,
+ uint32_t lineNum,
+ const char* pFunction,
+ const char* pFmtString,
+ ...)
{
using namespace ConsoleUtils;
std::lock_guard<std::mutex> l(g_stderrMutex);
#if defined(_WIN32)
static const int MAX_MESSAGE_LEN = 2048;
- char msgBuf[MAX_MESSAGE_LEN];
+ char msgBuf[MAX_MESSAGE_LEN];
sprintf_s(msgBuf, "%s(%d): ASSERT: %s\n", pFileName, lineNum, pExpression);
msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
{
va_list args;
va_start(args, pFmtString);
- offset = _vsnprintf_s(
- msgBuf,
- sizeof(msgBuf),
- sizeof(msgBuf),
- pFmtString,
- args);
+ offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
va_end(args);
- if (offset < 0) { return true; }
+ if (offset < 0)
+ {
+ return true;
+ }
OutputDebugStringA("\t");
OutputDebugStringA(msgBuf);
if (enabled && KNOB_ENABLE_ASSERT_DIALOGS)
{
- int retval = sprintf_s(
- &msgBuf[offset],
- MAX_MESSAGE_LEN - offset,
- "\n\n"
- "File: %s\n"
- "Line: %d\n"
- "\n"
- "Expression: %s\n\n"
- "Cancel: Disable this assert for the remainder of the process\n"
- "Try Again: Break into the debugger\n"
- "Continue: Continue execution (but leave assert enabled)",
- pFileName,
- lineNum,
- pExpression);
-
- if (retval < 0) { return true; }
+ int retval = sprintf_s(&msgBuf[offset],
+ MAX_MESSAGE_LEN - offset,
+ "\n\n"
+ "File: %s\n"
+ "Line: %d\n"
+ "\n"
+ "Expression: %s\n\n"
+ "Cancel: Disable this assert for the remainder of the process\n"
+ "Try Again: Break into the debugger\n"
+ "Continue: Continue execution (but leave assert enabled)",
+ pFileName,
+ lineNum,
+ pExpression);
+
+ if (retval < 0)
+ {
+ return true;
+ }
offset += retval;
if (!IsDebuggerPresent())
{
- sprintf_s(
- &msgBuf[offset],
- MAX_MESSAGE_LEN - offset,
- "\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a program crash!");
+ sprintf_s(&msgBuf[offset],
+ MAX_MESSAGE_LEN - offset,
+ "\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a "
+ "program crash!");
}
- retval = MessageBoxA(nullptr, msgBuf, "Assert Failed", MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION | MB_SETFOREGROUND);
+ retval = MessageBoxA(nullptr,
+ msgBuf,
+ "Assert Failed",
+ MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION | MB_SETFOREGROUND);
switch (retval)
{
- case IDCANCEL:
- enabled = false;
- return false;
+ case IDCANCEL:
+ enabled = false;
+ return false;
- case IDTRYAGAIN:
- return true;
+ case IDTRYAGAIN:
+ return true;
- case IDCONTINUE:
- return false;
+ case IDCONTINUE:
+ return false;
}
}
else
}
void SwrTrace(
- const char* pFileName,
- uint32_t lineNum,
- const char* pFunction,
- const char* pFmtString,
- ...)
+ const char* pFileName, uint32_t lineNum, const char* pFunction, const char* pFmtString, ...)
{
using namespace ConsoleUtils;
std::lock_guard<std::mutex> l(g_stderrMutex);
#if defined(_WIN32)
static const int MAX_MESSAGE_LEN = 2048;
- char msgBuf[MAX_MESSAGE_LEN];
+ char msgBuf[MAX_MESSAGE_LEN];
sprintf_s(msgBuf, "%s(%d): TRACE in %s\n", pFileName, lineNum, pFunction);
msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
{
va_list args;
va_start(args, pFmtString);
- offset = _vsnprintf_s(
- msgBuf,
- sizeof(msgBuf),
- sizeof(msgBuf),
- pFmtString,
- args);
+ offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
va_end(args);
- if (offset < 0) { return; }
+ if (offset < 0)
+ {
+ return;
+ }
OutputDebugStringA("\t");
OutputDebugStringA(msgBuf);
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#ifndef __SWR_ASSERT_H__
#define __SWR_ASSERT_H__
// Stupid preprocessor tricks to avoid -Wall / -W4 warnings
#if defined(_MSC_VER)
-#define _SWR_WARN_DISABLE __pragma(warning(push)) __pragma(warning(disable:4127))
+#define _SWR_WARN_DISABLE __pragma(warning(push)) __pragma(warning(disable : 4127))
#define _SWR_WARN_RESTORE __pragma(warning(pop))
#else // ! MSVC compiler
#define _SWR_WARN_DISABLE
#define _SWR_WARN_RESTORE
#endif
-#define _SWR_MACRO_START do {
-#define _SWR_MACRO_END \
- _SWR_WARN_DISABLE \
- } while(0) \
+#define _SWR_MACRO_START \
+ do \
+ {
+#define _SWR_MACRO_END \
+ _SWR_WARN_DISABLE \
+ } \
+ while (0) \
_SWR_WARN_RESTORE
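// The do { } while (0) wrapper above makes a multi-statement macro behave as
// a single statement, so it composes with unbraced control flow; e.g. (names
// here are illustrative):
//
//     if (ptrValid)
//         SWR_ASSUME(pBuf != nullptr); // expands to do { ... } while (0)
//     else
//         HandleFailure();             // still pairs with the 'if' above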
-
#if defined(_WIN32)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START __assume(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...) \
+ _SWR_MACRO_START __assume(e); \
+ _SWR_MACRO_END
#elif defined(__clang__)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START __builtin_assume(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...) \
+ _SWR_MACRO_START __builtin_assume(e); \
+ _SWR_MACRO_END
#elif defined(__GNUC__)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START ((e) ? ((void)0) : __builtin_unreachable()); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...) \
+ _SWR_MACRO_START((e) ? ((void)0) : __builtin_unreachable()); \
+ _SWR_MACRO_END
#else
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START ASSUME(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...) \
+ _SWR_MACRO_START ASSUME(e); \
+ _SWR_MACRO_END
#endif
#if !defined(SWR_ENABLE_ASSERTS)
#else
-bool SwrAssert(
- bool chkDebugger,
- bool& enabled,
- const char* pExpression,
- const char* pFileName,
- uint32_t lineNum,
- const char* function,
- const char* pFmtString = nullptr,
- ...);
+bool SwrAssert(bool chkDebugger,
+ bool& enabled,
+ const char* pExpression,
+ const char* pFileName,
+ uint32_t lineNum,
+ const char* function,
+ const char* pFmtString = nullptr,
+ ...);
void SwrTrace(
- const char* pFileName,
- uint32_t lineNum,
- const char* function,
- const char* pFmtString,
- ...);
-
-#define _SWR_ASSERT(chkDebugger, e, ...) \
- _SWR_MACRO_START \
- bool expFailed = !(e);\
- if (expFailed) {\
- static bool swrAssertEnabled = true;\
- expFailed = SwrAssert(chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);\
- if (expFailed) { DEBUGBREAK; }\
- }\
+ const char* pFileName, uint32_t lineNum, const char* function, const char* pFmtString, ...);
+
+#define _SWR_ASSERT(chkDebugger, e, ...) \
+ _SWR_MACRO_START \
+ bool expFailed = !(e); \
+ if (expFailed) \
+ { \
+ static bool swrAssertEnabled = true; \
+ expFailed = SwrAssert( \
+ chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+ if (expFailed) \
+ { \
+ DEBUGBREAK; \
+ } \
+ } \
_SWR_MACRO_END
-#define _SWR_INVALID(chkDebugger, ...) \
- _SWR_MACRO_START \
- static bool swrAssertEnabled = true;\
- bool expFailed = SwrAssert(chkDebugger, swrAssertEnabled, "", __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);\
- if (expFailed) { DEBUGBREAK; }\
+#define _SWR_INVALID(chkDebugger, ...) \
+ _SWR_MACRO_START \
+ static bool swrAssertEnabled = true; \
+ bool expFailed = SwrAssert( \
+ chkDebugger, swrAssertEnabled, "", __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+ if (expFailed) \
+ { \
+ DEBUGBREAK; \
+ } \
_SWR_MACRO_END
-#define _SWR_TRACE(_fmtstr, ...) \
- SwrTrace(__FILE__, __LINE__, __FUNCTION__, _fmtstr, ##__VA_ARGS__);
+#define _SWR_TRACE(_fmtstr, ...) SwrTrace(__FILE__, __LINE__, __FUNCTION__, _fmtstr, ##__VA_ARGS__);
#if SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__)
-#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSERT(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
+#define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__)
+#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSERT(e, ##__VA_ARGS__)
+#define SWR_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
#if defined(assert)
#undef assert
#endif // SWR_ENABLE_ASSERTS
#if SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...) _SWR_ASSERT(false, e, ##__VA_ARGS__)
-#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_REL_ASSERT(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
+#define SWR_REL_ASSERT(e, ...) _SWR_ASSERT(false, e, ##__VA_ARGS__)
+#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_REL_ASSERT(e, ##__VA_ARGS__)
+#define SWR_REL_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
// SWR_INVALID is always enabled
// Funky handling to allow 0 arguments with g++/gcc
// This is needed because you can't "swallow commas" with ##__VA_ARGS__ unless
// there is a first argument to the macro. So having a macro that can optionally
// accept 0 arguments is tricky.
-#define _SWR_INVALID_0() _SWR_INVALID(false)
-#define _SWR_INVALID_1(...) _SWR_INVALID(false, ##__VA_ARGS__)
+#define _SWR_INVALID_0() _SWR_INVALID(false)
+#define _SWR_INVALID_1(...) _SWR_INVALID(false, ##__VA_ARGS__)
#define _SWR_INVALID_VARGS_(_10, _9, _8, _7, _6, _5, _4, _3, _2, _1, N, ...) N
-#define _SWR_INVALID_VARGS(...) _SWR_INVALID_VARGS_(__VA_ARGS__, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
-#define _SWR_INVALID_VARGS_0() 1, 2, 3, 4, 5, 6, 7, 9, 9, 10
-#define _SWR_INVALID_CONCAT_(a, b) a##b
-#define _SWR_INVALID_CONCAT(a, b) _SWR_INVALID_CONCAT_(a, b)
-#define SWR_INVALID(...) \
- _SWR_INVALID_CONCAT(_SWR_INVALID_,_SWR_INVALID_VARGS(_SWR_INVALID_VARGS_0 __VA_ARGS__ ()))(__VA_ARGS__)
+#define _SWR_INVALID_VARGS(...) _SWR_INVALID_VARGS_(__VA_ARGS__, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+#define _SWR_INVALID_VARGS_0() 1, 2, 3, 4, 5, 6, 7, 9, 9, 10
+#define _SWR_INVALID_CONCAT_(a, b) a##b
+#define _SWR_INVALID_CONCAT(a, b) _SWR_INVALID_CONCAT_(a, b)
+#define SWR_INVALID(...) \
+ _SWR_INVALID_CONCAT(_SWR_INVALID_, _SWR_INVALID_VARGS(_SWR_INVALID_VARGS_0 __VA_ARGS__())) \
+ (__VA_ARGS__)
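// How the zero-argument dispatch above resolves (illustrative expansion):
//
//     SWR_INVALID()
//         -> _SWR_INVALID_VARGS_0 () expands to ten arguments, so
//            _SWR_INVALID_VARGS selects 0 -> _SWR_INVALID_0()
//     SWR_INVALID("bad enum: %d", e)
//         -> _SWR_INVALID_VARGS_0 is not immediately followed by '(', so it
//            does not expand and _SWR_INVALID_VARGS selects 1
//            -> _SWR_INVALID_1("bad enum: %d", e)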
#endif
#endif // C++
#endif // SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
// Needed to allow passing bitfield members to sizeof() in disabled asserts
-template<typename T>
-static bool SwrSizeofWorkaround(T) {return false;}
+template <typename T>
+static bool SwrSizeofWorkaround(T)
+{
+ return false;
+}
#if !SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...) _SWR_MACRO_START (void)sizeof(SwrSizeofWorkaround(e)); _SWR_MACRO_END
-#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...) _SWR_MACRO_START (void)(0); _SWR_MACRO_END
+#define SWR_ASSERT(e, ...) \
+ _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
+ _SWR_MACRO_END
+#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
+#define SWR_TRACE(_fmtstr, ...) \
+ _SWR_MACRO_START(void)(0); \
+ _SWR_MACRO_END
#endif
#if !SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...) _SWR_MACRO_START (void)sizeof(SwrSizeofWorkaround(e)); _SWR_MACRO_END
-#define SWR_INVALID(...) _SWR_MACRO_START (void)(0); _SWR_MACRO_END
-#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...) _SWR_MACRO_START (void)(0); _SWR_MACRO_END
+#define SWR_REL_ASSERT(e, ...) \
+ _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
+ _SWR_MACRO_END
+#define SWR_INVALID(...) \
+ _SWR_MACRO_START(void)(0); \
+ _SWR_MACRO_END
+#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
+#define SWR_REL_TRACE(_fmtstr, ...) \
+ _SWR_MACRO_START(void)(0); \
+ _SWR_MACRO_END
#endif
#if defined(_MSC_VER)
#define SWR_NOT_IMPL SWR_INVALID("%s not implemented", SWR_FUNCTION_DECL)
-#endif//__SWR_ASSERT_H__
+#endif //__SWR_ASSERT_H__
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file api.cpp
-*
-* @brief API implementation
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file api.cpp
+ *
+ * @brief API implementation
+ *
+ ******************************************************************************/
#include <cfloat>
#include <cmath>
#include "common/os.h"
-static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
+static const SWR_RECT g_MaxScissorRect = {0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y};
-void SetupDefaultState(SWR_CONTEXT *pContext);
+void SetupDefaultState(SWR_CONTEXT* pContext);
static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
{
return (SWR_CONTEXT*)hContext;
}
-void WakeAllThreads(SWR_CONTEXT *pContext)
+void WakeAllThreads(SWR_CONTEXT* pContext)
{
pContext->FifosNotEmpty.notify_all();
}
//////////////////////////////////////////////////////////////////////////
/// @brief Create SWR Context.
/// @param pCreateInfo - pointer to creation info.
-HANDLE SwrCreateContext(
- SWR_CREATECONTEXT_INFO* pCreateInfo)
+HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
{
RDTSC_RESET();
RDTSC_INIT(0);
void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
memset(pContextMem, 0, sizeof(SWR_CONTEXT));
- SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT();
+ SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT();
pContext->privateStateSize = pCreateInfo->privateStateSize;
pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
- pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
- pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
+ pContext->pMacroTileManagerArray =
+ (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
+ pContext->pDispatchQueueArray =
+ (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
{
}
else
{
- pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
- pContext->threadInfo.BASE_NUMA_NODE = KNOB_BASE_NUMA_NODE;
- pContext->threadInfo.BASE_CORE = KNOB_BASE_CORE;
- pContext->threadInfo.BASE_THREAD = KNOB_BASE_THREAD;
- pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
- pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
- pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
- pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
+ pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;
+ pContext->threadInfo.BASE_NUMA_NODE = KNOB_BASE_NUMA_NODE;
+ pContext->threadInfo.BASE_CORE = KNOB_BASE_CORE;
+ pContext->threadInfo.BASE_THREAD = KNOB_BASE_THREAD;
+ pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;
+ pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
+ pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;
+ pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;
}
if (pCreateInfo->pApiThreadInfo)
}
else
{
- pContext->apiThreadInfo.bindAPIThread0 = true;
- pContext->apiThreadInfo.numAPIReservedThreads = 1;
- pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
+ pContext->apiThreadInfo.bindAPIThread0 = true;
+ pContext->apiThreadInfo.numAPIReservedThreads = 1;
+ pContext->apiThreadInfo.numAPIThreadsPerCore = 1;
}
if (pCreateInfo->pWorkerPrivateState)
}
pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
- pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
+ pContext->pStats =
+ (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
#if defined(KNOB_ENABLE_AR)
// Set up ArchRast thread contexts; this includes +1 for the API thread.
- pContext->pArContext = new HANDLE[pContext->NumWorkerThreads+1];
- pContext->pArContext[pContext->NumWorkerThreads] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
+ pContext->pArContext = new HANDLE[pContext->NumWorkerThreads + 1];
+ pContext->pArContext[pContext->NumWorkerThreads] =
+ ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
#endif
// Allocate scratch space for workers.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
#if defined(_WIN32)
- uint32_t numaNode = pContext->threadPool.pThreadData ?
- pContext->threadPool.pThreadData[i].numaId : 0;
- pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(
- GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
- MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
- numaNode);
+ uint32_t numaNode =
+ pContext->threadPool.pThreadData ? pContext->threadPool.pThreadData[i].numaId : 0;
+ pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
+ nullptr,
+ 32 * sizeof(KILOBYTE),
+ MEM_RESERVE | MEM_COMMIT,
+ PAGE_READWRITE,
+ numaNode);
#else
- pContext->ppScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+ pContext->ppScratch[i] =
+ (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
#endif
#if defined(KNOB_ENABLE_AR)
pContext->pHotTileMgr = new HotTileMgr();
// initialize callback functions
- pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
- pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
- pContext->pfnClearTile = pCreateInfo->pfnClearTile;
+ pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
+ pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
+ pContext->pfnClearTile = pCreateInfo->pfnClearTile;
pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
- pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
- pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
-
+ pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
+ pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
+
// pass pointer to bucket manager back to caller
#ifdef KNOB_ENABLE_RDTSC
memcpy(&dst.state, &src.state, sizeof(API_STATE));
}
-template<bool IsDraw>
-void QueueWork(SWR_CONTEXT *pContext)
+template <bool IsDraw>
+void QueueWork(SWR_CONTEXT* pContext)
{
- DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
- uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
+ DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
+ uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
if (IsDraw)
{
if (IsDraw)
{
- uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
+ uint32_t curDraw[2] = {pContext->pCurDrawContext->drawId,
+ pContext->pCurDrawContext->drawId};
WorkOnFifoFE(pContext, 0, curDraw[0]);
WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
}
WorkOnCompute(pContext, 0, curDispatch);
}
- // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
- while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
+ // Dequeue the work here, if not already done, since we're single threaded (i.e. no
+ // workers).
+ while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0)
+ {
+ }
// restore csr
_mm_setcsr(mxcsr);
RDTSC_END(APIDrawWakeAllThreads, 1);
}
- // Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
+ // Set current draw context to NULL so that next state call forces a new draw context to be
+ // created and populated.
pContext->pPrevDrawContext = pContext->pCurDrawContext;
- pContext->pCurDrawContext = nullptr;
+ pContext->pCurDrawContext = nullptr;
}
INLINE void QueueDraw(SWR_CONTEXT* pContext)
QueueWork<false>(pContext);
}
-DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
+DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
{
RDTSC_BEGIN(APIGetDrawContext, 0);
// If the current draw context is null, we need to obtain a new draw context from the ring.
pContext->cachingArenaAllocator.FreeOldBlocks();
pContext->lastFrameChecked = pContext->frameCount;
- pContext->lastDrawChecked = curDraw;
+ pContext->lastDrawChecked = curDraw;
}
DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex];
- pContext->pCurDrawContext = pCurDrawContext;
+ pContext->pCurDrawContext = pCurDrawContext;
// Assign next available entry in DS ring to this DC.
- uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
+ uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
pCurDrawContext->pState = &pContext->dsRing[dsIndex];
// Copy previous state to current state.
pCurDrawContext->pState->pPrivateState = nullptr;
- pContext->curStateId++; // Progress state ring index forward.
+ pContext->curStateId++; // Progress state ring index forward.
}
else
{
else
{
SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
- pContext->curStateId++; // Progress state ring index forward.
+ pContext->curStateId++; // Progress state ring index forward.
}
SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
// Reset dependency
- pCurDrawContext->dependent = false;
+ pCurDrawContext->dependent = false;
pCurDrawContext->dependentFE = false;
- pCurDrawContext->pContext = pContext;
+ pCurDrawContext->pContext = pContext;
pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
- pCurDrawContext->doneFE = false;
- pCurDrawContext->FeLock = 0;
- pCurDrawContext->threadsDone = 0;
+ pCurDrawContext->doneFE = false;
+ pCurDrawContext->FeLock = 0;
+ pCurDrawContext->threadsDone = 0;
pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr;
pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads);
return pContext->pCurDrawContext;
}
-API_STATE* GetDrawState(SWR_CONTEXT *pContext)
+API_STATE* GetDrawState(SWR_CONTEXT* pContext)
{
DRAW_CONTEXT* pDC = GetDrawContext(pContext);
SWR_ASSERT(pDC->pState != nullptr);
void SwrDestroyContext(HANDLE hContext)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- pDC->FeWork.type = SHUTDOWN;
+ pDC->FeWork.type = SHUTDOWN;
pDC->FeWork.pfnWork = ProcessShutdown;
- //enqueue
+ // enqueue
QueueDraw(pContext);
DestroyThreadPool(pContext, &pContext->threadPool);
void SwrBindApiThread(HANDLE hContext, uint32_t apiThreadId)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
BindApiThread(pContext, apiThreadId);
}
-void SWR_API SwrSaveState(
- HANDLE hContext,
- void* pOutputStateBlock,
- size_t memSize)
+void SWR_API SwrSaveState(HANDLE hContext, void* pOutputStateBlock, size_t memSize)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- auto pSrc = GetDrawState(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ auto pSrc = GetDrawState(pContext);
SWR_ASSERT(pOutputStateBlock && memSize >= sizeof(*pSrc));
memcpy(pOutputStateBlock, pSrc, sizeof(*pSrc));
}
-void SWR_API SwrRestoreState(
- HANDLE hContext,
- const void* pStateBlock,
- size_t memSize)
+void SWR_API SwrRestoreState(HANDLE hContext, const void* pStateBlock, size_t memSize)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- auto pDst = GetDrawState(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ auto pDst = GetDrawState(pContext);
SWR_ASSERT(pStateBlock && memSize >= sizeof(*pDst));
memcpy(pDst, pStateBlock, sizeof(*pDst));
}
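// Editorial sketch (not part of the patch): a scoped save/restore helper built on
// the two entry points above; assumes the SWR API headers are included so that
// API_STATE and the Swr* prototypes are visible.
class ScopedSwrState
{
public:
    explicit ScopedSwrState(HANDLE hContext) : mhContext(hContext)
    {
        SwrSaveState(mhContext, mBlock, sizeof(mBlock));
    }
    ~ScopedSwrState() { SwrRestoreState(mhContext, mBlock, sizeof(mBlock)); }

private:
    HANDLE  mhContext;
    uint8_t mBlock[sizeof(API_STATE)]; // snapshot sized to the full draw state
};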
-void SetupDefaultState(SWR_CONTEXT *pContext)
+void SetupDefaultState(SWR_CONTEXT* pContext)
{
API_STATE* pState = GetDrawState(pContext);
- pState->rastState.cullMode = SWR_CULLMODE_NONE;
+ pState->rastState.cullMode = SWR_CULLMODE_NONE;
pState->rastState.frontWinding = SWR_FRONTWINDING_CCW;
- pState->depthBoundsState.depthBoundsTestEnable = false;
+ pState->depthBoundsState.depthBoundsTestEnable = false;
pState->depthBoundsState.depthBoundsTestMinValue = 0.0f;
pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f;
}
-void SWR_API SwrSync(
- HANDLE hContext,
- PFN_CALLBACK_FUNC pfnFunc,
- uint64_t userData,
- uint64_t userData2,
- uint64_t userData3)
+void SWR_API SwrSync(HANDLE hContext,
+ PFN_CALLBACK_FUNC pfnFunc,
+ uint64_t userData,
+ uint64_t userData2,
+ uint64_t userData3)
{
SWR_ASSERT(pfnFunc != nullptr);
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APISync, 0);
- pDC->FeWork.type = SYNC;
+ pDC->FeWork.type = SYNC;
pDC->FeWork.pfnWork = ProcessSync;
// Setup callback function
pDC->retireCallback.pfnCallbackFunc = pfnFunc;
- pDC->retireCallback.userData = userData;
- pDC->retireCallback.userData2 = userData2;
- pDC->retireCallback.userData3 = userData3;
+ pDC->retireCallback.userData = userData;
+ pDC->retireCallback.userData2 = userData2;
+ pDC->retireCallback.userData3 = userData3;
AR_API_EVENT(SwrSyncEvent(pDC->drawId));
- //enqueue
+ // enqueue
QueueDraw(pContext);
RDTSC_END(APISync, 1);
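// Editorial sketch (not part of the patch): a minimal fence built on the retire
// callback installed above. Assumes <atomic>; userData carries the fence pointer,
// userData2 the value to signal, and userData3 is unused in this sketch.
static void FenceRetiredCallback(uint64_t userData, uint64_t userData2, uint64_t /*userData3*/)
{
    auto* pFenceValue = reinterpret_cast<std::atomic<uint64_t>*>(userData);
    pFenceValue->store(userData2, std::memory_order_release); // signaled from a worker
}
// Usage: SwrSync(hContext, FenceRetiredCallback, (uint64_t)&fenceValue, nextValue, 0);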
void SwrStallBE(HANDLE hContext)
{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
pDC->dependent = true;
}
void SwrWaitForIdle(HANDLE hContext)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
RDTSC_BEGIN(APIWaitForIdle, 0);
void SwrWaitForIdleFE(HANDLE hContext)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
RDTSC_BEGIN(APIWaitForIdle, 0);
RDTSC_END(APIWaitForIdle, 1);
}
-void SwrSetVertexBuffers(
- HANDLE hContext,
- uint32_t numBuffers,
- const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
+void SwrSetVertexBuffers(HANDLE hContext,
+ uint32_t numBuffers,
+ const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
for (uint32_t i = 0; i < numBuffers; ++i)
{
- const SWR_VERTEX_BUFFER_STATE *pVB = &pVertexBuffers[i];
- pState->vertexBuffers[pVB->index] = *pVB;
+ const SWR_VERTEX_BUFFER_STATE* pVB = &pVertexBuffers[i];
+ pState->vertexBuffers[pVB->index] = *pVB;
}
}
-void SwrSetIndexBuffer(
- HANDLE hContext,
- const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
+void SwrSetIndexBuffer(HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->indexBuffer = *pIndexBuffer;
}
-void SwrSetFetchFunc(
- HANDLE hContext,
- PFN_FETCH_FUNC pfnFetchFunc)
+void SwrSetFetchFunc(HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnFetchFunc = pfnFetchFunc;
}
-void SwrSetSoFunc(
- HANDLE hContext,
- PFN_SO_FUNC pfnSoFunc,
- uint32_t streamIndex)
+void SwrSetSoFunc(HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnSoFunc[streamIndex] = pfnSoFunc;
}
-void SwrSetSoState(
- HANDLE hContext,
- SWR_STREAMOUT_STATE* pSoState)
+void SwrSetSoState(HANDLE hContext, SWR_STREAMOUT_STATE* pSoState)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->soState = *pSoState;
}
-void SwrSetSoBuffers(
- HANDLE hContext,
- SWR_STREAMOUT_BUFFER* pSoBuffer,
- uint32_t slot)
+void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->soBuffer[slot] = *pSoBuffer;
}
-void SwrSetVertexFunc(
- HANDLE hContext,
- PFN_VERTEX_FUNC pfnVertexFunc)
+void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnVertexFunc = pfnVertexFunc;
}
-void SwrSetFrontendState(
- HANDLE hContext,
- SWR_FRONTEND_STATE *pFEState)
+void SwrSetFrontendState(HANDLE hContext, SWR_FRONTEND_STATE* pFEState)
{
- API_STATE* pState = GetDrawState(GetContext(hContext));
+ API_STATE* pState = GetDrawState(GetContext(hContext));
pState->frontendState = *pFEState;
}
-void SwrSetGsState(
- HANDLE hContext,
- SWR_GS_STATE *pGSState)
+void SwrSetGsState(HANDLE hContext, SWR_GS_STATE* pGSState)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->gsState = *pGSState;
+ pState->gsState = *pGSState;
}
-void SwrSetGsFunc(
- HANDLE hContext,
- PFN_GS_FUNC pfnGsFunc)
+void SwrSetGsFunc(HANDLE hContext, PFN_GS_FUNC pfnGsFunc)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnGsFunc = pfnGsFunc;
}
-void SwrSetCsFunc(
- HANDLE hContext,
- PFN_CS_FUNC pfnCsFunc,
- uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize,
- uint32_t scratchSpaceSizePerInstance,
- uint32_t numInstances)
-{
- API_STATE* pState = GetDrawState(GetContext(hContext));
- pState->pfnCsFunc = pfnCsFunc;
- pState->totalThreadsInGroup = totalThreadsInGroup;
- pState->totalSpillFillSize = totalSpillFillSize;
- pState->scratchSpaceSize = scratchSpaceSizePerInstance;
+void SwrSetCsFunc(HANDLE hContext,
+ PFN_CS_FUNC pfnCsFunc,
+ uint32_t totalThreadsInGroup,
+ uint32_t totalSpillFillSize,
+ uint32_t scratchSpaceSizePerInstance,
+ uint32_t numInstances)
+{
+ API_STATE* pState = GetDrawState(GetContext(hContext));
+ pState->pfnCsFunc = pfnCsFunc;
+ pState->totalThreadsInGroup = totalThreadsInGroup;
+ pState->totalSpillFillSize = totalSpillFillSize;
+ pState->scratchSpaceSize = scratchSpaceSizePerInstance;
pState->scratchSpaceNumInstances = numInstances;
}
-void SwrSetTsState(
- HANDLE hContext,
- SWR_TS_STATE *pState)
+void SwrSetTsState(HANDLE hContext, SWR_TS_STATE* pState)
{
API_STATE* pApiState = GetDrawState(GetContext(hContext));
- pApiState->tsState = *pState;
+ pApiState->tsState = *pState;
}
-void SwrSetHsFunc(
- HANDLE hContext,
- PFN_HS_FUNC pfnFunc)
+void SwrSetHsFunc(HANDLE hContext, PFN_HS_FUNC pfnFunc)
{
API_STATE* pApiState = GetDrawState(GetContext(hContext));
pApiState->pfnHsFunc = pfnFunc;
}
-void SwrSetDsFunc(
- HANDLE hContext,
- PFN_DS_FUNC pfnFunc)
+void SwrSetDsFunc(HANDLE hContext, PFN_DS_FUNC pfnFunc)
{
API_STATE* pApiState = GetDrawState(GetContext(hContext));
pApiState->pfnDsFunc = pfnFunc;
}
-void SwrSetDepthStencilState(
- HANDLE hContext,
- SWR_DEPTH_STENCIL_STATE *pDSState)
+void SwrSetDepthStencilState(HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pDSState)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->depthStencilState = *pDSState;
}
-void SwrSetBackendState(
- HANDLE hContext,
- SWR_BACKEND_STATE *pBEState)
+void SwrSetBackendState(HANDLE hContext, SWR_BACKEND_STATE* pBEState)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->backendState = *pBEState;
}
-void SwrSetDepthBoundsState(
- HANDLE hContext,
- SWR_DEPTH_BOUNDS_STATE *pDBState)
+void SwrSetDepthBoundsState(HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pDBState)
{
API_STATE* pState = GetDrawState(GetContext(hContext));
pState->depthBoundsState = *pDBState;
}
-void SwrSetPixelShaderState(
- HANDLE hContext,
- SWR_PS_STATE *pPSState)
+void SwrSetPixelShaderState(HANDLE hContext, SWR_PS_STATE* pPSState)
{
- API_STATE *pState = GetDrawState(GetContext(hContext));
- pState->psState = *pPSState;
+ API_STATE* pState = GetDrawState(GetContext(hContext));
+ pState->psState = *pPSState;
}
-void SwrSetBlendState(
- HANDLE hContext,
- SWR_BLEND_STATE *pBlendState)
+void SwrSetBlendState(HANDLE hContext, SWR_BLEND_STATE* pBlendState)
{
- API_STATE *pState = GetDrawState(GetContext(hContext));
+ API_STATE* pState = GetDrawState(GetContext(hContext));
memcpy(&pState->blendState, pBlendState, sizeof(SWR_BLEND_STATE));
}
-void SwrSetBlendFunc(
- HANDLE hContext,
- uint32_t renderTarget,
- PFN_BLEND_JIT_FUNC pfnBlendFunc)
+void SwrSetBlendFunc(HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc)
{
SWR_ASSERT(renderTarget < SWR_NUM_RENDERTARGETS);
- API_STATE *pState = GetDrawState(GetContext(hContext));
+ API_STATE* pState = GetDrawState(GetContext(hContext));
pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
}
// update guardband multipliers for the viewport
-void updateGuardbands(API_STATE *pState)
+void updateGuardbands(API_STATE* pState)
{
uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
- for(uint32_t i = 0; i < numGbs; ++i)
+ for (uint32_t i = 0; i < numGbs; ++i)
{
// guardband center is viewport center
- pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
- pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
- pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
+ pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
+ pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
+ pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
}
}
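// Editorial sketch (not part of the patch): the multiplier math above worked for a
// concrete viewport; the knob values are hypothetical stand-ins for the
// compile-time KNOB_GUARDBAND_WIDTH/HEIGHT.
static void GuardbandExample()
{
    const float gbWidth   = 32768.0f; // stand-in for KNOB_GUARDBAND_WIDTH
    const float gbHeight  = 32768.0f; // stand-in for KNOB_GUARDBAND_HEIGHT
    const float leftRight = gbWidth / 1920.0f;  // ~17.07 for a 1920-wide viewport
    const float topBottom = gbHeight / 1080.0f; // ~30.34 for a 1080-high viewport
    (void)leftRight;
    (void)topBottom;
}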
-void SwrSetRastState(
- HANDLE hContext,
- const SWR_RASTSTATE *pRastState)
+void SwrSetRastState(HANDLE hContext, const SWR_RASTSTATE* pRastState)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- API_STATE* pState = GetDrawState(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ API_STATE* pState = GetDrawState(pContext);
memcpy(&pState->rastState, pRastState, sizeof(SWR_RASTSTATE));
}
-void SwrSetViewports(
- HANDLE hContext,
- uint32_t numViewports,
- const SWR_VIEWPORT* pViewports,
- const SWR_VIEWPORT_MATRICES* pMatrices)
+void SwrSetViewports(HANDLE hContext,
+ uint32_t numViewports,
+ const SWR_VIEWPORT* pViewports,
+ const SWR_VIEWPORT_MATRICES* pMatrices)
{
- SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS,
- "Invalid number of viewports.");
+ SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of viewports.");
- SWR_CONTEXT *pContext = GetContext(hContext);
- API_STATE* pState = GetDrawState(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ API_STATE* pState = GetDrawState(pContext);
memcpy(&pState->vp[0], pViewports, sizeof(SWR_VIEWPORT) * numViewports);
// @todo Faster to copy portions of the SOA or just copy all of it?
updateGuardbands(pState);
}
-void SwrSetScissorRects(
- HANDLE hContext,
- uint32_t numScissors,
- const SWR_RECT* pScissors)
+void SwrSetScissorRects(HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors)
{
- SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
- "Invalid number of scissor rects.");
+ SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of scissor rects.");
API_STATE* pState = GetDrawState(GetContext(hContext));
memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
};
-void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
+void SetupMacroTileScissors(DRAW_CONTEXT* pDC)
{
- API_STATE *pState = &pDC->pState->state;
- uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+ API_STATE* pState = &pDC->pState->state;
+ uint32_t numScissors =
+ pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
pState->scissorsTileAligned = true;
for (uint32_t index = 0; index < numScissors; ++index)
{
- SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index];
+ SWR_RECT& scissorInFixedPoint = pState->scissorsInFixedPoint[index];
// Set up scissor dimensions based on scissor or viewport
if (pState->rastState.scissorEnable)
}
else
{
- // the vp width and height must be added to origin un-rounded then the result round to -inf.
- // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
+            // The vp width and height must be added to the un-rounded origin, then the
+            // result rounded toward -inf. The cast to int performs that rounding, assuming
+            // all of [left, right, top, bottom] are positive.
scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
// Test for tile alignment
bool tileAligned;
- tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
+ tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0;
tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0;
tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;
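// Editorial sketch (not part of the patch): the alignment test above factored into
// a helper, making explicit that all four edges must fall on tile boundaries.
static bool IsTileAligned(const SWR_RECT& r)
{
    return (r.xmin % KNOB_TILE_X_DIM) == 0 && (r.ymin % KNOB_TILE_Y_DIM) == 0 &&
           (r.xmax % KNOB_TILE_X_DIM) == 0 && (r.ymax % KNOB_TILE_Y_DIM) == 0;
}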
// templated backend function tables
-void SetupPipeline(DRAW_CONTEXT *pDC)
+void SetupPipeline(DRAW_CONTEXT* pDC)
{
- DRAW_STATE* pState = pDC->pState;
- const SWR_RASTSTATE &rastState = pState->state.rastState;
- const SWR_PS_STATE &psState = pState->state.psState;
- BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
+ DRAW_STATE* pState = pDC->pState;
+ const SWR_RASTSTATE& rastState = pState->state.rastState;
+ const SWR_PS_STATE& psState = pState->state.psState;
+ BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
// setup backend
if (psState.pfnPixelShader == nullptr)
else
{
const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
- const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
- const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
- const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0;
+ const bool bMultisampleEnable =
+ ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
+ const uint32_t centroid =
+ ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
+ const uint32_t canEarlyZ =
+ (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0;
SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
-
+
// select backend function
- switch(psState.shadingRate)
+ switch (psState.shadingRate)
{
case SWR_SHADING_RATE_PIXEL:
- if(bMultisampleEnable)
+ if (bMultisampleEnable)
{
// always need to generate I & J per sample for Z interpolation
- barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
- [centroid][forcedSampleCount][canEarlyZ]
+ barycentricsMask =
+ (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
+ backendFuncs.pfnBackend =
+ gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern]
+ [psState.inputCoverage][centroid][forcedSampleCount]
+ [canEarlyZ]
;
}
else
{
// always need to generate I & J per pixel for Z interpolation
- barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
- backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
+ barycentricsMask =
+ (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
+ backendFuncs.pfnBackend =
+ gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
}
break;
case SWR_SHADING_RATE_SAMPLE:
SWR_ASSERT(rastState.bIsCenterPattern != true);
// always need to generate I & J per sample for Z interpolation
- barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
- backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
+ barycentricsMask =
+ (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
+ backendFuncs.pfnBackend =
+ gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid]
+ [canEarlyZ];
break;
default:
SWR_ASSERT(0 && "Invalid shading rate");
{
case TOP_POINT_LIST:
pState->pfnProcessPrims = ClipPoints;
- pfnBinner = BinPoints;
+ pfnBinner = BinPoints;
#if USE_SIMD16_FRONTEND
pState->pfnProcessPrims_simd16 = ClipPoints_simd16;
- pfnBinner_simd16 = BinPoints_simd16;
+ pfnBinner_simd16 = BinPoints_simd16;
#endif
break;
case TOP_LINE_LIST:
case TOP_LINE_LIST_ADJ:
case TOP_LISTSTRIP_ADJ:
pState->pfnProcessPrims = ClipLines;
- pfnBinner = BinLines;
+ pfnBinner = BinLines;
#if USE_SIMD16_FRONTEND
pState->pfnProcessPrims_simd16 = ClipLines_simd16;
- pfnBinner_simd16 = BinLines_simd16;
+ pfnBinner_simd16 = BinLines_simd16;
#endif
break;
default:
pState->pfnProcessPrims = ClipTriangles;
- pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
+ pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
#if USE_SIMD16_FRONTEND
pState->pfnProcessPrims_simd16 = ClipTriangles_simd16;
pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0));
// set up the frontend attribute count
- pState->state.feNumAttributes = 0;
+ pState->state.feNumAttributes = 0;
const SWR_BACKEND_STATE& backendState = pState->state.backendState;
if (backendState.swizzleEnable)
{
// attribute swizzling is enabled, iterate over the map and record the max attribute used
for (uint32_t i = 0; i < backendState.numAttributes; ++i)
{
- pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+ pState->state.feNumAttributes =
+ std::max(pState->state.feNumAttributes,
+ (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
}
}
else
DWORD maxAttrib;
if (_BitScanReverse64(&maxAttrib, streamMasks))
{
- pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
+ pState->state.feNumAttributes =
+ std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
}
}
// complicated logic to test for cases where we don't need backing hottile memory for a draw
- // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled.
- pState->state.depthHottileEnable = ((!(pState->state.depthStencilState.depthTestEnable &&
- !pState->state.depthStencilState.depthWriteEnable &&
- !pState->state.depthBoundsState.depthBoundsTestEnable &&
- pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) &&
- (pState->state.depthStencilState.depthTestEnable ||
- pState->state.depthStencilState.depthWriteEnable ||
- pState->state.depthBoundsState.depthBoundsTestEnable)) ? true : false;
-
- pState->state.stencilHottileEnable = (((!(pState->state.depthStencilState.stencilTestEnable &&
- !pState->state.depthStencilState.stencilWriteEnable &&
- pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
- // for stencil we have to check the double sided state as well
- (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
- !pState->state.depthStencilState.stencilWriteEnable &&
- pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) &&
- (pState->state.depthStencilState.stencilTestEnable ||
- pState->state.depthStencilState.stencilWriteEnable)) ? true : false;
-
+    // have to check for the special case where depth/stencil test is enabled but depth
+    // write is disabled.
+ pState->state.depthHottileEnable =
+ ((!(pState->state.depthStencilState.depthTestEnable &&
+ !pState->state.depthStencilState.depthWriteEnable &&
+ !pState->state.depthBoundsState.depthBoundsTestEnable &&
+ pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) &&
+ (pState->state.depthStencilState.depthTestEnable ||
+ pState->state.depthStencilState.depthWriteEnable ||
+ pState->state.depthBoundsState.depthBoundsTestEnable))
+ ? true
+ : false;
+
+ pState->state.stencilHottileEnable =
+ (((!(pState->state.depthStencilState.stencilTestEnable &&
+ !pState->state.depthStencilState.stencilWriteEnable &&
+ pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
+ // for stencil we have to check the double sided state as well
+ (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
+ !pState->state.depthStencilState.stencilWriteEnable &&
+ pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) &&
+ (pState->state.depthStencilState.stencilTestEnable ||
+ pState->state.depthStencilState.stencilWriteEnable))
+ ? true
+ : false;
uint32_t hotTileEnable = pState->state.psState.renderTargetMask;
// Disable hottile for surfaces with no writes
if (psState.pfnPixelShader != nullptr)
{
- DWORD rt;
+ DWORD rt;
uint32_t rtMask = pState->state.psState.renderTargetMask;
while (_BitScanForward(&rt, rtMask))
{
pState->state.colorHottileEnable = hotTileEnable;
-
// Setup depth quantization function
if (pState->state.depthHottileEnable)
{
switch (pState->state.rastState.depthFormat)
{
- case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
- case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
- case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
- case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
- default: SWR_INVALID("Unsupported depth format for depth quantiztion.");
- pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+ case R32_FLOAT_X8X24_TYPELESS:
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT_X8X24_TYPELESS>;
+ break;
+ case R32_FLOAT:
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
+ break;
+ case R24_UNORM_X8_TYPELESS:
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R24_UNORM_X8_TYPELESS>;
+ break;
+ case R16_UNORM:
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R16_UNORM>;
+ break;
+ default:
+            SWR_INVALID("Unsupported depth format for depth quantization.");
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
}
}
else
{
// set up pass-through quantize if depth isn't enabled
- pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+ pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
}
}
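// Editorial sketch (not part of the patch): what a UNORM depth quantizer does
// conceptually (assumes <cmath>). The real QuantizeDepth<format> is the template
// selected above; this stand-alone version only illustrates the 24-bit case.
static float Quantize24UnormDepth(float depth)
{
    const float scale = 16777215.0f; // (1 << 24) - 1 representable steps
    return std::round(depth * scale) / scale; // snap to the nearest 24-bit step
}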
//////////////////////////////////////////////////////////////////////////
/// @brief InitDraw
/// @param pDC - Draw context to initialize for this draw.
-void InitDraw(
- DRAW_CONTEXT *pDC,
- bool isSplitDraw)
+void InitDraw(DRAW_CONTEXT* pDC, bool isSplitDraw)
{
// We don't need to re-setup the scissors/pipeline state again for split draw.
if (isSplitDraw == false)
SetupMacroTileScissors(pDC);
SetupPipeline(pDC);
}
-
}
/// @brief We can split the draw for certain topologies for better performance.
/// @param totalVerts - Total vertices for draw
/// @param topology - Topology used for draw
-uint32_t MaxVertsPerDraw(
- DRAW_CONTEXT* pDC,
- uint32_t totalVerts,
- PRIMITIVE_TOPOLOGY topology)
+uint32_t MaxVertsPerDraw(DRAW_CONTEXT* pDC, uint32_t totalVerts, PRIMITIVE_TOPOLOGY topology)
{
API_STATE& state = pDC->pState->state;
if (pDC->pState->state.tsState.tsEnable)
{
uint32_t vertsPerPrim = topology - TOP_PATCHLIST_BASE;
- vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
+ vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
}
break;
default:
return vertsPerDraw;
}
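// Editorial sketch (not part of the patch): how the per-draw limit above yields
// the number of sub-draws issued by the split loops in DrawInstanced and
// DrawIndexedInstance below.
static uint32_t NumSubDraws(uint32_t totalVerts, uint32_t maxVertsPerDraw)
{
    return (totalVerts + maxVertsPerDraw - 1) / maxVertsPerDraw; // ceiling division
}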
-
//////////////////////////////////////////////////////////////////////////
/// @brief DrawInstanced
/// @param hContext - Handle passed back from SwrCreateContext
/// @param numVerts - How many vertices to read sequentially from vertex data (per instance).
/// @param startVertex - Specifies start vertex for draw. (vertex data)
/// @param numInstances - How many instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void DrawInstanced(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertices,
- uint32_t startVertex,
- uint32_t numInstances = 1,
- uint32_t startInstance = 0)
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void DrawInstanced(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numVertices,
+ uint32_t startVertex,
+ uint32_t numInstances = 1,
+ uint32_t startInstance = 0)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIDraw, pDC->drawId);
uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
- uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
- uint32_t remainingVerts = numVertices;
+ uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
+ uint32_t remainingVerts = numVertices;
- API_STATE *pState = &pDC->pState->state;
- pState->topology = topology;
+ API_STATE* pState = &pDC->pState->state;
+ pState->topology = topology;
pState->forceFront = false;
// disable culling for points/lines
if (topology == TOP_POINT_LIST)
{
pState->rastState.cullMode = SWR_CULLMODE_NONE;
- pState->forceFront = true;
+ pState->forceFront = true;
}
else if (topology == TOP_RECT_LIST)
{
int draw = 0;
while (remainingVerts)
{
- uint32_t numVertsForDraw = (remainingVerts < maxVertsPerDraw) ?
- remainingVerts : maxVertsPerDraw;
+ uint32_t numVertsForDraw =
+ (remainingVerts < maxVertsPerDraw) ? remainingVerts : maxVertsPerDraw;
- bool isSplitDraw = (draw > 0) ? true : false;
- DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw);
+ bool isSplitDraw = (draw > 0) ? true : false;
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw);
InitDraw(pDC, isSplitDraw);
- pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetProcessDrawFunc(
- false, // IsIndexed
- false, // bEnableCutIndex
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pDC->pState->pfnProcessPrims != nullptr);
- pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
- pDC->FeWork.desc.draw.startVertex = startVertex;
- pDC->FeWork.desc.draw.numInstances = numInstances;
+ pDC->FeWork.type = DRAW;
+ pDC->FeWork.pfnWork = GetProcessDrawFunc(false, // IsIndexed
+ false, // bEnableCutIndex
+ pState->tsState.tsEnable,
+ pState->gsState.gsEnable,
+ pState->soState.soEnable,
+ pDC->pState->pfnProcessPrims != nullptr);
+ pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
+ pDC->FeWork.desc.draw.startVertex = startVertex;
+ pDC->FeWork.desc.draw.numInstances = numInstances;
pDC->FeWork.desc.draw.startInstance = startInstance;
- pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
+ pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
pDC->FeWork.desc.draw.startVertexID = draw * maxVertsPerDraw;
pDC->cleanupState = (remainingVerts == numVertsForDraw);
- //enqueue DC
+ // enqueue DC
QueueDraw(pContext);
- AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertsForDraw, startVertex, numInstances,
- startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
+ AR_API_EVENT(DrawInstancedEvent(pDC->drawId,
+ topology,
+ numVertsForDraw,
+ startVertex,
+ numInstances,
+ startInstance,
+ pState->tsState.tsEnable,
+ pState->gsState.gsEnable,
+ pState->soState.soEnable,
+ pState->gsState.outputTopology,
+ draw));
remainingVerts -= numVertsForDraw;
draw++;
}
// restore culling state
- pDC = GetDrawContext(pContext);
+ pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
RDTSC_END(APIDraw, numVertices * numInstances);
/// @param topology - Specifies topology for draw.
/// @param startVertex - Specifies start vertex in vertex buffer for draw.
/// @param numVertices - Number of vertices.
-void SwrDraw(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t startVertex,
- uint32_t numVertices)
+void SwrDraw(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t startVertex,
+ uint32_t numVertices)
{
DrawInstanced(hContext, topology, numVertices, startVertex);
}
/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
/// @param numInstances - How many instances to render.
/// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void SwrDrawInstanced(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertsPerInstance,
- uint32_t numInstances,
- uint32_t startVertex,
- uint32_t startInstance
- )
-{
- DrawInstanced(hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void SwrDrawInstanced(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numVertsPerInstance,
+ uint32_t numInstances,
+ uint32_t startVertex,
+ uint32_t startInstance)
+{
+ DrawInstanced(
+ hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
}
//////////////////////////////////////////////////////////////////////////
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
/// @param numInstances - Number of instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void DrawIndexedInstance(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t numInstances = 1,
- uint32_t startInstance = 0)
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void DrawIndexedInstance(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numIndices,
+ uint32_t indexOffset,
+ int32_t baseVertex,
+ uint32_t numInstances = 1,
+ uint32_t startInstance = 0)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- API_STATE* pState = &pDC->pState->state;
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ API_STATE* pState = &pDC->pState->state;
RDTSC_BEGIN(APIDrawIndexed, pDC->drawId);
uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
- uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
- uint32_t remainingIndices = numIndices;
+ uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
+ uint32_t remainingIndices = numIndices;
uint32_t indexSize = 0;
switch (pState->indexBuffer.format)
{
- case R32_UINT: indexSize = sizeof(uint32_t); break;
- case R16_UINT: indexSize = sizeof(uint16_t); break;
- case R8_UINT: indexSize = sizeof(uint8_t); break;
+ case R32_UINT:
+ indexSize = sizeof(uint32_t);
+ break;
+ case R16_UINT:
+ indexSize = sizeof(uint16_t);
+ break;
+ case R8_UINT:
+ indexSize = sizeof(uint8_t);
+ break;
default:
SWR_INVALID("Invalid index buffer format: %d", pState->indexBuffer.format);
}
- int draw = 0;
+ int draw = 0;
gfxptr_t xpIB = pState->indexBuffer.xpIndices;
xpIB += (uint64_t)indexOffset * (uint64_t)indexSize;
- pState->topology = topology;
+ pState->topology = topology;
pState->forceFront = false;
// disable culling for points/lines
if (topology == TOP_POINT_LIST)
{
pState->rastState.cullMode = SWR_CULLMODE_NONE;
- pState->forceFront = true;
+ pState->forceFront = true;
}
else if (topology == TOP_RECT_LIST)
{
while (remainingIndices)
{
- uint32_t numIndicesForDraw = (remainingIndices < maxIndicesPerDraw) ?
- remainingIndices : maxIndicesPerDraw;
+ uint32_t numIndicesForDraw =
+ (remainingIndices < maxIndicesPerDraw) ? remainingIndices : maxIndicesPerDraw;
// When breaking up draw, we need to obtain new draw context for each iteration.
bool isSplitDraw = (draw > 0) ? true : false;
pDC = GetDrawContext(pContext, isSplitDraw);
InitDraw(pDC, isSplitDraw);
- pDC->FeWork.type = DRAW;
- pDC->FeWork.pfnWork = GetProcessDrawFunc(
- true, // IsIndexed
- pState->frontendState.bEnableCutIndex,
- pState->tsState.tsEnable,
- pState->gsState.gsEnable,
- pState->soState.soEnable,
- pDC->pState->pfnProcessPrims != nullptr);
- pDC->FeWork.desc.draw.pDC = pDC;
+ pDC->FeWork.type = DRAW;
+ pDC->FeWork.pfnWork = GetProcessDrawFunc(true, // IsIndexed
+ pState->frontendState.bEnableCutIndex,
+ pState->tsState.tsEnable,
+ pState->gsState.gsEnable,
+ pState->soState.soEnable,
+ pDC->pState->pfnProcessPrims != nullptr);
+ pDC->FeWork.desc.draw.pDC = pDC;
pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
- pDC->FeWork.desc.draw.xpIB = xpIB;
- pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
+ pDC->FeWork.desc.draw.xpIB = xpIB;
+ pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
- pDC->FeWork.desc.draw.numInstances = numInstances;
+ pDC->FeWork.desc.draw.numInstances = numInstances;
pDC->FeWork.desc.draw.startInstance = startInstance;
- pDC->FeWork.desc.draw.baseVertex = baseVertex;
- pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
+ pDC->FeWork.desc.draw.baseVertex = baseVertex;
+ pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
pDC->cleanupState = (remainingIndices == numIndicesForDraw);
- //enqueue DC
+ // enqueue DC
QueueDraw(pContext);
- AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndicesForDraw, indexOffset, baseVertex,
- numInstances, startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
+ AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId,
+ topology,
+ numIndicesForDraw,
+ indexOffset,
+ baseVertex,
+ numInstances,
+ startInstance,
+ pState->tsState.tsEnable,
+ pState->gsState.gsEnable,
+ pState->soState.soEnable,
+ pState->gsState.outputTopology,
+ draw));
xpIB += maxIndicesPerDraw * indexSize;
remainingIndices -= numIndicesForDraw;
}
// Restore culling state
- pDC = GetDrawContext(pContext);
+ pDC = GetDrawContext(pContext);
pDC->pState->state.rastState.cullMode = oldCullMode;
-
+
RDTSC_END(APIDrawIndexed, numIndices * numInstances);
}
-
//////////////////////////////////////////////////////////////////////////
/// @brief DrawIndexed
/// @param hContext - Handle passed back from SwrCreateContext
/// @param numIndices - Number of indices to read sequentially from index buffer.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-void SwrDrawIndexed(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex
- )
+void SwrDrawIndexed(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numIndices,
+ uint32_t indexOffset,
+ int32_t baseVertex)
{
DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex);
}
/// @param numInstances - Number of instances to render.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void SwrDrawIndexedInstanced(
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t numInstances,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t startInstance)
-{
- DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void SwrDrawIndexedInstanced(HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numIndices,
+ uint32_t numInstances,
+ uint32_t indexOffset,
+ int32_t baseVertex,
+ uint32_t startInstance)
+{
+ DrawIndexedInstance(
+ hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
}
//////////////////////////////////////////////////////////////////////////
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
+/// invalidate.
/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
/// be hottile size-aligned.
-void SWR_API SwrInvalidateTiles(
- HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& invalidateRect)
+void SWR_API SwrInvalidateTiles(HANDLE hContext,
+ uint32_t attachmentMask,
+ const SWR_RECT& invalidateRect)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- pDC->FeWork.type = DISCARDINVALIDATETILES;
- pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
+ pDC->FeWork.type = DISCARDINVALIDATETILES;
+ pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
- pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
+ pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
- pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
+ pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
- pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
+ pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
- //enqueue
+ // enqueue
QueueDraw(pContext);
AR_API_EVENT(SwrInvalidateTilesEvent(pDC->drawId));
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
/// discarded.
-void SWR_API SwrDiscardRect(
- HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& rect)
+void SWR_API SwrDiscardRect(HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
// Queue a load to the hottile
- pDC->FeWork.type = DISCARDINVALIDATETILES;
- pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
+ pDC->FeWork.type = DISCARDINVALIDATETILES;
+ pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
- pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
+ pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
- pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
+ pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
- pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
+ pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
- //enqueue
+ // enqueue
QueueDraw(pContext);
AR_API_EVENT(SwrDiscardRectEvent(pDC->drawId));
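// Editorial sketch (not part of the patch): the two entry points above differ only
// in the desc fields they set. Invalidate drops hottile contents (SWR_TILE_INVALID,
// no new tiles); discard marks fully covered tiles SWR_TILE_RESOLVED and creates
// them, so later draws skip the load from the surface. Hypothetical call site,
// assuming an attachment enum value for color 0:
//     SWR_RECT full = {0, 0, (int32_t)width, (int32_t)height};
//     SwrInvalidateTiles(hContext, 1 << SWR_ATTACHMENT_COLOR0, full);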
/// @param threadGroupCountX - Number of thread groups dispatched in X direction
/// @param threadGroupCountY - Number of thread groups dispatched in Y direction
/// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-void SwrDispatch(
- HANDLE hContext,
- uint32_t threadGroupCountX,
- uint32_t threadGroupCountY,
- uint32_t threadGroupCountZ)
+void SwrDispatch(HANDLE hContext,
+ uint32_t threadGroupCountX,
+ uint32_t threadGroupCountY,
+ uint32_t threadGroupCountZ)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIDispatch, pDC->drawId);
- AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
- pDC->isCompute = true; // This is a compute context.
+ AR_API_EVENT(
+ DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
+ pDC->isCompute = true; // This is a compute context.
COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
pTaskData->threadGroupCountZ = threadGroupCountZ;
uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
- uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
- pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
+ uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
+ pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
QueueDispatch(pContext);
// Deswizzles, converts and stores current contents of the hot tiles to surface
// described by pState
-void SWR_API SwrStoreTiles(
- HANDLE hContext,
- uint32_t attachmentMask,
- SWR_TILE_STATE postStoreTileState,
- const SWR_RECT& storeRect)
+void SWR_API SwrStoreTiles(HANDLE hContext,
+ uint32_t attachmentMask,
+ SWR_TILE_STATE postStoreTileState,
+ const SWR_RECT& storeRect)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIStoreTiles, pDC->drawId);
- pDC->FeWork.type = STORETILES;
- pDC->FeWork.pfnWork = ProcessStoreTiles;
- pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask;
+ pDC->FeWork.type = STORETILES;
+ pDC->FeWork.pfnWork = ProcessStoreTiles;
+ pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask;
pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
- pDC->FeWork.desc.storeTiles.rect = storeRect;
+ pDC->FeWork.desc.storeTiles.rect = storeRect;
pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
- //enqueue
+ // enqueue
QueueDraw(pContext);
AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
/// @param z - depth value use for clearing depth buffer
/// @param stencil - stencil value used for clearing stencil buffer
/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-void SWR_API SwrClearRenderTarget(
- HANDLE hContext,
- uint32_t attachmentMask,
- uint32_t renderTargetArrayIndex,
- const float clearColor[4],
- float z,
- uint8_t stencil,
- const SWR_RECT& clearRect)
+void SWR_API SwrClearRenderTarget(HANDLE hContext,
+ uint32_t attachmentMask,
+ uint32_t renderTargetArrayIndex,
+ const float clearColor[4],
+ float z,
+ uint8_t stencil,
+ const SWR_RECT& clearRect)
{
if (KNOB_TOSS_DRAW)
{
return;
}
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId);
- pDC->FeWork.type = CLEAR;
- pDC->FeWork.pfnWork = ProcessClear;
+ pDC->FeWork.type = CLEAR;
+ pDC->FeWork.pfnWork = ProcessClear;
pDC->FeWork.desc.clear.rect = clearRect;
pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
- pDC->FeWork.desc.clear.attachmentMask = attachmentMask;
+ pDC->FeWork.desc.clear.attachmentMask = attachmentMask;
pDC->FeWork.desc.clear.renderTargetArrayIndex = renderTargetArrayIndex;
- pDC->FeWork.desc.clear.clearDepth = z;
- pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
- pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1];
- pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2];
- pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3];
- pDC->FeWork.desc.clear.clearStencil = stencil;
+ pDC->FeWork.desc.clear.clearDepth = z;
+ pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
+ pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1];
+ pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2];
+ pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3];
+ pDC->FeWork.desc.clear.clearStencil = stencil;
// enqueue draw
QueueDraw(pContext);
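// Editorial sketch (not part of the patch): a full-target clear through the entry
// point above; the attachment bit names are hypothetical stand-ins.
//     const float black[4] = {0.0f, 0.0f, 0.0f, 1.0f};
//     SWR_RECT rect = {0, 0, (int32_t)width, (int32_t)height};
//     SwrClearRenderTarget(hContext,
//                          (1 << SWR_ATTACHMENT_COLOR0) | (1 << SWR_ATTACHMENT_DEPTH),
//                          0,     // renderTargetArrayIndex
//                          black,
//                          1.0f,  // depth clear value
//                          0,     // stencil clear value
//                          rect);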
/// sampler.
/// SWR is responsible for the allocation of the private context state.
/// @param hContext - Handle passed back from SwrCreateContext
-VOID* SwrGetPrivateContextState(
- HANDLE hContext)
+VOID* SwrGetPrivateContextState(HANDLE hContext)
{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
- DRAW_STATE* pState = pDC->pState;
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ DRAW_STATE* pState = pDC->pState;
if (pState->pPrivateState == nullptr)
{
- pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize, KNOB_SIMD_WIDTH*sizeof(float));
+ pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize,
+ KNOB_SIMD_WIDTH * sizeof(float));
}
return pState->pPrivateState;
/// @param hContext - Handle passed back from SwrCreateContext
/// @param size - Size of allocation
/// @param align - Alignment needed for allocation.
-VOID* SwrAllocDrawContextMemory(
- HANDLE hContext,
- uint32_t size,
- uint32_t align)
+VOID* SwrAllocDrawContextMemory(HANDLE hContext, uint32_t size, uint32_t align)
{
- SWR_CONTEXT* pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
return pDC->pState->pArena->AllocAligned(size, align);
}
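// Editorial sketch (not part of the patch): per-draw scratch via the arena entry
// point above; the allocation lives until the draw context retires, so there is
// no matching free.
//     void* pScratch = SwrAllocDrawContextMemory(hContext, 256 /*bytes*/, 16 /*align*/);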
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
-void SwrEnableStatsFE(
- HANDLE hContext,
- bool enable)
+void SwrEnableStatsFE(HANDLE hContext, bool enable)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
pDC->pState->state.enableStatsFE = enable;
}
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
-void SwrEnableStatsBE(
- HANDLE hContext,
- bool enable)
+void SwrEnableStatsBE(HANDLE hContext, bool enable)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
pDC->pState->state.enableStatsBE = enable;
}
//////////////////////////////////////////////////////////////////////////
/// @brief Mark end of frame - used for performance profiling
/// @param hContext - Handle passed back from SwrCreateContext
-void SWR_API SwrEndFrame(
- HANDLE hContext)
+void SWR_API SwrEndFrame(HANDLE hContext)
{
- SWR_CONTEXT *pContext = GetContext(hContext);
- DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+ SWR_CONTEXT* pContext = GetContext(hContext);
+ DRAW_CONTEXT* pDC = GetDrawContext(pContext);
(void)pDC; // var used
RDTSC_ENDFRAME();
InitRasterizerFunctions();
}
-void SwrGetInterface(SWR_INTERFACE &out_funcs)
-{
- out_funcs.pfnSwrCreateContext = SwrCreateContext;
- out_funcs.pfnSwrDestroyContext = SwrDestroyContext;
- out_funcs.pfnSwrBindApiThread = SwrBindApiThread;
- out_funcs.pfnSwrSaveState = SwrSaveState;
- out_funcs.pfnSwrRestoreState = SwrRestoreState;
- out_funcs.pfnSwrSync = SwrSync;
- out_funcs.pfnSwrStallBE = SwrStallBE;
- out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle;
- out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE;
- out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers;
- out_funcs.pfnSwrSetIndexBuffer = SwrSetIndexBuffer;
- out_funcs.pfnSwrSetFetchFunc = SwrSetFetchFunc;
- out_funcs.pfnSwrSetSoFunc = SwrSetSoFunc;
- out_funcs.pfnSwrSetSoState = SwrSetSoState;
- out_funcs.pfnSwrSetSoBuffers = SwrSetSoBuffers;
- out_funcs.pfnSwrSetVertexFunc = SwrSetVertexFunc;
- out_funcs.pfnSwrSetFrontendState = SwrSetFrontendState;
- out_funcs.pfnSwrSetGsState = SwrSetGsState;
- out_funcs.pfnSwrSetGsFunc = SwrSetGsFunc;
- out_funcs.pfnSwrSetCsFunc = SwrSetCsFunc;
- out_funcs.pfnSwrSetTsState = SwrSetTsState;
- out_funcs.pfnSwrSetHsFunc = SwrSetHsFunc;
- out_funcs.pfnSwrSetDsFunc = SwrSetDsFunc;
- out_funcs.pfnSwrSetDepthStencilState = SwrSetDepthStencilState;
- out_funcs.pfnSwrSetBackendState = SwrSetBackendState;
- out_funcs.pfnSwrSetDepthBoundsState = SwrSetDepthBoundsState;
- out_funcs.pfnSwrSetPixelShaderState = SwrSetPixelShaderState;
- out_funcs.pfnSwrSetBlendState = SwrSetBlendState;
- out_funcs.pfnSwrSetBlendFunc = SwrSetBlendFunc;
- out_funcs.pfnSwrDraw = SwrDraw;
- out_funcs.pfnSwrDrawInstanced = SwrDrawInstanced;
- out_funcs.pfnSwrDrawIndexed = SwrDrawIndexed;
- out_funcs.pfnSwrDrawIndexedInstanced = SwrDrawIndexedInstanced;
- out_funcs.pfnSwrInvalidateTiles = SwrInvalidateTiles;
- out_funcs.pfnSwrDiscardRect = SwrDiscardRect;
- out_funcs.pfnSwrDispatch = SwrDispatch;
- out_funcs.pfnSwrStoreTiles = SwrStoreTiles;
- out_funcs.pfnSwrClearRenderTarget = SwrClearRenderTarget;
- out_funcs.pfnSwrSetRastState = SwrSetRastState;
- out_funcs.pfnSwrSetViewports = SwrSetViewports;
- out_funcs.pfnSwrSetScissorRects = SwrSetScissorRects;
+void SwrGetInterface(SWR_INTERFACE& out_funcs)
+{
+ out_funcs.pfnSwrCreateContext = SwrCreateContext;
+ out_funcs.pfnSwrDestroyContext = SwrDestroyContext;
+ out_funcs.pfnSwrBindApiThread = SwrBindApiThread;
+ out_funcs.pfnSwrSaveState = SwrSaveState;
+ out_funcs.pfnSwrRestoreState = SwrRestoreState;
+ out_funcs.pfnSwrSync = SwrSync;
+ out_funcs.pfnSwrStallBE = SwrStallBE;
+ out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle;
+ out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE;
+ out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers;
+ out_funcs.pfnSwrSetIndexBuffer = SwrSetIndexBuffer;
+ out_funcs.pfnSwrSetFetchFunc = SwrSetFetchFunc;
+ out_funcs.pfnSwrSetSoFunc = SwrSetSoFunc;
+ out_funcs.pfnSwrSetSoState = SwrSetSoState;
+ out_funcs.pfnSwrSetSoBuffers = SwrSetSoBuffers;
+ out_funcs.pfnSwrSetVertexFunc = SwrSetVertexFunc;
+ out_funcs.pfnSwrSetFrontendState = SwrSetFrontendState;
+ out_funcs.pfnSwrSetGsState = SwrSetGsState;
+ out_funcs.pfnSwrSetGsFunc = SwrSetGsFunc;
+ out_funcs.pfnSwrSetCsFunc = SwrSetCsFunc;
+ out_funcs.pfnSwrSetTsState = SwrSetTsState;
+ out_funcs.pfnSwrSetHsFunc = SwrSetHsFunc;
+ out_funcs.pfnSwrSetDsFunc = SwrSetDsFunc;
+ out_funcs.pfnSwrSetDepthStencilState = SwrSetDepthStencilState;
+ out_funcs.pfnSwrSetBackendState = SwrSetBackendState;
+ out_funcs.pfnSwrSetDepthBoundsState = SwrSetDepthBoundsState;
+ out_funcs.pfnSwrSetPixelShaderState = SwrSetPixelShaderState;
+ out_funcs.pfnSwrSetBlendState = SwrSetBlendState;
+ out_funcs.pfnSwrSetBlendFunc = SwrSetBlendFunc;
+ out_funcs.pfnSwrDraw = SwrDraw;
+ out_funcs.pfnSwrDrawInstanced = SwrDrawInstanced;
+ out_funcs.pfnSwrDrawIndexed = SwrDrawIndexed;
+ out_funcs.pfnSwrDrawIndexedInstanced = SwrDrawIndexedInstanced;
+ out_funcs.pfnSwrInvalidateTiles = SwrInvalidateTiles;
+ out_funcs.pfnSwrDiscardRect = SwrDiscardRect;
+ out_funcs.pfnSwrDispatch = SwrDispatch;
+ out_funcs.pfnSwrStoreTiles = SwrStoreTiles;
+ out_funcs.pfnSwrClearRenderTarget = SwrClearRenderTarget;
+ out_funcs.pfnSwrSetRastState = SwrSetRastState;
+ out_funcs.pfnSwrSetViewports = SwrSetViewports;
+ out_funcs.pfnSwrSetScissorRects = SwrSetScissorRects;
out_funcs.pfnSwrGetPrivateContextState = SwrGetPrivateContextState;
out_funcs.pfnSwrAllocDrawContextMemory = SwrAllocDrawContextMemory;
- out_funcs.pfnSwrEnableStatsFE = SwrEnableStatsFE;
- out_funcs.pfnSwrEnableStatsBE = SwrEnableStatsBE;
- out_funcs.pfnSwrEndFrame = SwrEndFrame;
- out_funcs.pfnSwrInit = SwrInit;
+ out_funcs.pfnSwrEnableStatsFE = SwrEnableStatsFE;
+ out_funcs.pfnSwrEnableStatsBE = SwrEnableStatsBE;
+ out_funcs.pfnSwrEndFrame = SwrEndFrame;
+ out_funcs.pfnSwrInit = SwrInit;
out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile;
out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
out_funcs.pfnSwrStoreHotTileClear = SwrStoreHotTileClear;
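// Editorial sketch (not part of the patch): consuming the table populated above,
// e.g. after resolving SwrGetInterface from a dynamically loaded library.
//     SWR_INTERFACE funcs = {};
//     SwrGetInterface(funcs);
//     HANDLE hCtx = funcs.pfnSwrCreateContext(&createInfo); // createInfo built elsewhere
//     funcs.pfnSwrSetViewports(hCtx, 1, &viewport, &matrices);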
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file api.h
-*
-* @brief API definitions
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file api.h
+ *
+ * @brief API definitions
+ *
+ ******************************************************************************/
#ifndef __SWR_API_H__
#define __SWR_API_H__
#include "common/formats.h"
#include "core/state.h"
-typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
+typedef void(SWR_API* PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
//////////////////////////////////////////////////////////////////////////
/// @brief Rectangle structure
int32_t xmin; ///< inclusive
int32_t ymin; ///< inclusive
int32_t xmax; ///< exclusive
- int32_t ymax; ///< exclusive
+ int32_t ymax; ///< exclusive
- bool operator == (const SWR_RECT& rhs)
+ bool operator==(const SWR_RECT& rhs)
{
- return (this->ymin == rhs.ymin &&
- this->ymax == rhs.ymax &&
- this->xmin == rhs.xmin &&
- this->xmax == rhs.xmax);
+ return (this->ymin == rhs.ymin && this->ymax == rhs.ymax && this->xmin == rhs.xmin &&
+ this->xmax == rhs.xmax);
}
- bool operator != (const SWR_RECT& rhs)
- {
- return !(*this == rhs);
- }
+ bool operator!=(const SWR_RECT& rhs) { return !(*this == rhs); }
SWR_RECT& Intersect(const SWR_RECT& other)
{
this->xmax = std::min(this->xmax, other.xmax);
this->ymax = std::min(this->ymax, other.ymax);
- if (xmax - xmin < 0 ||
- ymax - ymin < 0)
+ if (xmax - xmin < 0 || ymax - ymin < 0)
{
// Zero area
ymin = ymax = xmin = xmax = 0;
return *this;
}
- SWR_RECT& operator &= (const SWR_RECT& other)
- {
- return Intersect(other);
- }
+ SWR_RECT& operator&=(const SWR_RECT& other) { return Intersect(other); }
SWR_RECT& Union(const SWR_RECT& other)
{
return *this;
}
- SWR_RECT& operator |= (const SWR_RECT& other)
- {
- return Union(other);
- }
+ SWR_RECT& operator|=(const SWR_RECT& other) { return Union(other); }
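    /// Example (illustrative sketch, not part of this header): intersecting two
    /// rects via operator&=. Assumes Intersect also takes the max of the mins,
    /// which is elided above.
    ///     SWR_RECT a{0, 0, 64, 64};     // {xmin, ymin, xmax, ymax}
    ///     SWR_RECT b{32, 32, 128, 128};
    ///     a &= b;                       // a is now {32, 32, 64, 64}
    ///     bool overlap = (a != SWR_RECT{0, 0, 0, 0});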
void Translate(int32_t x, int32_t y)
{
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pDstHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
+typedef void(SWR_API* PFN_LOAD_TILE)(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
+ SWR_FORMAT dstFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pDstHotTile);
//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for store hot tiles
/// @param x - destination x coordinate
/// @param y - destination y coordinate
/// @param pSrcHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
+typedef void(SWR_API* PFN_STORE_TILE)(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
+ SWR_FORMAT srcFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pSrcHotTile);
//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for clearing from the hot tiles clear value
/// @param y - destination y coordinate
/// @param renderTargetArrayIndex - render target array offset from arrayIndex
/// @param pClearColor - pointer to the hot tile's clear value
-typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
- SWR_RENDERTARGET_ATTACHMENT rtIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor);
+typedef void(SWR_API* PFN_CLEAR_TILE)(HANDLE hPrivateContext,
+ HANDLE hWorkerPrivateData,
+ SWR_RENDERTARGET_ATTACHMENT rtIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ const float* pClearColor);
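//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; MyContext and its surface layout are
/// hypothetical): a driver-side PFN_STORE_TILE that writes a hot tile back
/// into a driver-owned surface.
///     void SWR_API MyStoreTile(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
///                              SWR_FORMAT srcFormat,
///                              SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
///                              uint32_t x, uint32_t y,
///                              uint32_t renderTargetArrayIndex, uint8_t* pSrcHotTile)
///     {
///         MyContext* pCtx = (MyContext*)hPrivateContext;
///         // deswizzle/copy pSrcHotTile into pCtx's surface at (x, y)...
///     }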
//////////////////////////////////////////////////////////////////////////
/// @brief Callback to allow driver to update their copy of streamout write offset.
/// @param hPrivateContext - handle to private data
/// @param soBufferSlot - buffer slot for write offset
/// @param soWriteOffset - update value for so write offset.
-typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
- uint32_t soBufferSlot, uint32_t soWriteOffset);
+typedef void(SWR_API* PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
+ uint32_t soBufferSlot,
+ uint32_t soWriteOffset);
//////////////////////////////////////////////////////////////////////////
/// @brief Callback to allow driver to update their copy of stats.
/// @param hPrivateContext - handle to private data
/// @param pStats - pointer to draw stats
-typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
- const SWR_STATS* pStats);
+typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS* pStats);
//////////////////////////////////////////////////////////////////////////
/// @brief Callback to allow driver to update their copy of FE stats.
/// to sum up the stats across all of the workers.
/// @param hPrivateContext - handle to private data
/// @param pStats - pointer to draw stats
-typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
- const SWR_STATS_FE* pStats);
+typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
//////////////////////////////////////////////////////////////////////////
/// BucketManager
/////////////////////////////////////////////////////////////////////////
struct SWR_THREADING_INFO
{
- uint32_t BASE_NUMA_NODE;
- uint32_t BASE_CORE;
- uint32_t BASE_THREAD;
- uint32_t MAX_WORKER_THREADS;
- uint32_t MAX_NUMA_NODES;
- uint32_t MAX_CORES_PER_NUMA_NODE;
- uint32_t MAX_THREADS_PER_CORE;
- bool SINGLE_THREADED;
+ uint32_t BASE_NUMA_NODE;
+ uint32_t BASE_CORE;
+ uint32_t BASE_THREAD;
+ uint32_t MAX_WORKER_THREADS;
+ uint32_t MAX_NUMA_NODES;
+ uint32_t MAX_CORES_PER_NUMA_NODE;
+ uint32_t MAX_THREADS_PER_CORE;
+ bool SINGLE_THREADED;
};
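//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch): forcing single-threaded operation, e.g.
/// for debugging. Fields left zero fall back to the KNOB defaults.
///     SWR_THREADING_INFO threadInfo = {};
///     threadInfo.SINGLE_THREADED = true;
///     // later: createInfo.pThreadInfo = &threadInfo;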
//////////////////////////////////////////////////////////////////////////
uint32_t bindAPIThread0; // Default is true if numAPIReservedThreads is > 0,
// binds thread used in SwrCreateContext to API Reserved
// thread 0
- uint32_t numAPIThreadsPerCore; // 0 - means use all threads per core, else clamp to this number.
- // Independent of KNOB_MAX_THREADS_PER_CORE.
+    uint32_t numAPIThreadsPerCore; // 0 means use all threads per core; otherwise clamp to this
+                                   // number. Independent of KNOB_MAX_THREADS_PER_CORE.
};
//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
struct SWR_WORKER_PRIVATE_STATE
{
- typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
+ typedef void(SWR_API* PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
- size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
- PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
- ///< worker data will be initialized to 0.
- PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
- ///< Can be null.
+ size_t perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
+ PFN_WORKER_DATA pfnInitWorkerData; ///< Init function for worker data. If null
+ ///< worker data will be initialized to 0.
+ PFN_WORKER_DATA pfnFinishWorkerData; ///< Finish / destroy function for worker data.
+ ///< Can be null.
};
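//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; PerWorkerData is a hypothetical driver
/// type): constructing per-worker state in the buffer SWR allocates.
///     void SWR_API InitWorker(HANDLE hWorkerPrivateData, uint32_t iWorkerNum)
///     {
///         new (hWorkerPrivateData) PerWorkerData(iWorkerNum); // placement new
///     }
///     SWR_WORKER_PRIVATE_STATE workerState = {};
///     workerState.perWorkerPrivateStateSize = sizeof(PerWorkerData);
///     workerState.pfnInitWorkerData         = InitWorker;
///     workerState.pfnFinishWorkerData       = nullptr; // trivially destructible here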
//////////////////////////////////////////////////////////////////////////
{
// External functions (e.g. sampler) need per draw context state.
// Use SwrGetPrivateContextState() to access private state.
- size_t privateStateSize;
+ size_t privateStateSize;
// Optional per-worker state, can be NULL for no worker-private data
- SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
+ SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
// Callback functions
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Pointer to rdtsc buckets mgr returned to the caller.
// Only populated when KNOB_ENABLE_RDTSC is set
- BucketManager* pBucketMgr;
+ BucketManager* pBucketMgr;
// Output: size required memory passed to for SwrSaveState / SwrRestoreState
- size_t contextSaveSize;
+ size_t contextSaveSize;
// ArchRast event manager.
- HANDLE hArEventManager;
+ HANDLE hArEventManager;
// Input (optional): Threading info that overrides any set KNOB values.
- SWR_THREADING_INFO* pThreadInfo;
+ SWR_THREADING_INFO* pThreadInfo;
// Input (optional): Info for reserving API threads
- SWR_API_THREADING_INFO* pApiThreadInfo;
+ SWR_API_THREADING_INFO* pApiThreadInfo;
// Input: if set to non-zero value, overrides KNOB value for maximum
// number of draws in flight
- uint32_t MAX_DRAWS_IN_FLIGHT;
+ uint32_t MAX_DRAWS_IN_FLIGHT;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Create SWR Context.
/// @param pCreateInfo - pointer to creation info.
-SWR_FUNC(HANDLE, SwrCreateContext,
- SWR_CREATECONTEXT_INFO* pCreateInfo);
+SWR_FUNC(HANDLE, SwrCreateContext, SWR_CREATECONTEXT_INFO* pCreateInfo);
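//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; MyLoadTile/MyStoreTile are the hypothetical
/// callbacks sketched above, and this assumes a build in which SWR_FUNC
/// declares directly callable entry points):
///     SWR_CREATECONTEXT_INFO createInfo = {};
///     createInfo.privateStateSize = sizeof(MyPerDrawState); // hypothetical type
///     createInfo.pfnLoadTile      = MyLoadTile;
///     createInfo.pfnStoreTile     = MyStoreTile;
///     HANDLE hContext = SwrCreateContext(&createInfo);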
//////////////////////////////////////////////////////////////////////////
/// @brief Destroys SWR Context.
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrDestroyContext,
- HANDLE hContext);
+SWR_FUNC(void, SwrDestroyContext, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Bind current thread to an API reserved HW thread
/// @param hContext - Handle passed back from SwrCreateContext
/// @param apiThreadId - index of reserved HW thread to bind to.
-SWR_FUNC(void, SwrBindApiThread,
- HANDLE hContext,
- uint32_t apiThreadId);
+SWR_FUNC(void, SwrBindApiThread, HANDLE hContext, uint32_t apiThreadId);
//////////////////////////////////////////////////////////////////////////
/// @brief Saves API state associated with hContext
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pOutputStateBlock - Memory block to receive API state data
/// @param memSize - Size of memory pointed to by pOutputStateBlock
-SWR_FUNC(void, SwrSaveState,
- HANDLE hContext,
- void* pOutputStateBlock,
- size_t memSize);
+SWR_FUNC(void, SwrSaveState, HANDLE hContext, void* pOutputStateBlock, size_t memSize);
//////////////////////////////////////////////////////////////////////////
/// @brief Restores API state to hContext previously saved with SwrSaveState
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pStateBlock - Memory block to read API state data from
/// @param memSize - Size of memory pointed to by pStateBlock
-SWR_FUNC(void, SwrRestoreState,
- HANDLE hContext,
- const void* pStateBlock,
- size_t memSize);
+SWR_FUNC(void, SwrRestoreState, HANDLE hContext, const void* pStateBlock, size_t memSize);
//////////////////////////////////////////////////////////////////////////
/// @brief Sync cmd. Executes the callback func when all rendering up to this sync
/// has been completed
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnFunc - pointer to callback function,
-/// @param userData - user data to pass back
-SWR_FUNC(void, SwrSync,
- HANDLE hContext,
- PFN_CALLBACK_FUNC pfnFunc,
- uint64_t userData,
- uint64_t userData2,
- uint64_t userData3);
+/// @param userData, userData2, userData3 - user data passed back to pfnFunc
+SWR_FUNC(void,
+ SwrSync,
+ HANDLE hContext,
+ PFN_CALLBACK_FUNC pfnFunc,
+ uint64_t userData,
+ uint64_t userData2,
+ uint64_t userData3);
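//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch): using the sync callback to signal a fence.
/// Here userData carries a pointer to the fence and userData2 its new value.
///     void SWR_API OnSyncComplete(uint64_t data, uint64_t data2, uint64_t data3)
///     {
///         ((std::atomic<uint64_t>*)data)->store(data2);
///     }
///     // SwrSync(hContext, OnSyncComplete, (uint64_t)&fence, fenceValue, 0);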
//////////////////////////////////////////////////////////////////////////
/// @brief Stall cmd. Stalls the backend until all previous work has been completed.
/// Frontend work can continue to make progress
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrStallBE,
- HANDLE hContext);
+SWR_FUNC(void, SwrStallBE, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Blocks until all rendering has been completed.
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdle,
- HANDLE hContext);
+SWR_FUNC(void, SwrWaitForIdle, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Blocks until all FE rendering has been completed.
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdleFE,
- HANDLE hContext);
+SWR_FUNC(void, SwrWaitForIdleFE, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Set vertex buffer state.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param numBuffers - Number of vertex buffer state descriptors.
/// @param pVertexBuffers - Array of vertex buffer state descriptors.
-SWR_FUNC(void, SwrSetVertexBuffers,
- HANDLE hContext,
- uint32_t numBuffers,
- const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
+SWR_FUNC(void,
+ SwrSetVertexBuffers,
+ HANDLE hContext,
+ uint32_t numBuffers,
+ const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
//////////////////////////////////////////////////////////////////////////
/// @brief Set index buffer
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pIndexBuffer - Index buffer.
-SWR_FUNC(void, SwrSetIndexBuffer,
- HANDLE hContext,
- const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
+SWR_FUNC(void, SwrSetIndexBuffer, HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
//////////////////////////////////////////////////////////////////////////
/// @brief Set fetch shader pointer.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnFetchFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetFetchFunc,
- HANDLE hContext,
- PFN_FETCH_FUNC pfnFetchFunc);
+SWR_FUNC(void, SwrSetFetchFunc, HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief Set streamout shader pointer.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnSoFunc - Pointer to shader.
/// @param streamIndex - specifies stream
-SWR_FUNC(void, SwrSetSoFunc,
- HANDLE hContext,
- PFN_SO_FUNC pfnSoFunc,
- uint32_t streamIndex);
+SWR_FUNC(void, SwrSetSoFunc, HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex);
//////////////////////////////////////////////////////////////////////////
/// @brief Set streamout state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pSoState - Pointer to streamout state.
-SWR_FUNC(void, SwrSetSoState,
- HANDLE hContext,
- SWR_STREAMOUT_STATE* pSoState);
+SWR_FUNC(void, SwrSetSoState, HANDLE hContext, SWR_STREAMOUT_STATE* pSoState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set streamout buffer state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pSoBuffer - Pointer to streamout buffer.
/// @param slot - Slot to bind SO buffer to.
-SWR_FUNC(void, SwrSetSoBuffers,
- HANDLE hContext,
- SWR_STREAMOUT_BUFFER* pSoBuffer,
- uint32_t slot);
+SWR_FUNC(void, SwrSetSoBuffers, HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot);
//////////////////////////////////////////////////////////////////////////
/// @brief Set vertex shader pointer.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnVertexFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetVertexFunc,
- HANDLE hContext,
- PFN_VERTEX_FUNC pfnVertexFunc);
+SWR_FUNC(void, SwrSetVertexFunc, HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief Set frontend state.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetFrontendState,
- HANDLE hContext,
- SWR_FRONTEND_STATE *pState);
+SWR_FUNC(void, SwrSetFrontendState, HANDLE hContext, SWR_FRONTEND_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set geometry shader state.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetGsState,
- HANDLE hContext,
- SWR_GS_STATE *pState);
+SWR_FUNC(void, SwrSetGsState, HANDLE hContext, SWR_GS_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set geometry shader
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to geometry shader function
-SWR_FUNC(void, SwrSetGsFunc,
- HANDLE hContext,
- PFN_GS_FUNC pfnGsFunc);
+SWR_FUNC(void, SwrSetGsFunc, HANDLE hContext, PFN_GS_FUNC pfnGsFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief Set compute shader
/// @param totalSpillFillSize - size in bytes needed for spill/fill.
/// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
/// @param numInstances - number of simd instances that are run per execution of the shader
-SWR_FUNC(void, SwrSetCsFunc,
- HANDLE hContext,
- PFN_CS_FUNC pfnCsFunc,
- uint32_t totalThreadsInGroup,
- uint32_t totalSpillFillSize,
- uint32_t scratchSpaceSizePerInstance,
- uint32_t numInstances
- );
+SWR_FUNC(void,
+ SwrSetCsFunc,
+ HANDLE hContext,
+ PFN_CS_FUNC pfnCsFunc,
+ uint32_t totalThreadsInGroup,
+ uint32_t totalSpillFillSize,
+ uint32_t scratchSpaceSizePerInstance,
+ uint32_t numInstances);
//////////////////////////////////////////////////////////////////////////
/// @brief Set tessellation state.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetTsState,
- HANDLE hContext,
- SWR_TS_STATE *pState);
+SWR_FUNC(void, SwrSetTsState, HANDLE hContext, SWR_TS_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set hull shader
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetHsFunc,
- HANDLE hContext,
- PFN_HS_FUNC pfnFunc);
+SWR_FUNC(void, SwrSetHsFunc, HANDLE hContext, PFN_HS_FUNC pfnFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief Set domain shader
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetDsFunc,
- HANDLE hContext,
- PFN_DS_FUNC pfnFunc);
+SWR_FUNC(void, SwrSetDsFunc, HANDLE hContext, PFN_DS_FUNC pfnFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief Set depth stencil state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthStencilState,
- HANDLE hContext,
- SWR_DEPTH_STENCIL_STATE *pState);
+SWR_FUNC(void, SwrSetDepthStencilState, HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set backend state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBackendState,
- HANDLE hContext,
- SWR_BACKEND_STATE *pState);
+SWR_FUNC(void, SwrSetBackendState, HANDLE hContext, SWR_BACKEND_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set depth bounds state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthBoundsState,
- HANDLE hContext,
- SWR_DEPTH_BOUNDS_STATE *pState);
+SWR_FUNC(void, SwrSetDepthBoundsState, HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set pixel shader state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetPixelShaderState,
- HANDLE hContext,
- SWR_PS_STATE *pState);
+SWR_FUNC(void, SwrSetPixelShaderState, HANDLE hContext, SWR_PS_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set blend state
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBlendState,
- HANDLE hContext,
- SWR_BLEND_STATE *pState);
+SWR_FUNC(void, SwrSetBlendState, HANDLE hContext, SWR_BLEND_STATE* pState);
//////////////////////////////////////////////////////////////////////////
/// @brief Set blend function
/// @param hContext - Handle passed back from SwrCreateContext
/// @param renderTarget - render target index
/// @param pfnBlendFunc - function pointer
-SWR_FUNC(void, SwrSetBlendFunc,
- HANDLE hContext,
- uint32_t renderTarget,
- PFN_BLEND_JIT_FUNC pfnBlendFunc);
+SWR_FUNC(
+ void, SwrSetBlendFunc, HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDraw
/// @param topology - Specifies topology for draw.
/// @param startVertex - Specifies start vertex in vertex buffer for draw.
/// @param primCount - Number of vertices.
-SWR_FUNC(void, SwrDraw,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t startVertex,
- uint32_t primCount);
+SWR_FUNC(void,
+ SwrDraw,
+ HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t startVertex,
+ uint32_t primCount);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDrawInstanced
/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
/// @param numInstances - How many instances to render.
/// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-SWR_FUNC(void, SwrDrawInstanced,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numVertsPerInstance,
- uint32_t numInstances,
- uint32_t startVertex,
- uint32_t startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+SWR_FUNC(void,
+ SwrDrawInstanced,
+ HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numVertsPerInstance,
+ uint32_t numInstances,
+ uint32_t startVertex,
+ uint32_t startInstance);
//////////////////////////////////////////////////////////////////////////
/// @brief DrawIndexed
/// @param numIndices - Number of indices to read sequentially from index buffer.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-SWR_FUNC(void, SwrDrawIndexed,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t indexOffset,
- int32_t baseVertex);
+SWR_FUNC(void,
+ SwrDrawIndexed,
+ HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numIndices,
+ uint32_t indexOffset,
+ int32_t baseVertex);
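//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; assumes the index buffer was already bound
/// with SwrSetIndexBuffer and that TOP_TRIANGLE_LIST is the triangle-list
/// topology value):
///     SwrDrawIndexed(hContext, TOP_TRIANGLE_LIST, 300 /*numIndices*/,
///                    0 /*indexOffset*/, 0 /*baseVertex*/);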
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDrawIndexedInstanced
/// @param numInstances - Number of instances to render.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-SWR_FUNC(void, SwrDrawIndexedInstanced,
- HANDLE hContext,
- PRIMITIVE_TOPOLOGY topology,
- uint32_t numIndices,
- uint32_t numInstances,
- uint32_t indexOffset,
- int32_t baseVertex,
- uint32_t startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+SWR_FUNC(void,
+ SwrDrawIndexedInstanced,
+ HANDLE hContext,
+ PRIMITIVE_TOPOLOGY topology,
+ uint32_t numIndices,
+ uint32_t numInstances,
+ uint32_t indexOffset,
+ int32_t baseVertex,
+ uint32_t startInstance);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
+/// invalidate.
/// @param invalidateRect - The pixel-coordinate rectangle to invalidate. This will be expanded to
/// be hottile size-aligned.
-SWR_FUNC(void, SwrInvalidateTiles,
- HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& invalidateRect);
+SWR_FUNC(void,
+ SwrInvalidateTiles,
+ HANDLE hContext,
+ uint32_t attachmentMask,
+ const SWR_RECT& invalidateRect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDiscardRect
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
/// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be
/// discarded.
-SWR_FUNC(void, SwrDiscardRect,
- HANDLE hContext,
- uint32_t attachmentMask,
- const SWR_RECT& rect);
+SWR_FUNC(void, SwrDiscardRect, HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrDispatch
/// @param threadGroupCountX - Number of thread groups dispatched in X direction
/// @param threadGroupCountY - Number of thread groups dispatched in Y direction
/// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-SWR_FUNC(void, SwrDispatch,
- HANDLE hContext,
- uint32_t threadGroupCountX,
- uint32_t threadGroupCountY,
- uint32_t threadGroupCountZ);
-
+SWR_FUNC(void,
+ SwrDispatch,
+ HANDLE hContext,
+ uint32_t threadGroupCountX,
+ uint32_t threadGroupCountY,
+ uint32_t threadGroupCountZ);
enum SWR_TILE_STATE
{
- SWR_TILE_INVALID = 0, // tile is in unitialized state and should be loaded with surface contents before rendering
- SWR_TILE_DIRTY = 2, // tile contains newer data than surface it represents
- SWR_TILE_RESOLVED = 3, // is in sync with surface it represents
+    SWR_TILE_INVALID = 0, // tile is in uninitialized state and should be loaded with surface contents
+ // before rendering
+ SWR_TILE_DIRTY = 2, // tile contains newer data than surface it represents
+ SWR_TILE_RESOLVED = 3, // is in sync with surface it represents
};
-/// @todo Add a good description for what attachments are and when and why you would use the different SWR_TILE_STATEs.
-SWR_FUNC(void, SwrStoreTiles,
- HANDLE hContext,
- uint32_t attachmentMask,
- SWR_TILE_STATE postStoreTileState,
- const SWR_RECT& storeRect);
-
+/// @todo Add a good description for what attachments are and when and why you would use the
+/// different SWR_TILE_STATEs.
+SWR_FUNC(void,
+ SwrStoreTiles,
+ HANDLE hContext,
+ uint32_t attachmentMask,
+ SWR_TILE_STATE postStoreTileState,
+ const SWR_RECT& storeRect);
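//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; renderWidth/renderHeight are assumed): at
/// end of frame, flush color attachment 0 and mark its tiles resolved.
///     SWR_RECT fullRect{0, 0, (int32_t)renderWidth, (int32_t)renderHeight};
///     SwrStoreTiles(hContext, 1 << SWR_ATTACHMENT_COLOR0,
///                   SWR_TILE_RESOLVED, fullRect);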
//////////////////////////////////////////////////////////////////////////
/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
/// @param z - depth value use for clearing depth buffer
/// @param stencil - stencil value used for clearing stencil buffer
/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-SWR_FUNC(void, SwrClearRenderTarget,
- HANDLE hContext,
- uint32_t attachmentMask,
- uint32_t renderTargetArrayIndex,
- const float clearColor[4],
- float z,
- uint8_t stencil,
- const SWR_RECT& clearRect);
+SWR_FUNC(void,
+ SwrClearRenderTarget,
+ HANDLE hContext,
+ uint32_t attachmentMask,
+ uint32_t renderTargetArrayIndex,
+ const float clearColor[4],
+ float z,
+ uint8_t stencil,
+ const SWR_RECT& clearRect);
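//////////////////////////////////////////////////////////////////////////
/// Example (illustrative sketch; fullRect as in the SwrStoreTiles sketch):
/// clearing color attachment 0 and depth in a single call.
///     const float black[4] = {0.0f, 0.0f, 0.0f, 1.0f};
///     uint32_t mask = (1 << SWR_ATTACHMENT_COLOR0) | (1 << SWR_ATTACHMENT_DEPTH);
///     SwrClearRenderTarget(hContext, mask, 0, black, 1.0f, 0, fullRect);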
//////////////////////////////////////////////////////////////////////////
/// @brief SwrSetRastState
/// @param hContext - Handle passed back from SwrCreateContext
/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
-SWR_FUNC(void, SwrSetRastState,
- HANDLE hContext,
- const SWR_RASTSTATE *pRastState);
+SWR_FUNC(void, SwrSetRastState, HANDLE hContext, const SWR_RASTSTATE* pRastState);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrSetViewports
/// @param numViewports - number of viewports passed in
/// @param pViewports - Specifies extents of viewport.
/// @param pMatrices - If not specified then SWR computes a default one.
-SWR_FUNC(void, SwrSetViewports,
- HANDLE hContext,
- uint32_t numViewports,
- const SWR_VIEWPORT* pViewports,
- const SWR_VIEWPORT_MATRICES* pMatrices);
+SWR_FUNC(void,
+ SwrSetViewports,
+ HANDLE hContext,
+ uint32_t numViewports,
+ const SWR_VIEWPORT* pViewports,
+ const SWR_VIEWPORT_MATRICES* pMatrices);
//////////////////////////////////////////////////////////////////////////
/// @brief SwrSetScissorRects
/// @param hContext - Handle passed back from SwrCreateContext
/// @param numScissors - number of scissors passed in
/// @param pScissors - array of scissors
-SWR_FUNC(void, SwrSetScissorRects,
- HANDLE hContext,
- uint32_t numScissors,
- const SWR_RECT* pScissors);
+SWR_FUNC(
+ void, SwrSetScissorRects, HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors);
//////////////////////////////////////////////////////////////////////////
/// @brief Returns a pointer to the private context state for the current draw context.
/// @note Client needs to resend private state prior to each draw call.
/// Also, SWR is responsible for the private state memory.
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void*, SwrGetPrivateContextState,
- HANDLE hContext);
+SWR_FUNC(void*, SwrGetPrivateContextState, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Clients can use this to allocate memory for draw/dispatch
/// @param hContext - Handle passed back from SwrCreateContext
/// @param size - Size of allocation
/// @param align - Alignment needed for allocation.
-SWR_FUNC(void*, SwrAllocDrawContextMemory,
- HANDLE hContext,
- uint32_t size,
- uint32_t align);
+SWR_FUNC(void*, SwrAllocDrawContextMemory, HANDLE hContext, uint32_t size, uint32_t align);
//////////////////////////////////////////////////////////////////////////
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsFE,
- HANDLE hContext,
- bool enable);
+SWR_FUNC(void, SwrEnableStatsFE, HANDLE hContext, bool enable);
//////////////////////////////////////////////////////////////////////////
/// @brief Enables stats counting
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsBE,
- HANDLE hContext,
- bool enable);
+SWR_FUNC(void, SwrEnableStatsBE, HANDLE hContext, bool enable);
//////////////////////////////////////////////////////////////////////////
/// @brief Mark end of frame - used for performance profiling
/// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrEndFrame,
- HANDLE hContext);
+SWR_FUNC(void, SwrEndFrame, HANDLE hContext);
//////////////////////////////////////////////////////////////////////////
/// @brief Initialize swr backend and memory internal tables
/// @param renderTargetIndex - Index to src render target
/// @param x, y - Coordinates to raster tile.
/// @param pDstHotTile - Pointer to Hot Tile
-SWR_FUNC(void, SwrLoadHotTile,
- HANDLE hWorkerPrivateData,
- const SWR_SURFACE_STATE *pSrcSurface,
- SWR_FORMAT dstFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
- uint8_t *pDstHotTile);
+SWR_FUNC(void,
+ SwrLoadHotTile,
+ HANDLE hWorkerPrivateData,
+ const SWR_SURFACE_STATE* pSrcSurface,
+ SWR_FORMAT dstFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pDstHotTile);
//////////////////////////////////////////////////////////////////////////
/// @brief Deswizzles and stores a full hottile to a render surface
/// @param renderTargetIndex - Index to destination render target
/// @param x, y - Coordinates to raster tile.
/// @param pSrcHotTile - Pointer to Hot Tile
-SWR_FUNC(void, SwrStoreHotTileToSurface,
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE *pDstSurface,
- SWR_FORMAT srcFormat,
- SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
- uint8_t *pSrcHotTile);
+SWR_FUNC(void,
+ SwrStoreHotTileToSurface,
+ HANDLE hWorkerPrivateData,
+ SWR_SURFACE_STATE* pDstSurface,
+ SWR_FORMAT srcFormat,
+ SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ uint8_t* pSrcHotTile);
//////////////////////////////////////////////////////////////////////////
/// @brief Writes clear color to every pixel of a render surface
/// @param renderTargetIndex - Index to destination render target
/// @param x, y - Coordinates to raster tile.
/// @param pClearColor - Pointer to clear color
-SWR_FUNC(void, SwrStoreHotTileClear,
- HANDLE hWorkerPrivateData,
- SWR_SURFACE_STATE *pDstSurface,
+SWR_FUNC(void,
+ SwrStoreHotTileClear,
+ HANDLE hWorkerPrivateData,
+ SWR_SURFACE_STATE* pDstSurface,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x,
- uint32_t y,
- uint32_t renderTargetArrayIndex,
- const float* pClearColor);
+ uint32_t x,
+ uint32_t y,
+ uint32_t renderTargetArrayIndex,
+ const float* pClearColor);
struct SWR_INTERFACE
{
- PFNSwrCreateContext pfnSwrCreateContext;
- PFNSwrDestroyContext pfnSwrDestroyContext;
- PFNSwrBindApiThread pfnSwrBindApiThread;
- PFNSwrSaveState pfnSwrSaveState;
- PFNSwrRestoreState pfnSwrRestoreState;
- PFNSwrSync pfnSwrSync;
- PFNSwrStallBE pfnSwrStallBE;
- PFNSwrWaitForIdle pfnSwrWaitForIdle;
- PFNSwrWaitForIdleFE pfnSwrWaitForIdleFE;
- PFNSwrSetVertexBuffers pfnSwrSetVertexBuffers;
- PFNSwrSetIndexBuffer pfnSwrSetIndexBuffer;
- PFNSwrSetFetchFunc pfnSwrSetFetchFunc;
- PFNSwrSetSoFunc pfnSwrSetSoFunc;
- PFNSwrSetSoState pfnSwrSetSoState;
- PFNSwrSetSoBuffers pfnSwrSetSoBuffers;
- PFNSwrSetVertexFunc pfnSwrSetVertexFunc;
- PFNSwrSetFrontendState pfnSwrSetFrontendState;
- PFNSwrSetGsState pfnSwrSetGsState;
- PFNSwrSetGsFunc pfnSwrSetGsFunc;
- PFNSwrSetCsFunc pfnSwrSetCsFunc;
- PFNSwrSetTsState pfnSwrSetTsState;
- PFNSwrSetHsFunc pfnSwrSetHsFunc;
- PFNSwrSetDsFunc pfnSwrSetDsFunc;
- PFNSwrSetDepthStencilState pfnSwrSetDepthStencilState;
- PFNSwrSetBackendState pfnSwrSetBackendState;
- PFNSwrSetDepthBoundsState pfnSwrSetDepthBoundsState;
- PFNSwrSetPixelShaderState pfnSwrSetPixelShaderState;
- PFNSwrSetBlendState pfnSwrSetBlendState;
- PFNSwrSetBlendFunc pfnSwrSetBlendFunc;
- PFNSwrDraw pfnSwrDraw;
- PFNSwrDrawInstanced pfnSwrDrawInstanced;
- PFNSwrDrawIndexed pfnSwrDrawIndexed;
- PFNSwrDrawIndexedInstanced pfnSwrDrawIndexedInstanced;
- PFNSwrInvalidateTiles pfnSwrInvalidateTiles;
- PFNSwrDiscardRect pfnSwrDiscardRect;
- PFNSwrDispatch pfnSwrDispatch;
- PFNSwrStoreTiles pfnSwrStoreTiles;
- PFNSwrClearRenderTarget pfnSwrClearRenderTarget;
- PFNSwrSetRastState pfnSwrSetRastState;
- PFNSwrSetViewports pfnSwrSetViewports;
- PFNSwrSetScissorRects pfnSwrSetScissorRects;
+ PFNSwrCreateContext pfnSwrCreateContext;
+ PFNSwrDestroyContext pfnSwrDestroyContext;
+ PFNSwrBindApiThread pfnSwrBindApiThread;
+ PFNSwrSaveState pfnSwrSaveState;
+ PFNSwrRestoreState pfnSwrRestoreState;
+ PFNSwrSync pfnSwrSync;
+ PFNSwrStallBE pfnSwrStallBE;
+ PFNSwrWaitForIdle pfnSwrWaitForIdle;
+ PFNSwrWaitForIdleFE pfnSwrWaitForIdleFE;
+ PFNSwrSetVertexBuffers pfnSwrSetVertexBuffers;
+ PFNSwrSetIndexBuffer pfnSwrSetIndexBuffer;
+ PFNSwrSetFetchFunc pfnSwrSetFetchFunc;
+ PFNSwrSetSoFunc pfnSwrSetSoFunc;
+ PFNSwrSetSoState pfnSwrSetSoState;
+ PFNSwrSetSoBuffers pfnSwrSetSoBuffers;
+ PFNSwrSetVertexFunc pfnSwrSetVertexFunc;
+ PFNSwrSetFrontendState pfnSwrSetFrontendState;
+ PFNSwrSetGsState pfnSwrSetGsState;
+ PFNSwrSetGsFunc pfnSwrSetGsFunc;
+ PFNSwrSetCsFunc pfnSwrSetCsFunc;
+ PFNSwrSetTsState pfnSwrSetTsState;
+ PFNSwrSetHsFunc pfnSwrSetHsFunc;
+ PFNSwrSetDsFunc pfnSwrSetDsFunc;
+ PFNSwrSetDepthStencilState pfnSwrSetDepthStencilState;
+ PFNSwrSetBackendState pfnSwrSetBackendState;
+ PFNSwrSetDepthBoundsState pfnSwrSetDepthBoundsState;
+ PFNSwrSetPixelShaderState pfnSwrSetPixelShaderState;
+ PFNSwrSetBlendState pfnSwrSetBlendState;
+ PFNSwrSetBlendFunc pfnSwrSetBlendFunc;
+ PFNSwrDraw pfnSwrDraw;
+ PFNSwrDrawInstanced pfnSwrDrawInstanced;
+ PFNSwrDrawIndexed pfnSwrDrawIndexed;
+ PFNSwrDrawIndexedInstanced pfnSwrDrawIndexedInstanced;
+ PFNSwrInvalidateTiles pfnSwrInvalidateTiles;
+ PFNSwrDiscardRect pfnSwrDiscardRect;
+ PFNSwrDispatch pfnSwrDispatch;
+ PFNSwrStoreTiles pfnSwrStoreTiles;
+ PFNSwrClearRenderTarget pfnSwrClearRenderTarget;
+ PFNSwrSetRastState pfnSwrSetRastState;
+ PFNSwrSetViewports pfnSwrSetViewports;
+ PFNSwrSetScissorRects pfnSwrSetScissorRects;
PFNSwrGetPrivateContextState pfnSwrGetPrivateContextState;
PFNSwrAllocDrawContextMemory pfnSwrAllocDrawContextMemory;
- PFNSwrEnableStatsFE pfnSwrEnableStatsFE;
- PFNSwrEnableStatsBE pfnSwrEnableStatsBE;
- PFNSwrEndFrame pfnSwrEndFrame;
- PFNSwrInit pfnSwrInit;
- PFNSwrLoadHotTile pfnSwrLoadHotTile;
+ PFNSwrEnableStatsFE pfnSwrEnableStatsFE;
+ PFNSwrEnableStatsBE pfnSwrEnableStatsBE;
+ PFNSwrEndFrame pfnSwrEndFrame;
+ PFNSwrInit pfnSwrInit;
+ PFNSwrLoadHotTile pfnSwrLoadHotTile;
PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface;
- PFNSwrStoreHotTileClear pfnSwrStoreHotTileClear;
+ PFNSwrStoreHotTileClear pfnSwrStoreHotTileClear;
};
extern "C" {
-typedef void (SWR_API * PFNSwrGetInterface)(SWR_INTERFACE &out_funcs);
-SWR_VISIBLE void SWR_API SwrGetInterface(SWR_INTERFACE &out_funcs);
+typedef void(SWR_API* PFNSwrGetInterface)(SWR_INTERFACE& out_funcs);
+SWR_VISIBLE void SWR_API SwrGetInterface(SWR_INTERFACE& out_funcs);
}
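// Example (illustrative sketch; the library name is hypothetical): resolving
// the dispatch table from a dynamically loaded SWR backend, then using it.
//     void* hLib = dlopen("libswrAVX2.so", RTLD_NOW);
//     PFNSwrGetInterface pfnGetInterface =
//         (PFNSwrGetInterface)dlsym(hLib, "SwrGetInterface");
//     SWR_INTERFACE swr;
//     pfnGetInterface(swr);
//     HANDLE hContext = swr.pfnSwrCreateContext(&createInfo);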
#endif
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file arena.h
-*
-* @brief Arena memory manager
-* The arena is convenient and fast for managing allocations for any of
-* our allocations that are associated with operations and can all be freed
-* once when their operation has completed. Allocations are cheap since
-* most of the time its simply an increment of an offset. Also, no need to
-* free individual allocations. All of the arena memory can be freed at once.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file arena.h
+ *
+ * @brief Arena memory manager
+ * The arena is convenient and fast for managing allocations that are tied to
+ * a single operation and can all be freed together once that operation has
+ * completed. Allocations are cheap, since most of the time it's simply an
+ * increment of an offset; there is no need to free individual allocations,
+ * and all of the arena memory can be freed at once.
+ *
+ ******************************************************************************/
#pragma once
#include <mutex>
struct ArenaBlock
{
size_t blockSize = 0;
- ArenaBlock* pNext = nullptr;
+ ArenaBlock* pNext = nullptr;
};
-static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
- "Increase BLOCK_ALIGN size");
+static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
class DefaultAllocator
{
SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
- p->blockSize = size;
+ p->blockSize = size;
return p;
}
};
// Caching Allocator for Arena
-template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
+template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
ArenaBlock* AllocateAligned(size_t size, size_t align)
{
// search cached blocks
std::lock_guard<std::mutex> l(m_mutex);
- ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
- ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
+ ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
+ ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
else
{
pPrevBlock = &m_oldCachedBlocks[bucket];
- pBlock = SearchBlocks(pPrevBlock, size, align);
+ pBlock = SearchBlocks(pPrevBlock, size, align);
if (pBlock)
{
{
SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
pPrevBlock->pNext = pBlock->pNext;
- pBlock->pNext = nullptr;
+ pBlock->pNext = nullptr;
return pBlock;
}
void FreeOldBlocks()
{
- if (!m_cachedSize) { return; }
+ if (!m_cachedSize)
+ {
+ return;
+ }
std::lock_guard<std::mutex> l(m_mutex);
bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
pBlock = pNext;
}
m_oldCachedBlocks[i].pNext = nullptr;
- m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
+ m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
// We know that all blocks are the same size.
// Just move the list over.
m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
- m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
- m_cachedBlocks[i].pNext = nullptr;
+ m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
+ m_cachedBlocks[i].pNext = nullptr;
if (m_pOldLastCachedBlocks[i]->pNext)
{
m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
while (pBlock)
{
ArenaBlock* pNext = pBlock->pNext;
- pBlock->pNext = nullptr;
+ pBlock->pNext = nullptr;
m_cachedSize -= pBlock->blockSize;
InsertCachedBlock<true>(i, pBlock);
pBlock = pNext;
}
- m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+ m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_cachedBlocks[i].pNext = nullptr;
}
}
{
for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
{
- m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+ m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
}
}
{
SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
- ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
+ ArenaBlock* pPrevBlock =
+ OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
ArenaBlock* pBlock = pPrevBlock->pNext;
while (pBlock)
break;
}
pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
+ pBlock = pBlock->pNext;
}
// Insert into list
SWR_ASSUME_ASSERT(pPrevBlock);
pPrevBlock->pNext = pNewBlock;
- pNewBlock->pNext = pBlock;
+ pNewBlock->pNext = pBlock;
if (OldBlockT)
{
static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
{
- ArenaBlock* pBlock = pPrevBlock->pNext;
+ ArenaBlock* pBlock = pPrevBlock->pNext;
ArenaBlock* pPotentialBlock = nullptr;
- ArenaBlock* pPotentialPrev = nullptr;
+ ArenaBlock* pPotentialPrev = nullptr;
while (pBlock)
{
// We could use this as it is larger than we wanted, but
// continue to search for a better match
pPotentialBlock = pBlock;
- pPotentialPrev = pPrevBlock;
+ pPotentialPrev = pPrevBlock;
}
}
else
{
// Blocks are sorted by size (biggest first)
- // So, if we get here, there are no blocks
+ // So, if we get here, there are no blocks
// large enough, fall through to allocation.
pBlock = nullptr;
break;
}
pPrevBlock = pBlock;
- pBlock = pBlock->pNext;
+ pBlock = pBlock->pNext;
}
if (!pBlock)
{
// Couldn't find an exact match, use next biggest size
- pBlock = pPotentialBlock;
+ pBlock = pPotentialBlock;
pPrevBlock = pPotentialPrev;
}
}
// buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
- static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
- static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
- static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
+ static const uint32_t CACHE_NUM_BUCKETS = NumBucketsT;
+ static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
+ static const size_t MAX_UNUSED_SIZE = sizeof(MEGABYTE);
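    // Example (illustrative): with the defaults NumBucketsT = 8 and
    // StartBucketBitT = 12, bucket 0 caches blocks smaller than
    // (1 << 13) = 8 KB, bucket 1 blocks smaller than 16 KB, and so on up to
    // bucket 7 for blocks smaller than (1 << 20) = 1 MB.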
- ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
- ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
- std::mutex m_mutex;
+ ArenaBlock m_cachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock m_oldCachedBlocks[CACHE_NUM_BUCKETS];
+ ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
+ std::mutex m_mutex;
- size_t m_totalAllocated = 0;
+ size_t m_totalAllocated = 0;
- size_t m_cachedSize = 0;
- size_t m_oldCachedSize = 0;
+ size_t m_cachedSize = 0;
+ size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;
-template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
+template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
- TArena(T& in_allocator) : m_allocator(in_allocator) {}
- TArena() : m_allocator(m_defAllocator) {}
- ~TArena()
- {
- Reset(true);
- }
+ TArena(T& in_allocator) : m_allocator(in_allocator) {}
+ TArena() : m_allocator(m_defAllocator) {}
+ ~TArena() { Reset(true); }
- void* AllocAligned(size_t size, size_t align)
+ void* AllocAligned(size_t size, size_t align)
{
if (0 == size)
{
if (m_pCurBlock)
{
ArenaBlock* pCurBlock = m_pCurBlock;
- size_t offset = AlignUp(m_offset, align);
+ size_t offset = AlignUp(m_offset, align);
if ((offset + size) <= pCurBlock->blockSize)
{
void* pMem = PtrAdd(pCurBlock, offset);
- m_offset = offset + size;
+ m_offset = offset + size;
return pMem;
}
}
static const size_t ArenaBlockSize = BlockSizeT;
- size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
+ size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
// Add in one BLOCK_ALIGN unit to store ArenaBlock in.
blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
- ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
+ ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
+ blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
SWR_ASSERT(pNewBlock != nullptr);
if (pNewBlock != nullptr)
{
- m_offset = ARENA_BLOCK_ALIGN;
+ m_offset = ARENA_BLOCK_ALIGN;
pNewBlock->pNext = m_pCurBlock;
m_pCurBlock = pNewBlock;
return AllocAligned(size, align);
}
- void* Alloc(size_t size)
- {
- return AllocAligned(size, 1);
- }
+ void* Alloc(size_t size) { return AllocAligned(size, 1); }
void* AllocAlignedSync(size_t size, size_t align)
{
if (m_pCurBlock)
{
- ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
- m_pCurBlock->pNext = nullptr;
+ ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
+ m_pCurBlock->pNext = nullptr;
while (pUsedBlocks)
{
ArenaBlock* pBlock = pUsedBlocks;
- pUsedBlocks = pBlock->pNext;
+ pUsedBlocks = pBlock->pNext;
m_allocator.Free(pBlock);
}
bool IsEmpty()
{
- return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
+ return (m_pCurBlock == nullptr) ||
+ (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
}
private:
-
- ArenaBlock* m_pCurBlock = nullptr;
- size_t m_offset = ARENA_BLOCK_ALIGN;
+ ArenaBlock* m_pCurBlock = nullptr;
+ size_t m_offset = ARENA_BLOCK_ALIGN;
/// @note Mutex is only used by sync allocation functions.
- std::mutex m_mutex;
+ std::mutex m_mutex;
- DefaultAllocator m_defAllocator;
- T& m_allocator;
+ DefaultAllocator m_defAllocator;
+ T& m_allocator;
};
-using StdArena = TArena<DefaultAllocator>;
-using CachingArena = TArena<CachingAllocator>;
+using StdArena = TArena<DefaultAllocator>;
+using CachingArena = TArena<CachingAllocator>;
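// Example (illustrative sketch): per-operation allocations from a
// CachingArena, all released in one Reset() once the operation retires.
//     CachingAllocator blockCache;      // typically shared across arenas
//     CachingArena arena(blockCache);
//     void* pScratch = arena.AllocAligned(1024, 64);
//     // ... pScratch stays valid for the lifetime of the operation ...
//     arena.Reset();                    // frees all arena memory at once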
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-* operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ * operations.
+ *
+ ******************************************************************************/
#include <smmintrin.h>
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
+void ProcessComputeBE(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t threadGroupId,
+ void*& pSpillFillBuffer,
+ void*& pScratchSpace)
{
- SWR_CONTEXT *pContext = pDC->pContext;
+ SWR_CONTEXT* pContext = pDC->pContext;
RDTSC_BEGIN(BEDispatch, pDC->drawId);
{
pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
}
-
- size_t scratchSpaceSize = pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
+
+ size_t scratchSpaceSize =
+ pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
if (scratchSpaceSize && pScratchSpace == nullptr)
{
pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD_BYTES);
const API_STATE& state = GetApiState(pDC);
- SWR_CS_CONTEXT csContext{ 0 };
- csContext.tileCounter = threadGroupId;
- csContext.dispatchDims[0] = pTaskData->threadGroupCountX;
- csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
- csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
- csContext.pTGSM = pContext->ppScratch[workerId];
- csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
- csContext.pScratchSpace = (uint8_t*)pScratchSpace;
+ SWR_CS_CONTEXT csContext{0};
+ csContext.tileCounter = threadGroupId;
+ csContext.dispatchDims[0] = pTaskData->threadGroupCountX;
+ csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
+ csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
+ csContext.pTGSM = pContext->ppScratch[workerId];
+ csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
+ csContext.pScratchSpace = (uint8_t*)pScratchSpace;
csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
- state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext);
+ state.pfnCsFunc(GetPrivateState(pDC),
+ pContext->threadPool.pThreadData[workerId].pWorkerPrivateData,
+ &csContext);
UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
AR_EVENT(CSStats(csContext.stats.numInstExecuted));
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
{
// Dummy function
}
-void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
{
uint32_t x, y;
MacroTileMgr::getTileIndices(macroTile, x, y);
SWR_ASSERT(x == 0 && y == 0);
}
-void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, STORE_TILES_DESC* pDesc,
- SWR_RENDERTARGET_ATTACHMENT attachment)
+void ProcessStoreTileBE(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroTile,
+ STORE_TILES_DESC* pDesc,
+ SWR_RENDERTARGET_ATTACHMENT attachment)
{
- SWR_CONTEXT *pContext = pDC->pContext;
- HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+ SWR_CONTEXT* pContext = pDC->pContext;
+ HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
case SWR_ATTACHMENT_COLOR4:
case SWR_ATTACHMENT_COLOR5:
case SWR_ATTACHMENT_COLOR6:
- case SWR_ATTACHMENT_COLOR7: srcFormat = KNOB_COLOR_HOT_TILE_FORMAT; break;
- case SWR_ATTACHMENT_DEPTH: srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT; break;
- case SWR_ATTACHMENT_STENCIL: srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT; break;
- default: SWR_INVALID("Unknown attachment: %d", attachment); srcFormat = KNOB_COLOR_HOT_TILE_FORMAT; break;
+ case SWR_ATTACHMENT_COLOR7:
+ srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
+ break;
+ case SWR_ATTACHMENT_DEPTH:
+ srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT;
+ break;
+ case SWR_ATTACHMENT_STENCIL:
+ srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT;
+ break;
+ default:
+ SWR_INVALID("Unknown attachment: %d", attachment);
+ srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
+ break;
}
uint32_t x, y;
MacroTileMgr::getTileIndices(macroTile, x, y);
// Only need to store the hottile if it's been rendered to...
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
+ HOTTILE* pHotTile =
+ pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
if (pHotTile)
{
// clear if clear is pending (i.e., not rendered to), then mark as dirty for store.
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
+ pfnClearTiles(pDC,
+ hWorkerPrivateData,
+ attachment,
+ macroTile,
+ pHotTile->renderTargetArrayIndex,
+ pHotTile->clearData,
+ pDesc->rect);
}
- if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
+ if (pHotTile->state == HOTTILE_DIRTY ||
+ pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
{
int32_t destX = KNOB_MACROTILE_X_DIM * x;
int32_t destY = KNOB_MACROTILE_Y_DIM * y;
- pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat,
- attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnStoreTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ srcFormat,
+ attachment,
+ destX,
+ destY,
+ pHotTile->renderTargetArrayIndex,
+ pHotTile->pBuffer);
}
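        // After the store, transition the tile to the caller-requested post-store state,
        // except that an already-resolved tile is not re-marked dirty without new rendering.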
-
if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
{
- if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY && pHotTile->state == HOTTILE_RESOLVED))
+ if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
+ pHotTile->state == HOTTILE_RESOLVED))
{
pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
}
RDTSC_END(BEStoreTiles, 1);
}
-void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
- STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData;
+ STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pData;
- unsigned long rt = 0;
- uint32_t mask = pDesc->attachmentMask;
+ unsigned long rt = 0;
+ uint32_t mask = pDesc->attachmentMask;
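    // _BitScanForward yields the index of the lowest set bit and returns zero once the
    // mask is empty; clearing the found bit each iteration visits every set attachment
    // exactly once.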
while (_BitScanForward(&rt, mask))
{
mask &= ~(1 << rt);
}
}
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroTile,
+ void* pData)
{
- DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
- SWR_CONTEXT *pContext = pDC->pContext;
+ DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pData;
+ SWR_CONTEXT* pContext = pDC->pContext;
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
{
if (pDesc->attachmentMask & (1 << i))
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(
- pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, pDesc->createNewTiles, numSamples);
+ HOTTILE* pHotTile =
+ pContext->pHotTileMgr->GetHotTileNoLoad(pContext,
+ pDC,
+ macroTile,
+ (SWR_RENDERTARGET_ATTACHMENT)i,
+ pDesc->createNewTiles,
+ numSamples);
if (pHotTile)
{
pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
}
}
-template<uint32_t sampleCountT>
-void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <uint32_t sampleCountT>
+void BackendNullPS(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t x,
+ uint32_t y,
+ SWR_TRIANGLE_DESC& work,
+ RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BENullBackend, pDC->drawId);
    /// @todo: handle center multisample pattern
RDTSC_BEGIN(BESetup, pDC->drawId);
- const API_STATE &state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
- const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
+ const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
// iterate over active samples
- unsigned long sample = 0;
- uint32_t sampleMask = state.blendState.sampleMask;
+ unsigned long sample = 0;
+ uint32_t sampleMask = state.blendState.sampleMask;
while (_BitScanForward(&sample, sampleMask))
{
sampleMask &= ~(1 << sample);
if (coverageMask)
{
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+ uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
+ uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
{
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+ static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+ "Unsupported depth hot tile format");
- const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+ const simdscalar z =
+ _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
CalcSampleBarycentrics(coeffs, psContext);
// interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vZ = vplaneps(coeffs.vZa,
+ coeffs.vZb,
+ coeffs.vZc,
+ psContext.vI.sample,
+ psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
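                    // vplaneps evaluates the plane equation a*i + b*j + c at the sample's
                    // barycentric i/j coordinates; quantization then snaps z to the precision
                    // of the bound depth buffer format before the depth test runs.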
RDTSC_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
{
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+ coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+ work.pUserClipBuffer,
+ psContext.vI.sample,
+ psContext.vJ.sample);
}
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
+ simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
simdscalar stencilPassMask = vCoverageMask;
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
- simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+ simdscalar depthPassMask = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ psContext.vZ,
+ pDepthSample,
+ vCoverageMask,
+ pStencilSample,
+ &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask),
+ _simd_movemask_ps(stencilPassMask),
+ _simd_movemask_ps(vCoverageMask)));
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthSample,
+ depthPassMask,
+ vCoverageMask,
+ pStencilSample,
+ stencilPassMask);
RDTSC_END(BEEarlyDepthTest, 0);
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
+ uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT_BE(DepthPassCount, statCount);
}
}
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+ pStencilBuffer +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
vXSamplePosUL = _simd_add_ps(vXSamplePosUL, dx);
}
RDTSC_END(BENullBackend, 0);
}
-PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
+PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2] // canEarlyZ
- = {};
-PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [2] // isCenterPattern
- [SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2] // forcedSampleCount
- [2] // canEarlyZ
- = {};
-PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_INPUT_COVERAGE_COUNT]
+PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
+ [2] // canEarlyZ
+ = {};
+PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
+ [SWR_INPUT_COVERAGE_COUNT][2] // centroid
+ [2] // forcedSampleCount
+ [2] // canEarlyZ
+ = {};
+PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT]
[2] // centroid
[2] // canEarlyZ
- = {};
+ = {};
void InitBackendFuncTables()
-{
+{
InitBackendPixelRate();
InitBackendSingleFuncTable(gBackendSingleSample);
InitBackendSampleFuncTable(gBackendSampleRateTable);
- gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
- gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ;
- gBackendNullPs[SWR_MULTISAMPLE_4X] = &BackendNullPS < SWR_MULTISAMPLE_4X > ;
- gBackendNullPs[SWR_MULTISAMPLE_8X] = &BackendNullPS < SWR_MULTISAMPLE_8X > ;
- gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS < SWR_MULTISAMPLE_16X > ;
+ gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS<SWR_MULTISAMPLE_1X>;
+ gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS<SWR_MULTISAMPLE_2X>;
+ gBackendNullPs[SWR_MULTISAMPLE_4X] = &BackendNullPS<SWR_MULTISAMPLE_4X>;
+ gBackendNullPs[SWR_MULTISAMPLE_8X] = &BackendNullPS<SWR_MULTISAMPLE_8X>;
+ gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS<SWR_MULTISAMPLE_16X>;
}
/****************************************************************************
 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
 *
 * @file backend.h
 *
 * @brief Backend handles rasterization, pixel shading and output merger
 * operations.
 *
 ******************************************************************************/
#pragma once
#include "common/os.h"
#include "depthstencil.h"
#include "rdtsc_core.h"
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
-void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
+void ProcessComputeBE(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t threadGroupId,
+ void*& pSpillFillBuffer,
+ void*& pScratchSpace);
+void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
+void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
+void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroTile,
+ void* pData);
+void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
-typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
+typedef void (*PFN_CLEAR_TILES)(DRAW_CONTEXT*,
+ HANDLE hWorkerData,
+ SWR_RENDERTARGET_ATTACHMENT rt,
+ uint32_t,
+ uint32_t,
+ DWORD[4],
+ const SWR_RECT& rect);
-extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
+extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2]; // canEarlyZ
-extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [2] // isCenterPattern
- [SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2] // forcedSampleCount
- [2] // canEarlyZ
- ;
+extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
+ [2]; // canEarlyZ
+extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
+ [SWR_INPUT_COVERAGE_COUNT][2] // centroid
+ [2] // forcedSampleCount
+ [2] // canEarlyZ
+ ;
extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
- [SWR_INPUT_COVERAGE_COUNT]
- [2] // centroid
- [2]; // canEarlyZ
-
+ [SWR_INPUT_COVERAGE_COUNT][2] // centroid
+ [2]; // canEarlyZ
/****************************************************************************
 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
 *
 * @file backend.cpp
 *
 * @brief Backend handles rasterization, pixel shading and output merger
 * operations.
 *
 ******************************************************************************/
#include <smmintrin.h>
#include <algorithm>
-template<SWR_FORMAT format>
-void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
+template <SWR_FORMAT format>
+void ClearRasterTile(uint8_t* pTileBuffer, simdvector& value)
{
- auto lambda = [&](int32_t comp)
- {
+ auto lambda = [&](int32_t comp) {
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
};
- const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
+ const uint32_t numIter =
+ (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
for (uint32_t i = 0; i < numIter; ++i)
{
}
#if USE_8x2_TILE_BACKEND
-template<SWR_FORMAT format>
-void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
+template <SWR_FORMAT format>
+void ClearRasterTile(uint8_t* pTileBuffer, simd16vector& value)
{
- auto lambda = [&](int32_t comp)
- {
+ auto lambda = [&](int32_t comp) {
FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
};
- const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
+ const uint32_t numIter =
+ (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
for (uint32_t i = 0; i < numIter; ++i)
{
}
#endif
-template<SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
+template <SWR_FORMAT format>
+INLINE void ClearMacroTile(DRAW_CONTEXT* pDC,
+ HANDLE hWorkerPrivateData,
+ SWR_RENDERTARGET_ATTACHMENT rt,
+ uint32_t macroTile,
+ uint32_t renderTargetArrayIndex,
+ DWORD clear[4],
+ const SWR_RECT& rect)
{
// convert clear color to hottile format
// clear color is in RGBA float/uint32
vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
}
- vComp = FormatTraits<format>::pack(comp, vComp);
+ vComp = FormatTraits<format>::pack(comp, vComp);
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
}
vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
}
- vComp = FormatTraits<format>::pack(comp, vComp);
+ vComp = FormatTraits<format>::pack(comp, vComp);
vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
}
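    // At this point vClear holds the clear color converted component-by-component into
    // the hot tile format's bit layout and swizzled into the format's component order.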
MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
// Init to full macrotile
- SWR_RECT clearTile =
- {
+ SWR_RECT clearTile = {
KNOB_MACROTILE_X_DIM * int32_t(tileX),
KNOB_MACROTILE_Y_DIM * int32_t(tileY),
KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
clearTile &= rect;
// translate to local hottile origin
- clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
+ clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM,
+ -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
// Make maximums inclusive (needed for convert to raster tiles)
clearTile.xmax -= 1;
const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
// compute steps between raster tile samples / raster tiles / macro tile rows
- const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
- const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
+ const uint32_t rasterTileSampleStep =
+ KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
+ const uint32_t rasterTileStep =
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
- const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
-
- HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex);
- uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
- uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
+ const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
+
+ HOTTILE* pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroTile,
+ rt,
+ true,
+ numSamples,
+ renderTargetArrayIndex);
+ uint32_t rasterTileStartOffset =
+ (ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp>>(
+ pitch, clearTile.xmin, clearTile.ymin)) *
+ numSamples;
+    uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset;
// loop over all raster tiles in the current hot tile
for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
uint8_t* pRasterTile = pRasterTileRow;
for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
{
- for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
+ for (int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
{
ClearRasterTile<format>(pRasterTile, vClear);
pRasterTile += rasterTileSampleStep;
pHotTile->state = HOTTILE_DIRTY;
}
-
-void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
{
- SWR_CONTEXT *pContext = pDC->pContext;
- HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+ SWR_CONTEXT* pContext = pDC->pContext;
+ HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
if (KNOB_FAST_CLEAR)
{
- CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+ CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
- uint32_t numSamples = GetNumSamples(sampleCount);
+ uint32_t numSamples = GetNumSamples(sampleCount);
SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
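        // Fast clear defers the actual fill: each affected hot tile only records the
        // clear value and enters the HOTTILE_CLEAR state; pixels are written later,
        // e.g. via the pending-clear path in ProcessStoreTileBE above.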
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
{
- unsigned long rt = 0;
- uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
+ unsigned long rt = 0;
+ uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
while (_BitScanForward(&rt, mask))
{
mask &= ~(1 << rt);
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE* pHotTile =
+ pContext->pHotTileMgr->GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroTile,
+ (SWR_RENDERTARGET_ATTACHMENT)rt,
+ true,
+ numSamples,
+ pClear->renderTargetArrayIndex);
// All we want to do here is to mark the hot tile as being in a "needs clear" state.
pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
- pHotTile->state = HOTTILE_CLEAR;
+ pHotTile->state = HOTTILE_CLEAR;
}
}
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroTile,
+ SWR_ATTACHMENT_DEPTH,
+ true,
+ numSamples,
+ pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
- pHotTile->state = HOTTILE_CLEAR;
+ pHotTile->state = HOTTILE_CLEAR;
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
{
- HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
+ HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroTile,
+ SWR_ATTACHMENT_STENCIL,
+ true,
+ numSamples,
+ pClear->renderTargetArrayIndex);
pHotTile->clearData[0] = pClear->clearStencil;
- pHotTile->state = HOTTILE_CLEAR;
+ pHotTile->state = HOTTILE_CLEAR;
}
RDTSC_END(BEClear, 1);
else
{
// Legacy clear
- CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+ CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
RDTSC_BEGIN(BEClear, pDC->drawId);
if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
- unsigned long rt = 0;
- uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
+ unsigned long rt = 0;
+ uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
while (_BitScanForward(&rt, mask))
{
mask &= ~(1 << rt);
- pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC,
+ hWorkerPrivateData,
+ (SWR_RENDERTARGET_ATTACHMENT)rt,
+ macroTile,
+ pClear->renderTargetArrayIndex,
+ clearData,
+ pClear->rect);
}
}
if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
{
DWORD clearData[4];
- clearData[0] = *(DWORD*)&pClear->clearDepth;
+ clearData[0] = *(DWORD*)&pClear->clearDepth;
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
SWR_ASSERT(pfnClearTiles != nullptr);
- pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC,
+ hWorkerPrivateData,
+ SWR_ATTACHMENT_DEPTH,
+ macroTile,
+ pClear->renderTargetArrayIndex,
+ clearData,
+ pClear->rect);
}
if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
{
DWORD clearData[4];
- clearData[0] = pClear->clearStencil;
+ clearData[0] = pClear->clearStencil;
PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
- pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+ pfnClearTiles(pDC,
+ hWorkerPrivateData,
+ SWR_ATTACHMENT_STENCIL,
+ macroTile,
+ pClear->renderTargetArrayIndex,
+ clearData,
+ pClear->rect);
}
RDTSC_END(BEClear, 1);
{
memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
- gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
- gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
- gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
- gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
- gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
+ gClearTilesTable[R8G8B8A8_UNORM] = ClearMacroTile<R8G8B8A8_UNORM>;
+ gClearTilesTable[B8G8R8A8_UNORM] = ClearMacroTile<B8G8R8A8_UNORM>;
+ gClearTilesTable[R32_FLOAT] = ClearMacroTile<R32_FLOAT>;
+ gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
+ gClearTilesTable[R8_UINT] = ClearMacroTile<R8_UINT>;
}
/****************************************************************************
 * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
 *
 * @file backend.h
 *
 * @brief Backend handles rasterization, pixel shading and output merger
 * operations.
 *
 ******************************************************************************/
#pragma once
-void InitBackendSingleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
+void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
+void InitBackendSampleFuncTable(
+ PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
+static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
+ SWR_PS_CONTEXT& psContext);
enum SWR_BACKEND_FUNCS
#if KNOB_SIMD_WIDTH == 8
static const __m256 vCenterOffsetsX = __m256{0.5, 1.5, 0.5, 1.5, 2.5, 3.5, 2.5, 3.5};
static const __m256 vCenterOffsetsY = __m256{0.5, 0.5, 1.5, 1.5, 0.5, 0.5, 1.5, 1.5};
-static const __m256 vULOffsetsX = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
-static const __m256 vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
+static const __m256 vULOffsetsX = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
+static const __m256 vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
#define MASK 0xff
#endif
-static INLINE simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar const &vI, simdscalar const &vJ)
+static INLINE simdmask ComputeUserClipMask(uint8_t clipMask,
+ float* pUserClipBuffer,
+ simdscalar const& vI,
+ simdscalar const& vJ)
{
- simdscalar vClipMask = _simd_setzero_ps();
- uint32_t numClipDistance = _mm_popcnt_u32(clipMask);
+ simdscalar vClipMask = _simd_setzero_ps();
+ uint32_t numClipDistance = _mm_popcnt_u32(clipMask);
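    // One interpolated distance per set bit in clipMask; the returned mask flags pixels
    // whose user clip distance evaluates negative, which the caller ANDs out of coverage.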
for (uint32_t i = 0; i < numClipDistance; ++i)
{
INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
{
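    // Samples within a raster tile are laid out contiguously, so sample N begins at
    // N * (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * bpp / 8) bytes; the table below simply
    // precomputes that per-sample stride times the sample index. The depth and stencil
    // variants that follow use the same layout with their own hot tile formats.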
- static const uint32_t RasterTileColorOffsets[16]
- { 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ static const uint32_t RasterTileColorOffsets[16]{
+ 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+ 15,
};
assert(sampleNum < 16);
return RasterTileColorOffsets[sampleNum];
INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
{
- static const uint32_t RasterTileDepthOffsets[16]
- { 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ static const uint32_t RasterTileDepthOffsets[16]{
+ 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+ 15,
};
assert(sampleNum < 16);
return RasterTileDepthOffsets[sampleNum];
INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
{
- static const uint32_t RasterTileStencilOffsets[16]
- { 0,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14,
- (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15,
+ static const uint32_t RasterTileStencilOffsets[16]{
+ 0,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 2,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 3,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 4,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 5,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 6,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 7,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 8,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 9,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 10,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 11,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 12,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 13,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 14,
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+ 15,
};
assert(sampleNum < 16);
return RasterTileStencilOffsets[sampleNum];
}
-template<typename T, uint32_t InputCoverage>
+template <typename T, uint32_t InputCoverage>
struct generateInputCoverage
{
- INLINE generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
+ INLINE generateInputCoverage(const uint64_t* const coverageMask,
+ uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
+ const uint32_t sampleMask)
{
// will need to update for avx512
assert(KNOB_SIMD_WIDTH == 8);
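        // This is a coverage transpose: the rasterizer provides one 64-bit mask per
        // sample (bit p = pixel p covered), while the pixel shader wants one mask per
        // pixel (bit s = sample s covered). The gathers, shuffles, and movemasks below
        // perform that transpose eight pixels at a time.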
simdscalari mask[2];
simdscalari sampleCoverage[2];
-
- if(T::bIsCenterPattern)
+
+ if (T::bIsCenterPattern)
{
// center coverage is the same for all samples; just broadcast to the sample slots
uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
- if(T::MultisampleT::numSamples == 1)
+ if (T::MultisampleT::numSamples == 1)
{
sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
}
- else if(T::MultisampleT::numSamples == 2)
+ else if (T::MultisampleT::numSamples == 2)
{
- sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
+ sampleCoverage[0] =
+ _simd_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
}
- else if(T::MultisampleT::numSamples == 4)
+ else if (T::MultisampleT::numSamples == 4)
{
- sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
+ sampleCoverage[0] = _simd_set_epi32(
+ 0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
}
- else if(T::MultisampleT::numSamples == 8)
+ else if (T::MultisampleT::numSamples == 8)
{
sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
}
- else if(T::MultisampleT::numSamples == 16)
+ else if (T::MultisampleT::numSamples == 16)
{
sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
sampleCoverage[1] = _simd_set1_epi32(centerCoverage);
}
else
{
- simdscalari src = _simd_set1_epi32(0);
+ simdscalari src = _simd_set1_epi32(0);
simdscalari index0 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
- if(T::MultisampleT::numSamples == 1)
+ if (T::MultisampleT::numSamples == 1)
{
mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
}
- else if(T::MultisampleT::numSamples == 2)
+ else if (T::MultisampleT::numSamples == 2)
{
mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
}
- else if(T::MultisampleT::numSamples == 4)
+ else if (T::MultisampleT::numSamples == 4)
{
mask[0] = _simd_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
}
- else if(T::MultisampleT::numSamples == 8)
+ else if (T::MultisampleT::numSamples == 8)
{
mask[0] = _simd_set1_epi32(-1);
}
- else if(T::MultisampleT::numSamples == 16)
+ else if (T::MultisampleT::numSamples == 16)
{
mask[0] = _simd_set1_epi32(-1);
mask[1] = _simd_set1_epi32(-1);
- index1 = _simd_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
+ index1 = _simd_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
}
// gather coverage for samples 0-7
- sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
- if(T::MultisampleT::numSamples > 8)
+ sampleCoverage[0] =
+ _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
+ (const float*)coverageMask,
+ index0,
+ _mm256_castsi256_ps(mask[0]),
+ 8));
+ if (T::MultisampleT::numSamples > 8)
{
// gather coverage for samples 8-15
- sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
+ sampleCoverage[1] =
+ _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
+ (const float*)coverageMask,
+ index1,
+ _mm256_castsi256_ps(mask[1]),
+ 8));
}
}
- mask[0] = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0);
+ mask[0] = _mm256_set_epi8(-1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 0xC,
+ 0x8,
+ 0x4,
+ 0x0,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ -1,
+ 0xC,
+ 0x8,
+ 0x4,
+ 0x0);
// pull out the 8bit 4x2 coverage for samples 0-7 into the lower 32 bits of each 128bit lane
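        // the shuffle control selects byte 0 of each 32-bit coverage element (source byte
        // offsets 0x0, 0x4, 0x8, 0xC) within each 128-bit lane; the -1 entries have the
        // high bit set, which makes the byte shuffle write zeroes for those positions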
simdscalari packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]);
simdscalari packedCoverage1;
- if(T::MultisampleT::numSamples > 8)
+ if (T::MultisampleT::numSamples > 8)
{
- // pull out the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit lane
+ // pull out the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit
+ // lane
packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]);
}
- #if (KNOB_ARCH == KNOB_ARCH_AVX)
- // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
+#if (KNOB_ARCH == KNOB_ARCH_AVX)
+ // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
simdscalari hiToLow = _mm256_permute2f128_si256(packedCoverage0, packedCoverage0, 0x83);
- simdscalar shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
- packedCoverage0 = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
+ simdscalar shufRes = _mm256_shuffle_ps(
+ _mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
+ packedCoverage0 = _mm256_castps_si256(
+ _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
simdscalari packedSampleCoverage;
- if(T::MultisampleT::numSamples > 8)
+ if (T::MultisampleT::numSamples > 8)
{
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
- hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
- shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
- shufRes = _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage1), shufRes, 0xFE);
- packedCoverage1 = _mm256_castps_si256(_mm256_castpd_ps(_mm256_shuffle_pd(_mm256_castps_pd(shufRes), _mm256_castps_pd(shufRes), 0x01)));
- packedSampleCoverage = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), _mm256_castsi256_ps(packedCoverage1), 0xFC));
+ hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
+ shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow),
+ _mm256_castsi256_ps(hiToLow),
+ _MM_SHUFFLE(1, 1, 0, 1));
+ shufRes = _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage1), shufRes, 0xFE);
+ packedCoverage1 = _mm256_castps_si256(_mm256_castpd_ps(
+ _mm256_shuffle_pd(_mm256_castps_pd(shufRes), _mm256_castps_pd(shufRes), 0x01)));
+ packedSampleCoverage = _mm256_castps_si256(_mm256_blend_ps(
+ _mm256_castsi256_ps(packedCoverage0), _mm256_castsi256_ps(packedCoverage1), 0xFC));
}
else
{
packedSampleCoverage = packedCoverage0;
}
- #else
+#else
simdscalari permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x4, 0x0);
- // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
+ // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask);
simdscalari packedSampleCoverage;
- if(T::MultisampleT::numSamples > 8)
+ if (T::MultisampleT::numSamples > 8)
{
permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7);
// pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
{
packedSampleCoverage = packedCoverage0;
}
- #endif
+#endif
- for(int32_t i = KNOB_SIMD_WIDTH - 1; i >= 0; i--)
+ for (int32_t i = KNOB_SIMD_WIDTH - 1; i >= 0; i--)
{
- // convert packed sample coverage masks into single coverage masks for all samples for each pixel in the 4x2
+ // convert packed sample coverage masks into single coverage masks for all samples for
+ // each pixel in the 4x2
inputMask[i] = _simd_movemask_epi8(packedSampleCoverage);
- if(!T::bForcedSampleCount)
+ if (!T::bForcedSampleCount)
{
// input coverage has to be anded with sample mask if MSAA isn't forced on
inputMask[i] &= sampleMask;
}
}
- INLINE generateInputCoverage(const uint64_t *const coverageMask, simdscalar &inputCoverage, const uint32_t sampleMask)
+ INLINE generateInputCoverage(const uint64_t* const coverageMask,
+ simdscalar& inputCoverage,
+ const uint32_t sampleMask)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
- inputCoverage = _simd_castsi_ps(_simd_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]));
+ inputCoverage = _simd_castsi_ps(_simd_set_epi32(inputMask[7],
+ inputMask[6],
+ inputMask[5],
+ inputMask[4],
+ inputMask[3],
+ inputMask[2],
+ inputMask[1],
+ inputMask[0]));
}
-
};
-template<typename T>
+template <typename T>
struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
{
- INLINE generateInputCoverage(const uint64_t *const coverageMask, simdscalar &inputCoverage, const uint32_t sampleMask)
+ INLINE generateInputCoverage(const uint64_t* const coverageMask,
+ simdscalar& inputCoverage,
+ const uint32_t sampleMask)
{
// will need to update for avx512
assert(KNOB_SIMD_WIDTH == 8);
- simdscalari vec = _simd_set1_epi32(coverageMask[0]);
+ simdscalari vec = _simd_set1_epi32(coverageMask[0]);
const simdscalari bit = _simd_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
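        // Each SIMD lane tests its own pixel's bit in the conservative coverage mask:
        // the AND isolates that lane's bit, the signed compare against zero produces an
        // all-ones lane wherever the bit was set, and the blend narrows that to 0 or 1.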
- vec = _simd_and_si(vec, bit);
- vec = _simd_cmplt_epi32(_simd_setzero_si(), vec);
- vec = _simd_blendv_epi32(_simd_setzero_si(), _simd_set1_epi32(1), vec);
- inputCoverage = _simd_castsi_ps(vec);
+ vec = _simd_and_si(vec, bit);
+ vec = _simd_cmplt_epi32(_simd_setzero_si(), vec);
+ vec = _simd_blendv_epi32(_simd_setzero_si(), _simd_set1_epi32(1), vec);
+ inputCoverage = _simd_castsi_ps(vec);
}
- INLINE generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
+ INLINE generateInputCoverage(const uint64_t* const coverageMask,
+ uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
+ const uint32_t sampleMask)
{
- uint32_t simdCoverage = (coverageMask[0] & MASK);
+ uint32_t simdCoverage = (coverageMask[0] & MASK);
static const uint32_t FullCoverageMask = (1 << T::MultisampleT::numSamples) - 1;
- for(int i = 0; i < KNOB_SIMD_WIDTH; i++)
+ for (int i = 0; i < KNOB_SIMD_WIDTH; i++)
{
// set all samples to covered if conservative coverage mask is set for that pixel
inputMask[i] = (((1 << i) & simdCoverage) > 0) ? FullCoverageMask : 0;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Centroid behaves exactly as follows :
-// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center (even if the sample pattern does not happen to
+// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center
+//     (even if the sample pattern does not happen to have a sample location there).
-// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample index, where sample coverage is after ANDing the
+// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample
+//     index, where sample coverage is the primitive coverage ANDed with the SampleMask Rasterizer
+//     State.
-// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to fill out 2x2 pixel stamps, the attribute is
-// evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the
-// SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
+// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to
+//     fill out 2x2 pixel stamps, the attribute is evaluated as follows: if the SampleMask
+//     Rasterizer state is a subset of the samples in the pixel, then the first sample covered by
+//     the SampleMask Rasterizer State is the evaluation point. Otherwise (full SampleMask), the
+//     pixel center is the evaluation point.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
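// As an illustrative example (hypothetical values; 4x MSAA, so full per-pixel coverage is 0xF):
//   coverage 0xF, SampleMask 0xF -> case (1):  evaluate at the pixel center
//   coverage 0x4, SampleMask 0xF -> case (2):  evaluate at sample 2, the lowest covered index
//   coverage 0x0, SampleMask 0x3 -> case (3a): evaluate at sample 0, the lowest SampleMask bit
//   coverage 0x0, SampleMask 0xF -> case (3b): evaluate at the pixel center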
-template<typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
- const uint64_t *const coverageMask, const uint32_t sampleMask,
- simdscalar const &vXSamplePosUL, simdscalar const &vYSamplePosUL)
+template <typename T>
+INLINE void CalcCentroidPos(SWR_PS_CONTEXT& psContext,
+ const SWR_MULTISAMPLE_POS& samplePos,
+ const uint64_t* const coverageMask,
+ const uint32_t sampleMask,
+ simdscalar const& vXSamplePosUL,
+ simdscalar const& vYSamplePosUL)
{
uint32_t inputMask[KNOB_SIMD_WIDTH];
generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
(inputMask[6] > 0) ? (_BitScanForward(&sampleNum[6], inputMask[6])) : (sampleNum[6] = 0);
(inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
- // look up and set the sample offsets from UL pixel corner for first covered sample
+ // look up and set the sample offsets from UL pixel corner for first covered sample
simdscalar vXSample = _simd_set_ps(samplePos.X(sampleNum[7]),
- samplePos.X(sampleNum[6]),
- samplePos.X(sampleNum[5]),
- samplePos.X(sampleNum[4]),
- samplePos.X(sampleNum[3]),
- samplePos.X(sampleNum[2]),
- samplePos.X(sampleNum[1]),
- samplePos.X(sampleNum[0]));
+ samplePos.X(sampleNum[6]),
+ samplePos.X(sampleNum[5]),
+ samplePos.X(sampleNum[4]),
+ samplePos.X(sampleNum[3]),
+ samplePos.X(sampleNum[2]),
+ samplePos.X(sampleNum[1]),
+ samplePos.X(sampleNum[0]));
simdscalar vYSample = _simd_set_ps(samplePos.Y(sampleNum[7]),
- samplePos.Y(sampleNum[6]),
- samplePos.Y(sampleNum[5]),
- samplePos.Y(sampleNum[4]),
- samplePos.Y(sampleNum[3]),
- samplePos.Y(sampleNum[2]),
- samplePos.Y(sampleNum[1]),
- samplePos.Y(sampleNum[0]));
+ samplePos.Y(sampleNum[6]),
+ samplePos.Y(sampleNum[5]),
+ samplePos.Y(sampleNum[4]),
+ samplePos.Y(sampleNum[3]),
+ samplePos.Y(sampleNum[2]),
+ samplePos.Y(sampleNum[1]),
+ samplePos.Y(sampleNum[0]));
// add sample offset to UL pixel corner
vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
// Case (1) and case (3b) - All samples covered or not covered with full SampleMask
static const simdscalari vFullyCoveredMask = T::MultisampleT::FullSampleMask();
- simdscalari vInputCoveragei = _simd_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]);
+ simdscalari vInputCoveragei = _simd_set_epi32(inputMask[7],
+ inputMask[6],
+ inputMask[5],
+ inputMask[4],
+ inputMask[3],
+ inputMask[2],
+ inputMask[1],
+ inputMask[0]);
simdscalari vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
static const simdscalari vZero = _simd_setzero_si();
- const simdscalari vSampleMask = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
- simdscalari vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
- simdscalari vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
- simdscalari vCase3b = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
+ const simdscalari vSampleMask = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
+ simdscalari vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
+ simdscalari vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
+ simdscalari vCase3b = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
simdscalari vEvalAtCenter = _simd_or_si(vAllSamplesCovered, vCase3b);
// set the centroid position based on results from above
- psContext.vX.centroid = _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
- psContext.vY.centroid = _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
+ psContext.vX.centroid =
+ _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
+ psContext.vY.centroid =
+ _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
// Case (3a) No samples covered and partial sample mask
simdscalari vSomeSampleMaskSamples = _simd_cmplt_epi32(vSampleMask, vFullyCoveredMask);
// sample mask should never be all 0's for this case, but handle it anyway
unsigned long firstCoveredSampleMaskSample = 0;
- (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask)) : (firstCoveredSampleMaskSample = 0);
+ (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask))
+ : (firstCoveredSampleMaskSample = 0);
simdscalari vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
// blend in case 3a pixel locations
- psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
- psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
+ psContext.vX.centroid =
+ _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
+ psContext.vY.centroid =
+ _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
}
-INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext,
- const simdscalar &vXSamplePosUL, const simdscalar &vYSamplePosUL)
+INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs,
+ SWR_PS_CONTEXT& psContext,
+ const simdscalar& vXSamplePosUL,
+ const simdscalar& vYSamplePosUL)
{
// evaluate I,J
- psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
- psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
+ psContext.vI.centroid =
+ vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
+ psContext.vJ.centroid =
+ vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
// interpolate 1/w
- psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
+ psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW,
+ coeffs.vBOneOverW,
+ coeffs.vCOneOverW,
+ psContext.vI.centroid,
+ psContext.vJ.centroid);
}
-INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const &z, float minz, float maxz)
+INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const& z, float minz, float maxz)
{
const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz));
const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz));
return _simd_movemask_ps(_simd_and_ps(minzMask, maxzMask));
}
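// Typical use, mirroring the depth-bounds clamp performed in the Z-test loops below (a sketch
// assuming a depth tile already loaded into 'z' and a lane mask 'vCoverage'):
//
//   simdmask accept = CalcDepthBoundsAcceptMask(z, minz, maxz);
//   vCoverage       = _simd_and_ps(vCoverage, _simd_vmask_ps(accept));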
-template<typename T>
+template <typename T>
INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
{
// RT has to be single sample if we're in forcedMSAA mode
- if(T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
+ if (T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
{
return 1;
}
- // unless we're forced to single sample, in which case we run the OM at the sample count of the RT
- else if(T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
+ // unless we're forced to single sample, in which case we run the OM at the sample count of the
+ // RT
+ else if (T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
{
return GetNumSamples(blendSampleCount);
}
}
}
-inline void SetupBarycentricCoeffs(BarycentricCoeffs *coeffs, const SWR_TRIANGLE_DESC &work)
+inline void SetupBarycentricCoeffs(BarycentricCoeffs* coeffs, const SWR_TRIANGLE_DESC& work)
{
// broadcast scalars
coeffs->vCOneOverW = _simd_broadcast_ss(&work.OneOverW[2]);
}
-inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uint8_t **pDepthBuffer, uint8_t **pStencilBuffer, uint32_t colorHotTileMask, RenderOutputBuffers &renderBuffers)
+inline void SetupRenderBuffers(uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS],
+ uint8_t** pDepthBuffer,
+ uint8_t** pStencilBuffer,
+ uint32_t colorHotTileMask,
+ RenderOutputBuffers& renderBuffers)
{
-
DWORD index;
while (_BitScanForward(&index, colorHotTileMask))
{
if (pStencilBuffer)
{
- *pStencilBuffer = renderBuffers.pStencil;;
+ *pStencilBuffer = renderBuffers.pStencil;
}
}
-template<typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
+template <typename T>
+void SetupPixelShaderContext(SWR_PS_CONTEXT* psContext,
+ const SWR_MULTISAMPLE_POS& samplePos,
+ SWR_TRIANGLE_DESC& work)
{
- psContext->pAttribs = work.pAttribs;
- psContext->pPerspAttribs = work.pPerspAttribs;
- psContext->frontFace = work.triFlags.frontFacing;
+ psContext->pAttribs = work.pAttribs;
+ psContext->pPerspAttribs = work.pPerspAttribs;
+ psContext->frontFace = work.triFlags.frontFacing;
psContext->renderTargetArrayIndex = work.triFlags.renderTargetArrayIndex;
- // save Ia/Ib/Ic and Ja/Jb/Jc if we need to reevaluate i/j/k in the shader because of pull attribs
+ // save Ia/Ib/Ic and Ja/Jb/Jc if we need to reevaluate i/j/k in the shader because of pull
+ // attribs
psContext->I = work.I;
psContext->J = work.J;
psContext->recipDet = work.recipDet;
- psContext->pRecipW = work.pRecipW;
- psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
- psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
+ psContext->pRecipW = work.pRecipW;
+ psContext->pSamplePosX =
+ samplePos.X(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
+ psContext->pSamplePosY =
+ samplePos.Y(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
- psContext->sampleIndex = 0;
+ psContext->sampleIndex = 0;
}
-template<typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
- const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
+template <typename T, bool IsSingleSample>
+void CalcCentroid(SWR_PS_CONTEXT* psContext,
+ const SWR_MULTISAMPLE_POS& samplePos,
+ const BarycentricCoeffs& coeffs,
+ const uint64_t* const coverageMask,
+ uint32_t sampleMask)
{
- if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
+    if (IsSingleSample) // checking (T::MultisampleT::numSamples == 1) alone doesn't cut it here;
+                        // the centroid positions are still different
{
// for 1x case, centroid is pixel center
- psContext->vX.centroid = psContext->vX.center;
- psContext->vY.centroid = psContext->vY.center;
- psContext->vI.centroid = psContext->vI.center;
- psContext->vJ.centroid = psContext->vJ.center;
+ psContext->vX.centroid = psContext->vX.center;
+ psContext->vY.centroid = psContext->vY.center;
+ psContext->vI.centroid = psContext->vI.center;
+ psContext->vJ.centroid = psContext->vJ.center;
psContext->vOneOverW.centroid = psContext->vOneOverW.center;
}
else
}
else
{
- // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
- CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
+ // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate
+ // coverage 2X'..
+ CalcCentroidPos<T>(*psContext,
+ samplePos,
+ coverageMask,
+ sampleMask,
+ psContext->vX.UL,
+ psContext->vY.UL);
}
CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
}
}
-template<typename T>
+template <typename T>
struct PixelRateZTestLoop
{
- PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
- uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
- pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
- samplePos(state.rastState.samplePositions),
- clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
+ PixelRateZTestLoop(DRAW_CONTEXT* DC,
+ uint32_t _workerId,
+ const SWR_TRIANGLE_DESC& Work,
+ const BarycentricCoeffs& Coeffs,
+ const API_STATE& apiState,
+ uint8_t*& depthBuffer,
+ uint8_t*& stencilBuffer,
+ const uint8_t ClipDistanceMask) :
+ pDC(DC),
+ workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
+ samplePos(state.rastState.samplePositions), clipDistanceMask(ClipDistanceMask),
+ pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer) {}
INLINE
- uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext,
- const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
+ uint32_t operator()(simdscalar& activeLanes,
+ SWR_PS_CONTEXT& psContext,
+ const CORE_BUCKETS BEDepthBucket,
+ uint32_t currentSimdIn8x8 = 0)
{
- uint32_t statCount = 0;
+ uint32_t statCount = 0;
simdscalar anyDepthSamplePassed = _simd_setzero_ps();
- for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
+ for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
- const uint8_t *pCoverageMask = (uint8_t*)&work.coverageMask[sample];
- vCoverageMask[sample] = _simd_and_ps(activeLanes, _simd_vmask_ps(pCoverageMask[currentSimdIn8x8] & MASK));
+ const uint8_t* pCoverageMask = (uint8_t*)&work.coverageMask[sample];
+ vCoverageMask[sample] =
+ _simd_and_ps(activeLanes, _simd_vmask_ps(pCoverageMask[currentSimdIn8x8] & MASK));
- if(!_simd_movemask_ps(vCoverageMask[sample]))
+ if (!_simd_movemask_ps(vCoverageMask[sample]))
{
- vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] = _simd_setzero_ps();
+ vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] =
+ _simd_setzero_ps();
continue;
}
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+ uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
+ uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
{
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+ static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+ "Unsupported depth hot tile format");
- const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+ const simdscalar z = _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
- vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
+ vCoverageMask[sample] =
+ _simd_and_ps(vCoverageMask[sample],
+ _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// calc I & J per sample
CalcSampleBarycentrics(coeffs, psContext);
- if(psState.writesODepth)
+ if (psState.writesODepth)
{
{
// broadcast and test oDepth(psContext.vZ) written from the PS for each sample
}
else
{
- vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ vZ[sample] = vplaneps(
+ coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
}
///@todo: perspective correct vs non-perspective correct clipping?
// if clip distances are enabled, we need to interpolate for each sample
- if(clipDistanceMask)
+ if (clipDistanceMask)
{
- uint8_t clipMask = ComputeUserClipMask(clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+ uint8_t clipMask = ComputeUserClipMask(clipDistanceMask,
+ work.pUserClipBuffer,
+ psContext.vI.sample,
+ psContext.vJ.sample);
- vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(~clipMask));
+ vCoverageMask[sample] =
+ _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(~clipMask));
}
// ZTest for this sample
///@todo Need to uncomment out this bucket.
- //RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
- depthPassMask[sample] = vCoverageMask[sample];
+ // RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
+ depthPassMask[sample] = vCoverageMask[sample];
stencilPassMask[sample] = vCoverageMask[sample];
- depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- vZ[sample], pDepthSample, vCoverageMask[sample],
- pStencilSample, &stencilPassMask[sample]);
- //RDTSC_END(BEDepthBucket, 0);
+ depthPassMask[sample] = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ vZ[sample],
+ pDepthSample,
+ vCoverageMask[sample],
+ pStencilSample,
+ &stencilPassMask[sample]);
+ // RDTSC_END(BEDepthBucket, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
- if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
+ if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
{
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
- pDepthSample, depthPassMask[sample], vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
-
- if(!_simd_movemask_ps(depthPassMask[sample]))
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ vZ[sample],
+ pDepthSample,
+ depthPassMask[sample],
+ vCoverageMask[sample],
+ pStencilSample,
+ stencilPassMask[sample]);
+
+ if (!_simd_movemask_ps(depthPassMask[sample]))
{
continue;
}
}
anyDepthSamplePassed = _simd_or_ps(anyDepthSamplePassed, depthPassMask[sample]);
- uint32_t statMask = _simd_movemask_ps(depthPassMask[sample]);
+ uint32_t statMask = _simd_movemask_ps(depthPassMask[sample]);
statCount += _mm_popcnt_u32(statMask);
}
private:
// functor inputs
DRAW_CONTEXT* pDC;
- uint32_t workerId;
+ uint32_t workerId;
- const SWR_TRIANGLE_DESC& work;
- const BarycentricCoeffs& coeffs;
- const API_STATE& state;
- const SWR_PS_STATE& psState;
+ const SWR_TRIANGLE_DESC& work;
+ const BarycentricCoeffs& coeffs;
+ const API_STATE& state;
+ const SWR_PS_STATE& psState;
const SWR_MULTISAMPLE_POS& samplePos;
- const uint8_t clipDistanceMask;
- uint8_t*& pDepthBuffer;
- uint8_t*& pStencilBuffer;
+ const uint8_t clipDistanceMask;
+ uint8_t*& pDepthBuffer;
+ uint8_t*& pStencilBuffer;
};
-INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
+INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT& psContext)
{
// evaluate I,J
- psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
- psContext.vJ.center = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
+ psContext.vI.center =
+ vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
+ psContext.vJ.center =
+ vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
psContext.vI.center = _simd_mul_ps(psContext.vI.center, coeffs.vRecipDet);
psContext.vJ.center = _simd_mul_ps(psContext.vJ.center, coeffs.vRecipDet);
// interpolate 1/w
- psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.center, psContext.vJ.center);
+ psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW,
+ coeffs.vBOneOverW,
+ coeffs.vCOneOverW,
+ psContext.vI.center,
+ psContext.vJ.center);
}
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
+static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
+ SWR_PS_CONTEXT& psContext)
{
// evaluate I,J
- psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
- psContext.vJ.sample = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
+ psContext.vI.sample =
+ vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
+ psContext.vJ.sample =
+ vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
psContext.vI.sample = _simd_mul_ps(psContext.vI.sample, coeffs.vRecipDet);
psContext.vJ.sample = _simd_mul_ps(psContext.vJ.sample, coeffs.vRecipDet);
// interpolate 1/w
- psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW,
+ coeffs.vBOneOverW,
+ coeffs.vCOneOverW,
+ psContext.vI.sample,
+ psContext.vJ.sample);
}
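// For reference, the vplaneps calls above evaluate the plane equation per SIMD lane;
// conceptually (a scalar sketch, not the actual fused-multiply-add SIMD implementation):
//
//   float plane(float a, float b, float c, float x, float y)
//   {
//       return a * x + b * y + c;
//   }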
// Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
+INLINE void OutputMerger4x2(DRAW_CONTEXT* pDC,
+ SWR_PS_CONTEXT& psContext,
+ uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS],
+ uint32_t sample,
+ const SWR_BLEND_STATE* pBlendState,
+ const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS],
+ simdscalar& coverageMask,
+ simdscalar const& depthPassMask,
+ uint32_t renderTargetMask,
+ uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
- simdvector blendOut;
+ simdvector blendOut;
DWORD rt = 0;
while (_BitScanForward(&rt, renderTargetMask))
{
renderTargetMask &= ~(1 << rt);
- uint8_t *pColorSample = pColorBase[rt] + rasterTileColorOffset;
+ uint8_t* pColorSample = pColorBase[rt] + rasterTileColorOffset;
- const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
+ const SWR_RENDER_TARGET_BLEND_STATE* pRTBlend = &pBlendState->renderTarget[rt];
- SWR_BLEND_CONTEXT blendContext = { 0 };
+ SWR_BLEND_CONTEXT blendContext = {0};
{
// pfnBlendFunc may not update all channels. Initialize with PS output.
/// TODO: move this into the blend JIT.
blendOut = psContext.shaded[rt];
blendContext.pBlendState = pBlendState;
- blendContext.src = &psContext.shaded[rt];
- blendContext.src1 = &psContext.shaded[1];
- blendContext.src0alpha = reinterpret_cast<simdvector *>(&psContext.shaded[0].w);
- blendContext.sampleNum = sample;
- blendContext.pDst = (simdvector *) &pColorSample;
- blendContext.result = &blendOut;
- blendContext.oMask = &psContext.oMask;
- blendContext.pMask = reinterpret_cast<simdscalari *>(&coverageMask);
+ blendContext.src = &psContext.shaded[rt];
+ blendContext.src1 = &psContext.shaded[1];
+ blendContext.src0alpha = reinterpret_cast<simdvector*>(&psContext.shaded[0].w);
+ blendContext.sampleNum = sample;
+ blendContext.pDst = (simdvector*)&pColorSample;
+ blendContext.result = &blendOut;
+ blendContext.oMask = &psContext.oMask;
+ blendContext.pMask = reinterpret_cast<simdscalari*>(&coverageMask);
// Blend outputs and update coverage mask for alpha test
- if(pfnBlendFunc[rt] != nullptr)
+ if (pfnBlendFunc[rt] != nullptr)
{
pfnBlendFunc[rt](&blendContext);
}
}
// Track alpha events
- AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+ AR_EVENT(
+ AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
- // final write mask
+ // final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+ static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+ "Unsupported hot tile format");
const uint32_t simd = KNOB_SIMD_WIDTH * sizeof(float);
// store with color mask
- if(!pRTBlend->writeDisableRed)
+ if (!pRTBlend->writeDisableRed)
{
_simd_maskstore_ps((float*)pColorSample, outputMask, blendOut.x);
}
- if(!pRTBlend->writeDisableGreen)
+ if (!pRTBlend->writeDisableGreen)
{
_simd_maskstore_ps((float*)(pColorSample + simd), outputMask, blendOut.y);
}
- if(!pRTBlend->writeDisableBlue)
+ if (!pRTBlend->writeDisableBlue)
{
_simd_maskstore_ps((float*)(pColorSample + simd * 2), outputMask, blendOut.z);
}
- if(!pRTBlend->writeDisableAlpha)
+ if (!pRTBlend->writeDisableAlpha)
{
_simd_maskstore_ps((float*)(pColorSample + simd * 3), outputMask, blendOut.w);
}
#if USE_8x2_TILE_BACKEND
// Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
- const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
+INLINE void OutputMerger8x2(DRAW_CONTEXT* pDC,
+ SWR_PS_CONTEXT& psContext,
+ uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS],
+ uint32_t sample,
+ const SWR_BLEND_STATE* pBlendState,
+ const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS],
+ simdscalar& coverageMask,
+ simdscalar const& depthPassMask,
+ uint32_t renderTargetMask,
+ bool useAlternateOffset,
+ uint32_t workerId)
{
// type safety guaranteed from template instantiation in BEChooser<>::GetFunc
uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
{
renderTargetMask &= ~(1 << rt);
- const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
+ const SWR_RENDER_TARGET_BLEND_STATE* pRTBlend = &pBlendState->renderTarget[rt];
simdscalar* pColorSample;
- bool hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed || !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
+ bool hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed ||
+ !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
if (hotTileEnable)
{
- pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
- blendSrc[0] = pColorSample[0];
- blendSrc[1] = pColorSample[2];
- blendSrc[2] = pColorSample[4];
- blendSrc[3] = pColorSample[6];
+ pColorSample = reinterpret_cast<simdscalar*>(pColorBase[rt] + rasterTileColorOffset);
+ blendSrc[0] = pColorSample[0];
+ blendSrc[1] = pColorSample[2];
+ blendSrc[2] = pColorSample[4];
+ blendSrc[3] = pColorSample[6];
}
else
{
pColorSample = nullptr;
}
- SWR_BLEND_CONTEXT blendContext = { 0 };
+ SWR_BLEND_CONTEXT blendContext = {0};
{
// pfnBlendFunc may not update all channels. Initialize with PS output.
/// TODO: move this into the blend JIT.
blendOut = psContext.shaded[rt];
- blendContext.pBlendState = pBlendState;
- blendContext.src = &psContext.shaded[rt];
- blendContext.src1 = &psContext.shaded[1];
- blendContext.src0alpha = reinterpret_cast<simdvector *>(&psContext.shaded[0].w);
- blendContext.sampleNum = sample;
- blendContext.pDst = &blendSrc;
- blendContext.result = &blendOut;
- blendContext.oMask = &psContext.oMask;
- blendContext.pMask = reinterpret_cast<simdscalari *>(&coverageMask);
+ blendContext.pBlendState = pBlendState;
+ blendContext.src = &psContext.shaded[rt];
+ blendContext.src1 = &psContext.shaded[1];
+ blendContext.src0alpha = reinterpret_cast<simdvector*>(&psContext.shaded[0].w);
+ blendContext.sampleNum = sample;
+ blendContext.pDst = &blendSrc;
+ blendContext.result = &blendOut;
+ blendContext.oMask = &psContext.oMask;
+ blendContext.pMask = reinterpret_cast<simdscalari*>(&coverageMask);
// Blend outputs and update coverage mask for alpha test
- if(pfnBlendFunc[rt] != nullptr)
+ if (pfnBlendFunc[rt] != nullptr)
{
pfnBlendFunc[rt](&blendContext);
}
}
// Track alpha events
- AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+ AR_EVENT(
+ AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
- // final write mask
+ // final write mask
simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+ static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+ "Unsupported hot tile format");
// store with color mask
if (!pRTBlend->writeDisableRed)
{
- _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[0]), outputMask, blendOut.x);
+ _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[0]), outputMask, blendOut.x);
}
if (!pRTBlend->writeDisableGreen)
{
- _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[2]), outputMask, blendOut.y);
+ _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[2]), outputMask, blendOut.y);
}
if (!pRTBlend->writeDisableBlue)
{
- _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[4]), outputMask, blendOut.z);
+ _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[4]), outputMask, blendOut.z);
}
if (!pRTBlend->writeDisableAlpha)
{
- _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[6]), outputMask, blendOut.w);
+ _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[6]), outputMask, blendOut.w);
}
}
}
#endif
-template<typename T>
-void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendPixelRate(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t x,
+ uint32_t y,
+ SWR_TRIANGLE_DESC& work,
+ RenderOutputBuffers& renderBuffers)
{
- ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the backend
+ ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the
+ /// backend
RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
- const API_STATE &state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
- SWR_CONTEXT *pContext = pDC->pContext;
- void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+ SWR_CONTEXT* pContext = pDC->pContext;
+ void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- SWR_PS_CONTEXT psContext;
+ SWR_PS_CONTEXT psContext;
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer,
+ &pDepthBuffer,
+ &pStencilBuffer,
+ state.colorHottileEnable,
+ renderBuffers);
RDTSC_END(BESetup, 0);
- PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask);
+ PixelRateZTestLoop<T> PixelRateZTest(pDC,
+ workerId,
+ work,
+ coeffs,
+ state,
+ pDepthBuffer,
+ pStencilBuffer,
+ state.backendState.clipDistanceMask);
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
+ psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
- for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
+ for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
+ psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
- for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
+ for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
{
#if USE_8x2_TILE_BACKEND
const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
#endif
simdscalar activeLanes;
- if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
+ if (!(work.anyCoveredSamples & MASK))
+ {
+ goto Endtile;
+ }
activeLanes = _simd_vmask_ps(work.anyCoveredSamples & MASK);
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
- const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+ const uint64_t* pCoverageMask =
+ (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+ ? &work.innerCoverageMask
+ : &work.coverageMask[0];
- generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+ generateInputCoverage<T, T::InputCoverage>(
+ pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(
+ &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
RDTSC_END(BEBarycentric, 0);
- if(T::bForcedSampleCount)
+ if (T::bForcedSampleCount)
{
- // candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
- const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
- activeLanes = _simd_and_ps(activeLanes, vSampleMask);
+ // candidate pixels (that passed coverage) will cause shader invocation if any bits
+ // in the samplemask are set
+ const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(
+ _simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
+ activeLanes = _simd_and_ps(activeLanes, vSampleMask);
}
// Early-Z?
- if(T::bCanEarlyZ && !T::bForcedSampleCount)
+ if (T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
}
// if we have no covered samples that passed depth at this point, go to next tile
- if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+ if (!_simd_movemask_ps(activeLanes))
+ {
+ goto Endtile;
+ }
- if(state.psState.usesSourceDepth)
+ if (state.psState.usesSourceDepth)
{
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
// interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = vplaneps(
+ coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 0);
}
// pixels that are currently active
psContext.activeMask = _simd_castps_si(activeLanes);
- psContext.oMask = T::MultisampleT::FullSampleMask();
+ psContext.oMask = T::MultisampleT::FullSampleMask();
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
AR_EVENT(PSStats(psContext.stats.numInstExecuted));
// update active lanes to remove any discarded or oMask'd pixels
- activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
- if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+ activeLanes = _simd_castsi_ps(_simd_and_si(
+ psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
+ if (!_simd_movemask_ps(activeLanes))
+ {
+ goto Endtile;
+ }
// late-Z
- if(!T::bCanEarlyZ && !T::bForcedSampleCount)
+ if (!T::bCanEarlyZ && !T::bForcedSampleCount)
{
uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
UPDATE_STAT_BE(DepthPassCount, depthPassCount);
AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
}
- // if we have no covered samples that passed depth at this point, skip OM and go to next tile
- if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+ // if we have no covered samples that passed depth at this point, skip OM and go to next
+ // tile
+ if (!_simd_movemask_ps(activeLanes))
+ {
+ goto Endtile;
+ }
// output merger
// loop over all samples, broadcasting the results of the PS to all passing pixels
- for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
+ for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount);
+ sample++)
{
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
- // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
- uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
+ // center pattern does a single coverage/depth/stencil test, standard pattern tests
+ // all samples
+ uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
simdscalar coverageMask, depthMask;
- if(T::bForcedSampleCount)
+ if (T::bForcedSampleCount)
{
coverageMask = depthMask = activeLanes;
}
{
coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
- if(!_simd_movemask_ps(depthMask))
+ if (!_simd_movemask_ps(depthMask))
{
// stencil should already have been written in early/lateZ tests
RDTSC_END(BEOutputMerger, 0);
continue;
}
}
-
+
// broadcast the results of the PS to all passing pixels
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
-#else // USE_8x2_TILE_BACKEND
- OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
+ OutputMerger8x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ sample,
+ &state.blendState,
+ state.pfnBlendFunc,
+ coverageMask,
+ depthMask,
+ state.psState.renderTargetMask,
+ useAlternateOffset,
+ workerId);
+#else // USE_8x2_TILE_BACKEND
+ OutputMerger4x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ sample,
+ &state.blendState,
+ state.pfnBlendFunc,
+ coverageMask,
+ depthMask,
+ state.psState.renderTargetMask,
+ workerId);
#endif // USE_8x2_TILE_BACKEND
- if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
+ if (!state.psState.forceEarlyZ && !T::bForcedSampleCount)
{
- uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
- pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
+ uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
+ uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ PixelRateZTest.vZ[coverageSampleNum],
+ pDepthSample,
+ depthMask,
+ coverageMask,
+ pStencilSample,
+ PixelRateZTest.stencilPassMask[coverageSampleNum]);
}
RDTSC_END(BEOutputMerger, 0);
}
-Endtile:
+ Endtile:
RDTSC_BEGIN(BEEndTile, pDC->drawId);
- for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
+ for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
{
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
- if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+ if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- DWORD rt;
+ DWORD rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
- DWORD rt;
+ DWORD rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+ pStencilBuffer +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
+ psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
}
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
+ psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
RDTSC_END(BEPixelRateBackend, 0);
}
-template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
- uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
- >
+template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X,
+ uint32_t isCenter = 0,
+ uint32_t coverage = 0,
+ uint32_t centroid = 0,
+ uint32_t forced = 0,
+          uint32_t canEarlyZ = 0>
struct SwrBackendTraits
{
- static const bool bIsCenterPattern = (isCenter == 1);
- static const uint32_t InputCoverage = coverage;
- static const bool bCentroidPos = (centroid == 1);
- static const bool bForcedSampleCount = (forced == 1);
- static const bool bCanEarlyZ = (canEarlyZ == 1);
+ static const bool bIsCenterPattern = (isCenter == 1);
+ static const uint32_t InputCoverage = coverage;
+ static const bool bCentroidPos = (centroid == 1);
+ static const bool bForcedSampleCount = (forced == 1);
+ static const bool bCanEarlyZ = (canEarlyZ == 1);
typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
};
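// For illustration, a hypothetical instantiation selecting 4x MSAA with normal input coverage,
// centroid interpolation and early-Z permitted (argument order follows the template above):
//
//   typedef SwrBackendTraits<SWR_MULTISAMPLE_4X, 0, SWR_INPUT_COVERAGE_NORMAL, 1, 0, 1>
//       BE4xCentroidEarlyZ;
//   // BE4xCentroidEarlyZ::bCanEarlyZ == true
//   // BE4xCentroidEarlyZ::MultisampleT::numSamples == 4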
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-* operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ * operations.
+ *
+ ******************************************************************************/
#include <smmintrin.h>
#include <algorithm>
-template<typename T>
-void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendSampleRate(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t x,
+ uint32_t y,
+ SWR_TRIANGLE_DESC& work,
+ RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
- void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- const API_STATE &state = GetApiState(pDC);
+ void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+ const API_STATE& state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
- SWR_PS_CONTEXT psContext;
+ SWR_PS_CONTEXT psContext;
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer,
+ &pDepthBuffer,
+ &pStencilBuffer,
+ state.colorHottileEnable,
+ renderBuffers);
RDTSC_END(BESetup, 0);
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
+ psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
+ psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
#endif
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
- const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+ const uint64_t* pCoverageMask =
+ (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+ ? &work.innerCoverageMask
+ : &work.coverageMask[0];
- generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+ generateInputCoverage<T, T::InputCoverage>(
+ pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, false>(
+ &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
RDTSC_END(BEBarycentric, 0);
if (coverageMask)
{
// offset depth/stencil buffers current sample
- uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
- uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+ uint8_t* pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
+ uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
{
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+ static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+ "Unsupported depth hot tile format");
- const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+ const simdscalar z =
+ _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
CalcSampleBarycentrics(coeffs, psContext);
// interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+ psContext.vZ = vplaneps(coeffs.vZa,
+ coeffs.vZb,
+ coeffs.vZc,
+ psContext.vI.sample,
+ psContext.vJ.sample);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
RDTSC_END(BEBarycentric, 0);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
{
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+ coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+ work.pUserClipBuffer,
+ psContext.vI.sample,
+ psContext.vJ.sample);
}
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
- simdscalar depthPassMask = vCoverageMask;
+ simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
+ simdscalar depthPassMask = vCoverageMask;
simdscalar stencilPassMask = vCoverageMask;
// Early-Z?
if (T::bCanEarlyZ)
{
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+ depthPassMask = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ psContext.vZ,
+ pDepthSample,
+ vCoverageMask,
+ pStencilSample,
+ &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
+ _simd_movemask_ps(stencilPassMask),
+ _simd_movemask_ps(vCoverageMask)));
RDTSC_END(BEEarlyDepthTest, 0);
// early-exit if no samples passed depth or earlyZ is forced on.
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
{
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthSample,
+ depthPassMask,
+ vCoverageMask,
+ pStencilSample,
+ stencilPassMask);
if (!_simd_movemask_ps(depthPassMask))
{
}
psContext.sampleIndex = sample;
- psContext.activeMask = _simd_castps_si(vCoverageMask);
+ psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
if (!T::bCanEarlyZ)
{
RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
- AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+ depthPassMask = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ psContext.vZ,
+ pDepthSample,
+ vCoverageMask,
+ pStencilSample,
+ &stencilPassMask);
+ AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
+ _simd_movemask_ps(stencilPassMask),
+ _simd_movemask_ps(vCoverageMask)));
RDTSC_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
// need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthSample,
+ depthPassMask,
+ vCoverageMask,
+ pStencilSample,
+ stencilPassMask);
work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
continue;
}
}
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
+ uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT_BE(DepthPassCount, statCount);
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
+ OutputMerger8x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ sample,
+ &state.blendState,
+ state.pfnBlendFunc,
+ vCoverageMask,
+ depthPassMask,
+ state.psState.renderTargetMask,
+ useAlternateOffset,
+ workerId);
#else
- OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
+ OutputMerger4x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ sample,
+ &state.blendState,
+ state.pfnBlendFunc,
+ vCoverageMask,
+ depthPassMask,
+ state.psState.renderTargetMask,
+ workerId);
#endif
// do final depth write after all pixel kills
if (!state.psState.forceEarlyZ)
{
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthSample,
+ depthPassMask,
+ vCoverageMask,
+ pStencilSample,
+ stencilPassMask);
}
RDTSC_END(BEOutputMerger, 0);
}
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- DWORD rt;
+ DWORD rt;
uint32_t rtMask = state.colorHottileEnable;
while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
#else
while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+ pStencilBuffer +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
+ psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
}
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
+ psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
{
switch (tArg)
{
- case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_MSAA_SAMPLE_RATE:
+ return BackendSampleRate<SwrBackendTraits<ArgsT...>>;
+ break;
case SWR_BACKEND_SINGLE_SAMPLE:
case SWR_BACKEND_MSAA_PIXEL_RATE:
SWR_ASSERT(0 && "Invalid backend func\n");
{
switch (tArg)
{
- case SWR_INPUT_COVERAGE_NONE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
- case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
- case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
+ case SWR_INPUT_COVERAGE_NONE:
+ return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+ remainingArgs...);
+ break;
+ case SWR_INPUT_COVERAGE_NORMAL:
+ return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
+ remainingArgs...);
+ break;
+ case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
+ return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
+ remainingArgs...);
+ break;
default:
SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
+ return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+ remainingArgs...);
break;
}
}
{
switch (tArg)
{
- case SWR_MULTISAMPLE_1X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_2X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_4X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_8X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_16X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+ case SWR_MULTISAMPLE_1X:
+ return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_2X:
+ return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_4X:
+ return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_8X:
+ return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_16X:
+ return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
+ break;
default:
SWR_ASSERT(0 && "Invalid sample count\n");
return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
}
};
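// Each GetFunc overload peels one runtime argument off the front, switches on
// it, and recurses with the value appended to the template parameter pack; the
// chain of switches thus converts a tuple of runtime enums into one fully
// specialized backend function pointer at table-build time.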
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
+void InitBackendSampleFuncTable(
+ PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
{
- for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
+ for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT;
+ sampleCount++)
{
for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
{
for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[sampleCount][inputCoverage][centroid][canEarlyZ] =
- BEChooserSampleRate<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
- (centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
+ BEChooserSampleRate<>::GetFunc(
+ (SWR_MULTISAMPLE_COUNT)sampleCount,
+ false,
+ (SWR_INPUT_COVERAGE)inputCoverage,
+ (centroid > 0),
+ false,
+ (canEarlyZ > 0),
+ (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
}
}
}
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-* operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ * operations.
+ *
+ ******************************************************************************/
#include <smmintrin.h>
#include <algorithm>
-template<typename T>
-void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendSingleSample(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t x,
+ uint32_t y,
+ SWR_TRIANGLE_DESC& work,
+ RenderOutputBuffers& renderBuffers)
{
RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
RDTSC_BEGIN(BESetup, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- const API_STATE &state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
BarycentricCoeffs coeffs;
SetupBarycentricCoeffs(&coeffs, work);
- SWR_PS_CONTEXT psContext;
+ SWR_PS_CONTEXT psContext;
const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
SetupPixelShaderContext<T>(&psContext, samplePos, work);
uint8_t *pDepthBuffer, *pStencilBuffer;
- SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+ SetupRenderBuffers(psContext.pColorBuffer,
+ &pDepthBuffer,
+ &pStencilBuffer,
+ state.colorHottileEnable,
+ renderBuffers);
RDTSC_END(BESetup, 1);
- psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
+ psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
{
- psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
+ psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
{
if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
{
- static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+ static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+ "Unsupported depth hot tile format");
- const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthBuffer));
+ const simdscalar z =
+ _simd_load_ps(reinterpret_cast<const float*>(pDepthBuffer));
const float minz = state.depthBoundsState.depthBoundsTestMinValue;
const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
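                    // Conceptually the depth-bounds test keeps only lanes whose
                    // stored depth lies inside [minz, maxz]; per lane:
                    //     pass = (z >= minz) && (z <= maxz)
                    // and the resulting mask is folded into the tile's coverage.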
if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
{
- const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+ const uint64_t* pCoverageMask =
+ (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+ ? &work.innerCoverageMask
+ : &work.coverageMask[0];
- generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+ generateInputCoverage<T, T::InputCoverage>(
+ pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
}
RDTSC_BEGIN(BEBarycentric, pDC->drawId);
CalcPixelBarycentrics(coeffs, psContext);
- CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+ CalcCentroid<T, true>(
+ &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
// interpolate and quantize z
- psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+ psContext.vZ = vplaneps(
+ coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
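            // vplaneps evaluates the plane equation a*i + b*j + c with fused
            // multiply-adds, producing per-pixel z from the barycentric i/j at
            // the pixel center; pfnQuantizeDepth then snaps that z to the
            // precision of the bound depth format.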
RDTSC_END(BEBarycentric, 1);
// interpolate user clip distance if available
if (state.backendState.clipDistanceMask)
{
- coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.center, psContext.vJ.center);
+ coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+ work.pUserClipBuffer,
+ psContext.vI.center,
+ psContext.vJ.center);
}
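            // ComputeUserClipMask returns a bit per lane in which any enabled
            // clip distance interpolates negative, so ANDing its complement into
            // coverageMask discards pixels outside the user clip planes.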
- simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
- simdscalar depthPassMask = vCoverageMask;
+ simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
+ simdscalar depthPassMask = vCoverageMask;
simdscalar stencilPassMask = vCoverageMask;
// Early-Z?
if (T::bCanEarlyZ)
{
RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
- AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+ depthPassMask = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ psContext.vZ,
+ pDepthBuffer,
+ vCoverageMask,
+ pStencilBuffer,
+ &stencilPassMask);
+ AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
+ _simd_movemask_ps(stencilPassMask),
+ _simd_movemask_ps(vCoverageMask)));
RDTSC_END(BEEarlyDepthTest, 0);
// early-exit if no pixels passed depth or earlyZ is forced on
if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
{
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthBuffer,
+ depthPassMask,
+ vCoverageMask,
+ pStencilBuffer,
+ stencilPassMask);
                if (!_simd_movemask_ps(depthPassMask))
                {
                    // nothing passed depth; skip shading for this tile
                    goto Endtile;
                }
psContext.sampleIndex = 0;
- psContext.activeMask = _simd_castps_si(vCoverageMask);
+ psContext.activeMask = _simd_castps_si(vCoverageMask);
// execute pixel shader
RDTSC_BEGIN(BEPixelShader, pDC->drawId);
if (!T::bCanEarlyZ)
{
RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
- depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
- psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
- AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+ depthPassMask = DepthStencilTest(&state,
+ work.triFlags.frontFacing,
+ work.triFlags.viewportIndex,
+ psContext.vZ,
+ pDepthBuffer,
+ vCoverageMask,
+ pStencilBuffer,
+ &stencilPassMask);
+ AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
+ _simd_movemask_ps(stencilPassMask),
+ _simd_movemask_ps(vCoverageMask)));
RDTSC_END(BELateDepthTest, 0);
if (!_simd_movemask_ps(depthPassMask))
{
// need to call depth/stencil write for stencil write
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthBuffer,
+ depthPassMask,
+ vCoverageMask,
+ pStencilBuffer,
+ stencilPassMask);
goto Endtile;
}
- } else {
+ }
+ else
+ {
// for early z, consolidate discards from shader
// into depthPassMask
depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
}
- uint32_t statMask = _simd_movemask_ps(depthPassMask);
+ uint32_t statMask = _simd_movemask_ps(depthPassMask);
uint32_t statCount = _mm_popcnt_u32(statMask);
UPDATE_STAT_BE(DepthPassCount, statCount);
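            // movemask packs one sign bit per SIMD lane, so the popcount of the
            // depth-pass mask is exactly the number of covered pixels that
            // survived the depth test, e.g. 0b10110010 contributes 4.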
// output merger
RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
#if USE_8x2_TILE_BACKEND
- OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
+ OutputMerger8x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ 0,
+ &state.blendState,
+ state.pfnBlendFunc,
+ vCoverageMask,
+ depthPassMask,
+ state.psState.renderTargetMask,
+ useAlternateOffset,
+ workerId);
#else
-        OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
+ OutputMerger4x2(pDC,
+ psContext,
+ psContext.pColorBuffer,
+ 0,
+ &state.blendState,
+ state.pfnBlendFunc,
+ vCoverageMask,
+ depthPassMask,
+ state.psState.renderTargetMask,
+                        workerId);
#endif
// do final depth write after all pixel kills
if (!state.psState.forceEarlyZ)
{
- DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
- pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+ DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+ &state.depthStencilState,
+ work.triFlags.frontFacing,
+ psContext.vZ,
+ pDepthBuffer,
+ depthPassMask,
+ vCoverageMask,
+ pStencilBuffer,
+ stencilPassMask);
}
RDTSC_END(BEOutputMerger, 0);
}
-Endtile:
+ Endtile:
RDTSC_BEGIN(BEEndTile, pDC->drawId);
work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
- if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+ if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
{
work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
}
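            // The coverage words hold SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM bits per
            // SIMD tile, so both masks are shifted down by one tile's worth of
            // bits to expose the next tile's coverage on the following iteration.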
#if USE_8x2_TILE_BACKEND
if (useAlternateOffset)
{
- DWORD rt;
+ DWORD rt;
uint32_t rtMask = state.colorHottileEnable;
- while(_BitScanForward(&rt, rtMask))
+ while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
}
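            // The 8x2 backend writes two SIMD tiles of color per output-merger
            // call, so the color pointers advance only on alternating iterations
            // (useAlternateOffset) and by twice the single-tile stride.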
#else
while (_BitScanForward(&rt, rtMask))
{
rtMask &= ~(1 << rt);
- psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+ psContext.pColorBuffer[rt] +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
}
#endif
pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
- pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+ pStencilBuffer +=
+ (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
RDTSC_END(BEEndTile, 0);
- psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
+ psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
}
- psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
+ psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
}
// Last Arg Terminator
static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
{
- switch(tArg)
+ switch (tArg)
{
- case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
+ case SWR_BACKEND_SINGLE_SAMPLE:
+ return BackendSingleSample<SwrBackendTraits<ArgsT...>>;
+ break;
case SWR_BACKEND_MSAA_PIXEL_RATE:
case SWR_BACKEND_MSAA_SAMPLE_RATE:
default:
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
{
- switch(tArg)
+ switch (tArg)
{
- case SWR_INPUT_COVERAGE_NONE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
- case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
- case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
+ case SWR_INPUT_COVERAGE_NONE:
+ return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+ remainingArgs...);
+ break;
+ case SWR_INPUT_COVERAGE_NORMAL:
+ return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
+ remainingArgs...);
+ break;
+ case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
+ return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
+ remainingArgs...);
+ break;
default:
- SWR_ASSERT(0 && "Invalid sample pattern\n");
- return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
- break;
+ SWR_ASSERT(0 && "Invalid sample pattern\n");
+ return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+ remainingArgs...);
+ break;
}
}
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
{
- switch(tArg)
+ switch (tArg)
{
- case SWR_MULTISAMPLE_1X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_2X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_4X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_8X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
- case SWR_MULTISAMPLE_16X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+ case SWR_MULTISAMPLE_1X:
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_2X:
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_4X:
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_8X:
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
+ break;
+ case SWR_MULTISAMPLE_16X:
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
+ break;
default:
- SWR_ASSERT(0 && "Invalid sample count\n");
- return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
- break;
+ SWR_ASSERT(0 && "Invalid sample count\n");
+ return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+ break;
}
}
template <typename... TArgsT>
static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
{
- if(tArg == true)
+ if (tArg == true)
{
return BEChooserSingleSample<ArgsT..., 1>::GetFunc(remainingArgs...);
}
void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
{
- for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
+ for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
{
- for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
+ for (uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
{
- for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
+ for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
{
table[inputCoverage][isCentroid][canEarlyZ] =
- BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
- (isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
+ BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X,
+ false,
+ (SWR_INPUT_COVERAGE)inputCoverage,
+ (isCentroid > 0),
+ false,
+ (canEarlyZ > 0),
+ SWR_BACKEND_SINGLE_SAMPLE);
}
}
}
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file binner.cpp
-*
-* @brief Implementation for the macrotile binner
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file binner.cpp
+ *
+ * @brief Implementation for the macrotile binner
+ *
+ ******************************************************************************/
#include "binner.h"
#include "context.h"
// Function Prototype
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- Float<SIMD_T> recipW[],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx);
+void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[],
+ Float<SIMD_T> recipW[],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx);
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx);
+void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx);
//////////////////////////////////////////////////////////////////////////
/// @brief Processes attributes for the backend based on the linkage mask and linkage map.
/// @param pLinkageMap - maps VS attribute slot to PS slot
/// @param triIndex - Triangle to process attributes for
/// @param pBuffer - Output result
-template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate>
+template <typename NumVertsT,
+ typename IsSwizzledT,
+ typename HasConstantInterpT,
+ typename IsDegenerate>
INLINE void ProcessAttributes(
- DRAW_CONTEXT *pDC,
- PA_STATE&pa,
- uint32_t triIndex,
- uint32_t primId,
- float *pBuffer)
+ DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t triIndex, uint32_t primId, float* pBuffer)
{
static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
// Conservative Rasterization requires degenerate tris to have constant attribute interpolation
- uint32_t constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
+ uint32_t constantInterpMask =
+ IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
- const PRIMITIVE_TOPOLOGY topo = pa.binTopology;
+ const PRIMITIVE_TOPOLOGY topo = pa.binTopology;
static const float constTable[3][4] = {
- { 0.0f, 0.0f, 0.0f, 0.0f },
- { 0.0f, 0.0f, 0.0f, 1.0f },
- { 1.0f, 1.0f, 1.0f, 1.0f }
- };
+ {0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f}};
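    // The rows of constTable are the three canonical constant fills for
    // unwritten attributes: (0,0,0,0), (0,0,0,1) and (1,1,1,1); which row a
    // given attribute uses is selected by its swizzle's constant-source field.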
for (uint32_t i = 0; i < backendState.numAttributes; ++i)
{
{
SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib;
-
}
else
{
inputSlot = backendState.vertexAttribOffset + i;
}
- simd4scalar attrib[3]; // triangle attribs (always 4 wide)
- float* pAttribStart = pBuffer;
+ simd4scalar attrib[3]; // triangle attribs (always 4 wide)
+ float* pAttribStart = pBuffer;
if (HasConstantInterpT::value || IsDegenerate::value)
{
if (CheckBit(constantInterpMask, i))
{
- uint32_t vid;
- uint32_t adjustedTriIndex;
- static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
- static const int32_t quadProvokingTri[2][4] = { { 0, 0, 0, 1 },{ 0, -1, 0, 0 } };
- static const uint32_t quadProvokingVertex[2][4] = { { 0, 1, 2, 2 },{ 0, 1, 1, 2 } };
- static const int32_t qstripProvokingTri[2][4] = { { 0, 0, 0, 1 },{ -1, 0, 0, 0 } };
- static const uint32_t qstripProvokingVertex[2][4] = { { 0, 1, 2, 1 },{ 0, 0, 2, 1 } };
-
- switch (topo) {
+ uint32_t vid;
+ uint32_t adjustedTriIndex;
+ static const uint32_t tristripProvokingVertex[] = {0, 2, 1};
+ static const int32_t quadProvokingTri[2][4] = {{0, 0, 0, 1}, {0, -1, 0, 0}};
+ static const uint32_t quadProvokingVertex[2][4] = {{0, 1, 2, 2}, {0, 1, 1, 2}};
+ static const int32_t qstripProvokingTri[2][4] = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
+ static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
+
+ switch (topo)
+ {
case TOP_QUAD_LIST:
adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
- vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+ vid = quadProvokingVertex[triIndex & 1][provokingVertex];
break;
case TOP_QUAD_STRIP:
adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
- vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+ vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
break;
case TOP_TRIANGLE_STRIP:
adjustedTriIndex = triIndex;
- vid = (triIndex & 1)
- ? tristripProvokingVertex[provokingVertex]
- : provokingVertex;
+ vid =
+ (triIndex & 1) ? tristripProvokingVertex[provokingVertex] : provokingVertex;
break;
default:
adjustedTriIndex = triIndex;
- vid = provokingVertex;
+ vid = provokingVertex;
break;
}
}
}
-typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+typedef void (*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
struct ProcessAttributesChooser
{
}
};
-PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false)
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts,
+ bool IsSwizzled,
+ bool HasConstantInterp,
+ bool IsDegenerate = false)
{
- return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
+ return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(
+ IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
}
//////////////////////////////////////////////////////////////////////////
/// @param primIndex - primitive index to process
/// @param clipDistMask - mask of enabled clip distances
/// @param pUserClipBuffer - buffer to store results
-template<uint32_t NumVerts>
-void ProcessUserClipDist(const SWR_BACKEND_STATE& state, PA_STATE& pa, uint32_t primIndex, float *pRecipW, float* pUserClipBuffer)
+template <uint32_t NumVerts>
+void ProcessUserClipDist(const SWR_BACKEND_STATE& state,
+ PA_STATE& pa,
+ uint32_t primIndex,
+ float* pRecipW,
+ float* pUserClipBuffer)
{
- DWORD clipDist;
+ DWORD clipDist;
uint32_t clipDistMask = state.clipDistanceMask;
while (_BitScanForward(&clipDist, clipDistMask))
{
clipDistMask &= ~(1 << clipDist);
uint32_t clipSlot = clipDist >> 2;
uint32_t clipComp = clipDist & 0x3;
- uint32_t clipAttribSlot = clipSlot == 0 ?
- state.vertexClipCullOffset : state.vertexClipCullOffset + 1;
+ uint32_t clipAttribSlot =
+ clipSlot == 0 ? state.vertexClipCullOffset : state.vertexClipCullOffset + 1;
simd4scalar primClipDist[3];
pa.AssembleSingle(clipAttribSlot, primIndex, primClipDist);
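        // Up to eight clip distances are packed four per vec4 attribute, so
        // clipSlot (clipDist >> 2) selects the first or second clip/cull
        // attribute and clipComp (clipDist & 0x3) the component within it;
        // e.g. clipDist = 6 reads component 2 of the second slot.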
}
INLINE
-void TransposeVertices(simd4scalar(&dst)[8], const simdscalar &src0, const simdscalar &src1, const simdscalar &src2)
+void TransposeVertices(simd4scalar (&dst)[8],
+ const simdscalar& src0,
+ const simdscalar& src1,
+ const simdscalar& src2)
{
vTranspose3x8(dst, src0, src1, src2);
}
INLINE
-void TransposeVertices(simd4scalar(&dst)[16], const simd16scalar &src0, const simd16scalar &src1, const simd16scalar &src2)
+void TransposeVertices(simd4scalar (&dst)[16],
+ const simd16scalar& src0,
+ const simd16scalar& src1,
+ const simd16scalar& src2)
{
- vTranspose4x16(reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
+ vTranspose4x16(
+ reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
}
-
#if KNOB_ENABLE_EARLY_RAST
#define ER_SIMD_TILE_X_DIM (1 << ER_SIMD_TILE_X_SHIFT)
#define ER_SIMD_TILE_Y_DIM (1 << ER_SIMD_TILE_Y_SHIFT)
-
-template<typename SIMD_T>
+template <typename SIMD_T>
struct EarlyRastHelper
{
};
-template<>
+template <>
struct EarlyRastHelper<SIMD256>
{
static SIMD256::Integer InitShiftCntrl()
};
#if USE_SIMD16_FRONTEND
-template<>
+template <>
struct EarlyRastHelper<SIMD512>
{
static SIMD512::Integer InitShiftCntrl()
/// @param oneTileMask - defines triangles for ER to work on
/// (tris that fit into ER tile)
template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-uint32_t SIMDCALL EarlyRasterizer(
- SIMDBBOX_T<SIMD_T> &er_bbox,
- Integer<SIMD_T> (&vAi)[3],
- Integer<SIMD_T> (&vBi)[3],
- Integer<SIMD_T> (&vXi)[3],
- Integer<SIMD_T> (&vYi)[3],
- uint32_t cwTrisMask,
- uint32_t triMask,
- uint32_t oneTileMask)
+uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
+ Integer<SIMD_T> (&vAi)[3],
+ Integer<SIMD_T> (&vBi)[3],
+ Integer<SIMD_T> (&vXi)[3],
+ Integer<SIMD_T> (&vYi)[3],
+ uint32_t cwTrisMask,
+ uint32_t triMask,
+ uint32_t oneTileMask)
{
// step to pixel center of top-left pixel of the triangle bbox
- Integer<SIMD_T> vTopLeftX = SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
+ Integer<SIMD_T> vTopLeftX =
+ SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
vTopLeftX = SIMD_T::add_epi32(vTopLeftX, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
- Integer<SIMD_T> vTopLeftY = SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
+ Integer<SIMD_T> vTopLeftY =
+ SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
vTopLeftY = SIMD_T::add_epi32(vTopLeftY, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
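    // The bbox minima are ER-tile indices, so shifting left by the tile shift
    // plus FIXED_POINT_SHIFT converts them to x.8 fixed-point pixels, and
    // adding FIXED_POINT_SCALE / 2 (half a pixel) steps to the pixel center
    // where the edge equations are evaluated.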
// negate A and B for CW tris
RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0);
- Integer<SIMD_T> vShiftCntrl = EarlyRastHelper <SIMD_T>::InitShiftCntrl();
- Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask);
- Integer<SIMD_T> vMask = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl);
-
- vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
- vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
- vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
- vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
- vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
- vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
+ Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl();
+ Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask);
+ Integer<SIMD_T> vMask = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl);
+
+ vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
+ vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
+ vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
+ vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
+ vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
+ vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
// evaluate edge equations at top-left pixel
Integer<SIMD_T> vDeltaX0 = SIMD_T::sub_epi32(vTopLeftX, vXi[0]);
Integer<SIMD_T> vEdgeAdjust2 = SIMD_T::sub_epi32(vEdge2, SIMD_T::set1_epi32(1));
// vA < 0
- vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
- vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
- vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
+ vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
+ vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
+ vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
// vA == 0 && vB < 0
Integer<SIMD_T> vCmp0 = SIMD_T::cmpeq_epi32(vAi[0], SIMD_T::setzero_si());
vCmp1 = SIMD_T::and_si(vCmp1, vBi[1]);
vCmp2 = SIMD_T::and_si(vCmp2, vBi[2]);
- vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
- vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
- vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
-
+ vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
+ vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
+ vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
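    // This is the usual top-left fill-rule adjustment: edges with A < 0, and
    // horizontal edges with A == 0 && B < 0, have their edge value biased down
    // by one so samples lying exactly on them are excluded, keeping shared
    // edges from lighting the same pixel twice.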
#if ER_SIMD_TILE_X_DIM == 4 && ER_SIMD_TILE_Y_DIM == 4
// Go down
// coverage pixel 0
Integer<SIMD_T> vMask0 = SIMD_T::and_si(vEdge0, vEdge1);
- vMask0 = SIMD_T::and_si(vMask0, vEdge2);
+ vMask0 = SIMD_T::and_si(vMask0, vEdge2);
// coverage pixel 1
Integer<SIMD_T> vEdge0N = SIMD_T::add_epi32(vEdge0, vBi[0]);
Integer<SIMD_T> vEdge1N = SIMD_T::add_epi32(vEdge1, vBi[1]);
Integer<SIMD_T> vEdge2N = SIMD_T::add_epi32(vEdge2, vBi[2]);
- Integer<SIMD_T> vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
+ Integer<SIMD_T> vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
// coverage pixel 2
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
+ vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
+ vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
+ vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
Integer<SIMD_T> vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
+ vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
// coverage pixel 3
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
+ vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
+ vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
+ vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
Integer<SIMD_T> vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
+ vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
// One step to the right and then up
// coverage pixel 4
- vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
- vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
- vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
+ vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
+ vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
+ vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
Integer<SIMD_T> vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
+ vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
// coverage pixel 5
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+ vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+ vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+ vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
Integer<SIMD_T> vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
+ vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
// coverage pixel 6
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+ vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+ vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+ vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
Integer<SIMD_T> vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
+ vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
// coverage pixel 7
- vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
- vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
- vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+ vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+ vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+ vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
Integer<SIMD_T> vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
+ vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
Integer<SIMD_T> vLit1 = SIMD_T::or_si(vMask0, vMask1);
- vLit1 = SIMD_T::or_si(vLit1, vMask2);
- vLit1 = SIMD_T::or_si(vLit1, vMask3);
- vLit1 = SIMD_T::or_si(vLit1, vMask4);
- vLit1 = SIMD_T::or_si(vLit1, vMask5);
- vLit1 = SIMD_T::or_si(vLit1, vMask6);
- vLit1 = SIMD_T::or_si(vLit1, vMask7);
+ vLit1 = SIMD_T::or_si(vLit1, vMask2);
+ vLit1 = SIMD_T::or_si(vLit1, vMask3);
+ vLit1 = SIMD_T::or_si(vLit1, vMask4);
+ vLit1 = SIMD_T::or_si(vLit1, vMask5);
+ vLit1 = SIMD_T::or_si(vLit1, vMask6);
+ vLit1 = SIMD_T::or_si(vLit1, vMask7);
// Step to the right and go down again
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- vMask0 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask0 = SIMD_T::and_si(vMask0, vEdge2N);
+ vMask0 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask0 = SIMD_T::and_si(vMask0, vEdge2N);
// coverage pixel 1
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
+ vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
// coverage pixel 2
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
+ vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
// coverage pixel 3
vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
- vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
+ vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
// And for the last time - to the right and up
vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
- vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
+ vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
// coverage pixel 5
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
+ vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
// coverage pixel 6
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
+ vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
// coverage pixel 7
vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
- vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
- vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
+ vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
+ vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
Integer<SIMD_T> vLit2 = SIMD_T::or_si(vMask0, vMask1);
- vLit2 = SIMD_T::or_si(vLit2, vMask2);
- vLit2 = SIMD_T::or_si(vLit2, vMask3);
- vLit2 = SIMD_T::or_si(vLit2, vMask4);
- vLit2 = SIMD_T::or_si(vLit2, vMask5);
- vLit2 = SIMD_T::or_si(vLit2, vMask6);
- vLit2 = SIMD_T::or_si(vLit2, vMask7);
+ vLit2 = SIMD_T::or_si(vLit2, vMask2);
+ vLit2 = SIMD_T::or_si(vLit2, vMask3);
+ vLit2 = SIMD_T::or_si(vLit2, vMask4);
+ vLit2 = SIMD_T::or_si(vLit2, vMask5);
+ vLit2 = SIMD_T::or_si(vLit2, vMask6);
+ vLit2 = SIMD_T::or_si(vLit2, vMask7);
Integer<SIMD_T> vLit = SIMD_T::or_si(vLit1, vLit2);
#endif
    // Check which triangles have any pixel lit
- uint32_t maskLit = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
+ uint32_t maskLit = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
uint32_t maskUnlit = ~maskLit & oneTileMask;
uint32_t oldTriMask = triMask;
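    // maskUnlit marks triangles that fit entirely in one ER tile yet covered
    // no pixel; removing them from triMask lets the early rasterizer reject
    // that work before binning, with oldTriMask preserving the pre-cull set
    // (e.g. for event reporting).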
/// @param viewportIdx - viewport array index for each triangle.
/// @tparam CT - ConservativeRastFETraits
template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-void SIMDCALL BinTrianglesImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> tri[3],
- uint32_t triMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx)
+void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> tri[3],
+ uint32_t triMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
- const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+ const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
RDTSC_BEGIN(FEBinTriangles, pDC->drawId);
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const SWR_FRONTEND_STATE& feState = state.frontendState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
+ const SWR_FRONTEND_STATE& feState = state.frontendState;
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
Float<SIMD_T> vRecipW0 = SIMD_T::set1_ps(1.0f);
Float<SIMD_T> vRecipW1 = SIMD_T::set1_ps(1.0f);
calcDeterminantIntVertical(vAi, vBi, vDet);
// cull zero area
- uint32_t maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[0], SIMD_T::setzero_si())));
- uint32_t maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[1], SIMD_T::setzero_si())));
+ uint32_t maskLo =
+ SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[0], SIMD_T::setzero_si())));
+ uint32_t maskHi =
+ SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[1], SIMD_T::setzero_si())));
uint32_t cullZeroAreaMask = maskLo | (maskHi << (SIMD_WIDTH / 2));
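    // The determinant is twice the signed triangle area, so det == 0 marks a
    // degenerate (zero-area) triangle. The 64-bit compares yield one mask bit
    // per double-width lane, hence the low and high halves are combined with a
    // shift of SIMD_WIDTH / 2.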
uint32_t frontWindingTris;
if (rastState.frontWinding == SWR_FRONTWINDING_CW)
{
- maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
- maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
+ maskLo = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
+ maskHi = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
}
else
{
- maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[0])));
- maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[1])));
+ maskLo = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[0])));
+ maskHi = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[1])));
}
frontWindingTris = maskLo | (maskHi << (SIMD_WIDTH / 2));
uint32_t cullTris;
switch ((SWR_CULLMODE)rastState.cullMode)
{
- case SWR_CULLMODE_BOTH: cullTris = 0xffffffff; break;
- case SWR_CULLMODE_NONE: cullTris = 0x0; break;
- case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break;
- // 0 area triangles are marked as backfacing, which is required behavior for conservative rast
- case SWR_CULLMODE_BACK: cullTris = ~frontWindingTris; break;
- default: SWR_INVALID("Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break;
+ case SWR_CULLMODE_BOTH:
+ cullTris = 0xffffffff;
+ break;
+ case SWR_CULLMODE_NONE:
+ cullTris = 0x0;
+ break;
+ case SWR_CULLMODE_FRONT:
+ cullTris = frontWindingTris;
+ break;
+ // 0 area triangles are marked as backfacing, which is required behavior for conservative
+ // rast
+ case SWR_CULLMODE_BACK:
+ cullTris = ~frontWindingTris;
+ break;
+ default:
+ SWR_INVALID("Invalid cull mode: %d", rastState.cullMode);
+ cullTris = 0x0;
+ break;
}
triMask &= ~cullTris;
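    // cullTris carries a bit for every triangle the active cull mode rejects:
    // SWR_CULLMODE_FRONT drops exactly the front-winding set computed above,
    // while SWR_CULLMODE_BACK drops its complement (which also sweeps up the
    // zero-area tris, per the case comment).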
    /// Note: these variable initializations must stay above any 'goto endBinTriangles'
// compute per tri backface
- uint32_t frontFaceMask = frontWindingTris;
- uint32_t *pPrimID = (uint32_t *)&primID;
- const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
- DWORD triIndex = 0;
+ uint32_t frontFaceMask = frontWindingTris;
+ uint32_t* pPrimID = (uint32_t*)&primID;
+ const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
+ DWORD triIndex = 0;
- uint32_t edgeEnable;
+ uint32_t edgeEnable;
PFN_WORK_FUNC pfnWork;
if (CT::IsConservativeT::value)
{
const Integer<SIMD_T> x0x1Mask = SIMD_T::cmpeq_epi32(vXi[0], vXi[1]);
const Integer<SIMD_T> y0y1Mask = SIMD_T::cmpeq_epi32(vYi[0], vYi[1]);
- uint32_t e0Mask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x0x1Mask, y0y1Mask)));
+ uint32_t e0Mask =
+ SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x0x1Mask, y0y1Mask)));
// e1 = v2-v1
const Integer<SIMD_T> x1x2Mask = SIMD_T::cmpeq_epi32(vXi[1], vXi[2]);
const Integer<SIMD_T> y1y2Mask = SIMD_T::cmpeq_epi32(vYi[1], vYi[2]);
- uint32_t e1Mask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x1x2Mask, y1y2Mask)));
+ uint32_t e1Mask =
+ SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x1x2Mask, y1y2Mask)));
// e2 = v0-v2
// if v0 == v1 & v1 == v2, v0 == v2
else
{
// degenerate triangles won't be sent to rasterizer; just enable all edges
- pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
- (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(ALL_EDGES_VALID), (state.scissorsTileAligned == false));
+ pfnWork = GetRasterizerFunc(rastState.sampleCount,
+ rastState.bIsCenterPattern,
+ (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
+ EdgeValToEdgeState(ALL_EDGES_VALID),
+ (state.scissorsTileAligned == false));
}
SIMDBBOX_T<SIMD_T> bbox;
{
Integer<SIMD_T> xmin = SIMD_T::add_epi32(bbox.xmin, SIMD_T::set1_epi32(127));
- xmin = SIMD_T::and_si(xmin, SIMD_T::set1_epi32(~255));
+ xmin = SIMD_T::and_si(xmin, SIMD_T::set1_epi32(~255));
Integer<SIMD_T> xmax = SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(128));
- xmax = SIMD_T::and_si(xmax, SIMD_T::set1_epi32(~255));
+ xmax = SIMD_T::and_si(xmax, SIMD_T::set1_epi32(~255));
Integer<SIMD_T> vMaskH = SIMD_T::cmpeq_epi32(xmin, xmax);
Integer<SIMD_T> ymin = SIMD_T::add_epi32(bbox.ymin, SIMD_T::set1_epi32(127));
- ymin = SIMD_T::and_si(ymin, SIMD_T::set1_epi32(~255));
+ ymin = SIMD_T::and_si(ymin, SIMD_T::set1_epi32(~255));
Integer<SIMD_T> ymax = SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(128));
- ymax = SIMD_T::and_si(ymax, SIMD_T::set1_epi32(~255));
+ ymax = SIMD_T::and_si(ymax, SIMD_T::set1_epi32(~255));
Integer<SIMD_T> vMaskV = SIMD_T::cmpeq_epi32(ymin, ymax);
- vMaskV = SIMD_T::or_si(vMaskH, vMaskV);
+ vMaskV = SIMD_T::or_si(vMaskH, vMaskV);
cullCenterMask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vMaskV));
}
}
}
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
- // Gather the AOS effective scissor rects based on the per-prim VP index.
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
+ // exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
{
Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
if (pa.viewportArrayActive)
{
- GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+ GatherScissors(&state.scissorsInFixedPoint[0],
+ pViewportIndex,
+ scisXmin,
+ scisYmin,
+ scisXmax,
+ scisYmax);
}
else // broadcast fast path for non-VPAI case.
{
if (CT::IsConservativeT::value)
{
- // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
- // some area. Bump the xmax/ymax edges out
+ // in the case where a degenerate triangle is on a scissor edge, we need to make sure the
+ // primitive bbox has some area. Bump the xmax/ymax edges out
Integer<SIMD_T> topEqualsBottom = SIMD_T::cmpeq_epi32(bbox.ymin, bbox.ymax);
- bbox.ymax = SIMD_T::blendv_epi32(bbox.ymax, SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), topEqualsBottom);
+ bbox.ymax = SIMD_T::blendv_epi32(
+ bbox.ymax, SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), topEqualsBottom);
Integer<SIMD_T> leftEqualsRight = SIMD_T::cmpeq_epi32(bbox.xmin, bbox.xmax);
- bbox.xmax = SIMD_T::blendv_epi32(bbox.xmax, SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), leftEqualsRight);
+ bbox.xmax = SIMD_T::blendv_epi32(
+ bbox.xmax, SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), leftEqualsRight);
}
// Cull tris completely outside scissor
{
Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+ Integer<SIMD_T> maskOutsideScissorXY =
+ SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- triMask = triMask & ~maskOutsideScissor;
+ triMask = triMask & ~maskOutsideScissor;
}
#if KNOB_ENABLE_EARLY_RAST
// convert to ER tiles
SIMDBBOX_T<SIMD_T> er_bbox;
- er_bbox.xmin = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
- er_bbox.xmax = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
- er_bbox.ymin = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
- er_bbox.ymax = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
+ er_bbox.xmin =
+ SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
+ er_bbox.xmax =
+ SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
+ er_bbox.ymin =
+ SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
+ er_bbox.ymax =
+ SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
Integer<SIMD_T> vTileX = SIMD_T::cmpeq_epi32(er_bbox.xmin, er_bbox.xmax);
Integer<SIMD_T> vTileY = SIMD_T::cmpeq_epi32(er_bbox.ymin, er_bbox.ymax);
// Take only triangles that fit into ER tile
- uint32_t oneTileMask = triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
+ uint32_t oneTileMask =
+ triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
if (oneTileMask)
{
// determine CW tris (det > 0)
- uint32_t maskCwLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
- uint32_t maskCwHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
+ uint32_t maskCwLo = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
+ uint32_t maskCwHi = SIMD_T::movemask_pd(
+ SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
uint32_t cwTrisMask = maskCwLo | (maskCwHi << (SIMD_WIDTH / 2));
// Try early rasterization
- triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
+ triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(
+ er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
if (!triMask)
{
return;
}
}
-
}
#endif
{
// Simple non-conformant wireframe mode, useful for debugging
// construct 3 SIMD lines out of the triangle and call the line binner for each SIMD
- Vec4<SIMD_T> line[2];
+ Vec4<SIMD_T> line[2];
Float<SIMD_T> recipW[2];
- line[0] = tri[0];
- line[1] = tri[1];
+ line[0] = tri[0];
+ line[1] = tri[1];
recipW[0] = vRecipW0;
recipW[1] = vRecipW1;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
- line[0] = tri[1];
- line[1] = tri[2];
+ line[0] = tri[1];
+ line[1] = tri[2];
recipW[0] = vRecipW1;
recipW[1] = vRecipW2;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
- line[0] = tri[2];
- line[1] = tri[0];
+ line[0] = tri[2];
+ line[1] = tri[0];
recipW[0] = vRecipW2;
recipW[1] = vRecipW0;
- BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
RDTSC_END(FEBinTriangles, 1);
        return;
    }
    else if (rastState.fillMode == SWR_FILLMODE_POINT)
{
// Bin 3 points
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
- BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
+ BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+ pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
RDTSC_END(FEBinTriangles, 1);
return;
bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
- OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+ OSALIGNSIMD16(uint32_t)
+ aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
// transpose verts needed for backend
/// @todo modify BE to take non-transformed verts
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&triIndex, triMask))
{
- uint32_t linkageCount = state.backendState.numAttributes;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
{
// only rasterize valid edges if we have a degenerate primitive
int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
- work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
- (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(triEdgeEnable), (state.scissorsTileAligned == false));
+ work.pfnWork =
+ GetRasterizerFunc(rastState.sampleCount,
+ rastState.bIsCenterPattern,
+ (rastState.conservativeRast > 0),
+ (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
+ EdgeValToEdgeState(triEdgeEnable),
+ (state.scissorsTileAligned == false));
// Degenerate triangles are required to be constant interpolated
isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
}
// Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
- state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate);
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs =
+ GetProcessAttributesFunc(3,
+ state.backendState.swizzleEnable,
+ state.backendState.constantInterpolationMask,
+ isDegenerate);
- TRIANGLE_WORK_DESC &desc = work.desc.tri;
+ TRIANGLE_WORK_DESC& desc = work.desc.tri;
desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
- desc.triFlags.viewportIndex = pViewportIndex[triIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[triIndex];
auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// store active attribs
- float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
- desc.pAttribs = pAttribs;
+ float* pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
+ desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
// store triangle vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
- SIMD128::store_ps(&desc.pTriBuffer[0], vHorizX[triIndex]);
- SIMD128::store_ps(&desc.pTriBuffer[4], vHorizY[triIndex]);
- SIMD128::store_ps(&desc.pTriBuffer[8], vHorizZ[triIndex]);
+ SIMD128::store_ps(&desc.pTriBuffer[0], vHorizX[triIndex]);
+ SIMD128::store_ps(&desc.pTriBuffer[4], vHorizY[triIndex]);
+ SIMD128::store_ps(&desc.pTriBuffer[8], vHorizZ[triIndex]);
SIMD128::store_ps(&desc.pTriBuffer[12], vHorizW[triIndex]);
// store user clip distances
{
uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float));
- ProcessUserClipDist<3>(state.backendState, pa, triIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
+ ProcessUserClipDist<3>(
+ state.backendState, pa, triIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
}
for (uint32_t y = aMTTop[triIndex]; y <= aMTBottom[triIndex]; ++y)
}
}
- triMask &= ~(1 << triIndex);
+ triMask &= ~(1 << triIndex);
}
RDTSC_END(FEBinTriangles, 1);
}
template <typename CT>
-void BinTriangles(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simdvector tri[3],
- uint32_t triMask,
- simdscalari const &primID,
- simdscalari const &viewportIdx,
- simdscalari const &rtIdx)
+void BinTriangles(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector tri[3],
+ uint32_t triMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
- BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
+ BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(
+ pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
}
#if USE_SIMD16_FRONTEND
template <typename CT>
-void SIMDCALL BinTriangles_simd16(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simd16vector tri[3],
- uint32_t triMask,
- simd16scalari const &primID,
- simd16scalari const &viewportIdx,
- simd16scalari const &rtIdx)
+void SIMDCALL BinTriangles_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector tri[3],
+ uint32_t triMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
- BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
+ BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(
+ pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
}
#endif
#endif
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx)
+void BinPostSetupPointsImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
RDTSC_BEGIN(FEBinPoints, pDC->drawId);
- Vec4<SIMD_T> &primVerts = prim[0];
+ Vec4<SIMD_T>& primVerts = prim[0];
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
+ const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
// Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
- state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
+ 1, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
// convert to fixed point
Integer<SIMD_T> vXi, vYi;
primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vXi));
primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vYi));
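    // movemask_ps collects sign bits, so the two lines above retire any lane
    // whose fixed-point x or y went negative, i.e. points landing off the
    // top/left of the render area.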
- // compute macro tile coordinates
+ // compute macro tile coordinates
Integer<SIMD_T> macroX = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(vXi);
Integer<SIMD_T> macroY = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(vYi);
OSALIGNSIMD16(uint32_t) aMacroX[SIMD_WIDTH], aMacroY[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMacroX), macroX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMacroY), macroY);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroX), macroX);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroY), macroY);
// compute raster tile coordinates
- Integer<SIMD_T> rasterX = SIMD_T::template srai_epi32<KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT>(vXi);
- Integer<SIMD_T> rasterY = SIMD_T::template srai_epi32<KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT>(vYi);
+ Integer<SIMD_T> rasterX =
+ SIMD_T::template srai_epi32<KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT>(vXi);
+ Integer<SIMD_T> rasterY =
+ SIMD_T::template srai_epi32<KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT>(vYi);
// compute raster tile relative x,y for coverage mask
Integer<SIMD_T> tileAlignedX = SIMD_T::template slli_epi32<KNOB_TILE_X_DIM_SHIFT>(rasterX);
Integer<SIMD_T> tileAlignedY = SIMD_T::template slli_epi32<KNOB_TILE_Y_DIM_SHIFT>(rasterY);
- Integer<SIMD_T> tileRelativeX = SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vXi), tileAlignedX);
- Integer<SIMD_T> tileRelativeY = SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vYi), tileAlignedY);
+ Integer<SIMD_T> tileRelativeX =
+ SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vXi), tileAlignedX);
+ Integer<SIMD_T> tileRelativeY =
+ SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vYi), tileAlignedY);
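    // E.g. assuming 8x8 raster tiles (KNOB_TILE_X_DIM_SHIFT = 3): a point at
    // x = 19.25 px has vXi = 4928 in 16.8 fixed point, so rasterX =
    // 4928 >> 11 = 2, tileAlignedX = 2 << 3 = 16, and tileRelativeX =
    // (4928 >> 8) - 16 = 3.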
OSALIGNSIMD16(uint32_t) aTileRelativeX[SIMD_WIDTH];
OSALIGNSIMD16(uint32_t) aTileRelativeY[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileRelativeX), tileRelativeX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileRelativeY), tileRelativeY);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeX), tileRelativeX);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeY), tileRelativeY);
OSALIGNSIMD16(uint32_t) aTileAlignedX[SIMD_WIDTH];
OSALIGNSIMD16(uint32_t) aTileAlignedY[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileAlignedX), tileAlignedX);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileAlignedY), tileAlignedY);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedX), tileAlignedX);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedY), tileAlignedY);
OSALIGNSIMD16(float) aZ[SIMD_WIDTH];
- SIMD_T::store_ps(reinterpret_cast<float *>(aZ), primVerts.z);
+ SIMD_T::store_ps(reinterpret_cast<float*>(aZ), primVerts.z);
// store render target array index
- const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
-
- uint32_t *pPrimID = (uint32_t *)&primID;
- DWORD primIndex = 0;
+ const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
+
+ uint32_t* pPrimID = (uint32_t*)&primID;
+ DWORD primIndex = 0;
const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
// scan remaining valid triangles and bin each separately
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = backendState.numAttributes;
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
work.type = DRAW;
- TRIANGLE_WORK_DESC &desc = work.desc.tri;
+ TRIANGLE_WORK_DESC& desc = work.desc.tri;
// points are always front facing
- desc.triFlags.frontFacing = 1;
+ desc.triFlags.frontFacing = 1;
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeSimplePoint;
SWR_ASSERT(pArena != nullptr);
// store attributes
- float *pAttribs = (float*)pArena->AllocAligned(3 * numScalarAttribs * sizeof(float), 16);
- desc.pAttribs = pAttribs;
+ float* pAttribs =
+ (float*)pArena->AllocAligned(3 * numScalarAttribs * sizeof(float), 16);
+ desc.pAttribs = pAttribs;
desc.numAttribs = linkageCount;
pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
// store raster tile aligned x, y, perspective correct z
- float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
- desc.pTriBuffer = pTriBuffer;
+ float* pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
+ desc.pTriBuffer = pTriBuffer;
*(uint32_t*)pTriBuffer++ = aTileAlignedX[primIndex];
*(uint32_t*)pTriBuffer++ = aTileAlignedY[primIndex];
- *pTriBuffer = aZ[primIndex];
+ *pTriBuffer = aZ[primIndex];
uint32_t tX = aTileRelativeX[primIndex];
uint32_t tY = aTileRelativeY[primIndex];
work.desc.tri.triFlags.coverageMask = tX | (tY << 4);
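        // Sketch of the packing above: tile-relative x occupies the low 4
        // bits and y the next 4 (field width inferred from the << 4), so a
        // point at (3, 5) within its raster tile packs to 3 | (5 << 4) = 0x53.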
// bin it
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
#if KNOB_ENABLE_TOSS_POINTS
if (!KNOB_TOSS_SETUP_TRIS)
#endif
bbox.xmin = bbox.xmax = vXi;
bbox.ymin = bbox.ymax = vYi;
- Float<SIMD_T> vHalfWidth = SIMD_T::mul_ps(vPointSize, SIMD_T::set1_ps(0.5f));
+ Float<SIMD_T> vHalfWidth = SIMD_T::mul_ps(vPointSize, SIMD_T::set1_ps(0.5f));
Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
bbox.xmin = SIMD_T::sub_epi32(bbox.xmin, vHalfWidthi);
bbox.ymin = SIMD_T::sub_epi32(bbox.ymin, vHalfWidthi);
bbox.ymax = SIMD_T::add_epi32(bbox.ymax, vHalfWidthi);
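    // E.g. a 3.0-pixel point: vHalfWidth = 1.5, which fpToFixedPointVertical
    // converts to 384 (1.5 * 256 in 16.8 fixed point), so the bbox grows by
    // 1.5 px on each side before the scissor intersection below.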
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
- // Gather the AOS effective scissor rects based on the per-prim VP index.
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge
+ // is exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
{
Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
if (pa.viewportArrayActive)
{
- GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+ GatherScissors(&state.scissorsInFixedPoint[0],
+ pViewportIndex,
+ scisXmin,
+ scisYmin,
+ scisXmax,
+ scisYmax);
}
else // broadcast fast path for non-VPAI case.
{
bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
- bbox.xmax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
- bbox.ymax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
+ bbox.xmax =
+ SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
+ bbox.ymax =
+ SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
}
// Cull bloated points completely outside scissor
Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+ Integer<SIMD_T> maskOutsideScissorXY =
+ SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- primMask = primMask & ~maskOutsideScissor;
+ primMask = primMask & ~maskOutsideScissor;
// Convert bbox to macrotile units.
bbox.xmin = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmin);
bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
- OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+ OSALIGNSIMD16(uint32_t)
+ aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
// store render target array index
- const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+ const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
OSALIGNSIMD16(float) aPointSize[SIMD_WIDTH];
- SIMD_T::store_ps(reinterpret_cast<float *>(aPointSize), vPointSize);
+ SIMD_T::store_ps(reinterpret_cast<float*>(aPointSize), vPointSize);
- uint32_t *pPrimID = (uint32_t *)&primID;
+ uint32_t* pPrimID = (uint32_t*)&primID;
OSALIGNSIMD16(float) aPrimVertsX[SIMD_WIDTH];
OSALIGNSIMD16(float) aPrimVertsY[SIMD_WIDTH];
OSALIGNSIMD16(float) aPrimVertsZ[SIMD_WIDTH];
- SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsX), primVerts.x);
- SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsY), primVerts.y);
- SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsZ), primVerts.z);
+ SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsX), primVerts.x);
+ SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsY), primVerts.y);
+ SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsZ), primVerts.z);
// scan remaining valid prims and bin each separately
const SWR_BACKEND_STATE& backendState = state.backendState;
- DWORD primIndex;
+ DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = backendState.numAttributes;
+ uint32_t linkageCount = backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
work.type = DRAW;
- TRIANGLE_WORK_DESC &desc = work.desc.tri;
+ TRIANGLE_WORK_DESC& desc = work.desc.tri;
- desc.triFlags.frontFacing = 1;
- desc.triFlags.pointSize = aPointSize[primIndex];
+ desc.triFlags.frontFacing = 1;
+ desc.triFlags.pointSize = aPointSize[primIndex];
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeTriPoint;
pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store point vertex data
- float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
- desc.pTriBuffer = pTriBuffer;
- *pTriBuffer++ = aPrimVertsX[primIndex];
- *pTriBuffer++ = aPrimVertsY[primIndex];
- *pTriBuffer = aPrimVertsZ[primIndex];
+ float* pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
+ desc.pTriBuffer = pTriBuffer;
+ *pTriBuffer++ = aPrimVertsX[primIndex];
+ *pTriBuffer++ = aPrimVertsY[primIndex];
+ *pTriBuffer = aPrimVertsZ[primIndex];
// store user clip distances
if (backendState.clipDistanceMask)
float dists[8];
float one = 1.0f;
ProcessUserClipDist<1>(backendState, pa, primIndex, &one, dists);
- for (uint32_t i = 0; i < numClipDist; i++) {
+ for (uint32_t i = 0; i < numClipDist; i++)
+ {
desc.pUserClipBuffer[3 * i + 0] = 0.0f;
desc.pUserClipBuffer[3 * i + 1] = 0.0f;
desc.pUserClipBuffer[3 * i + 2] = dists[i];
}
}
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
{
for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
/// @param tri - Contains point position data for a SIMD's worth of points.
/// @param primID - Primitive ID for each point.
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPointsImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[3],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx)
+void BinPointsImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[3],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
- const API_STATE& state = GetApiState(pDC);
- const SWR_FRONTEND_STATE& feState = state.frontendState;
- const SWR_RASTSTATE& rastState = state.rastState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_FRONTEND_STATE& feState = state.frontendState;
+ const SWR_RASTSTATE& rastState = state.rastState;
if (!feState.vpTransformDisable)
{
prim[0].y = SIMD_T::add_ps(prim[0].y, offset);
BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
- pDC,
- pa,
- workerId,
- prim,
- primMask,
- primID,
- viewportIdx,
- rtIdx);
+ pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
-void BinPoints(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simdvector prim[3],
- uint32_t primMask,
- simdscalari const &primID,
- simdscalari const &viewportIdx,
- simdscalari const &rtIdx)
+void BinPoints(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prim[3],
+ uint32_t primMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
- pDC,
- pa,
- workerId,
- prim,
- primMask,
- primID,
- viewportIdx,
- rtIdx);
+ pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simd16vector prim[3],
- uint32_t primMask,
- simd16scalari const &primID,
- simd16scalari const &viewportIdx,
- simd16scalari const & rtIdx)
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prim[3],
+ uint32_t primMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
- pDC,
- pa,
- workerId,
- prim,
- primMask,
- primID,
- viewportIdx,
- rtIdx);
+ pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#endif
/// @param primID - Primitive ID for each line.
/// @param viewportIdx - Viewport Array Index for each line.
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[],
- Float<SIMD_T> recipW[],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const &rtIdx)
+void BinPostSetupLinesImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[],
+ Float<SIMD_T> recipW[],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
- const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+ const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
RDTSC_BEGIN(FEBinLines, pDC->drawId);
- const API_STATE &state = GetApiState(pDC);
- const SWR_RASTSTATE &rastState = state.rastState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
// Select attribute processor
- PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
- state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+ PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
+ 2, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
- Float<SIMD_T> &vRecipW0 = recipW[0];
- Float<SIMD_T> &vRecipW1 = recipW[1];
+ Float<SIMD_T>& vRecipW0 = recipW[0];
+ Float<SIMD_T>& vRecipW1 = recipW[1];
// convert to fixed point
Integer<SIMD_T> vXi[2], vYi[2];
vYi[1] = fpToFixedPointVertical<SIMD_T>(prim[1].y);
// compute x-major vs y-major mask
- Integer<SIMD_T> xLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vXi[0], vXi[1]));
- Integer<SIMD_T> yLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vYi[0], vYi[1]));
- Float<SIMD_T> vYmajorMask = SIMD_T::castsi_ps(SIMD_T::cmpgt_epi32(yLength, xLength));
- uint32_t yMajorMask = SIMD_T::movemask_ps(vYmajorMask);
+ Integer<SIMD_T> xLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vXi[0], vXi[1]));
+ Integer<SIMD_T> yLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vYi[0], vYi[1]));
+ Float<SIMD_T> vYmajorMask = SIMD_T::castsi_ps(SIMD_T::cmpgt_epi32(yLength, xLength));
+ uint32_t yMajorMask = SIMD_T::movemask_ps(vYmajorMask);
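    // A line is y-major when |dy| > |dx|: e.g. (0,0) -> (2,5) has yLength >
    // xLength, setting that lane's bit in yMajorMask so the half-width bloat
    // below widens the bbox along x, the line's minor axis.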
// cull zero-length lines
Integer<SIMD_T> vZeroLengthMask = SIMD_T::cmpeq_epi32(xLength, SIMD_T::setzero_si());
- vZeroLengthMask = SIMD_T::and_si(vZeroLengthMask, SIMD_T::cmpeq_epi32(yLength, SIMD_T::setzero_si()));
+ vZeroLengthMask =
+ SIMD_T::and_si(vZeroLengthMask, SIMD_T::cmpeq_epi32(yLength, SIMD_T::setzero_si()));
primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vZeroLengthMask));
- uint32_t *pPrimID = (uint32_t *)&primID;
- const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
+ uint32_t* pPrimID = (uint32_t*)&primID;
+ const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
// Calc bounding box of lines
SIMDBBOX_T<SIMD_T> bbox;
bbox.ymax = SIMD_T::max_epi32(vYi[0], vYi[1]);
// bloat bbox by line width along minor axis
- Float<SIMD_T> vHalfWidth = SIMD_T::set1_ps(rastState.lineWidth / 2.0f);
+ Float<SIMD_T> vHalfWidth = SIMD_T::set1_ps(rastState.lineWidth / 2.0f);
Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
SIMDBBOX_T<SIMD_T> bloatBox;
bbox.ymin = SIMD_T::blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
bbox.ymax = SIMD_T::blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
- // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+ // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
+ // exclusive.
{
Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
if (pa.viewportArrayActive)
{
- GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+ GatherScissors(&state.scissorsInFixedPoint[0],
+ pViewportIndex,
+ scisXmin,
+ scisYmin,
+ scisXmax,
+ scisYmax);
}
else // broadcast fast path for non-VPAI case.
{
bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
- bbox.xmax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
- bbox.ymax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
+ bbox.xmax =
+ SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
+ bbox.ymax =
+ SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
}
// Cull prims completely outside scissor
{
Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
- Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+ Integer<SIMD_T> maskOutsideScissorXY =
+ SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
- primMask = primMask & ~maskOutsideScissor;
+ primMask = primMask & ~maskOutsideScissor;
}
// transpose verts needed for backend
bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
- OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+ OSALIGNSIMD16(uint32_t)
+ aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft), bbox.xmin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight), bbox.xmax);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop), bbox.ymin);
- SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+ SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
TransposeVertices(vHorizX, prim[0].x, prim[1].x, SIMD_T::setzero_ps());
TransposeVertices(vHorizY, prim[0].y, prim[1].y, SIMD_T::setzero_ps());
TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps());
- TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
+ TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
// scan remaining valid prims and bin each separately
DWORD primIndex;
while (_BitScanForward(&primIndex, primMask))
{
- uint32_t linkageCount = state.backendState.numAttributes;
+ uint32_t linkageCount = state.backendState.numAttributes;
uint32_t numScalarAttribs = linkageCount * 4;
BE_WORK work;
work.type = DRAW;
- TRIANGLE_WORK_DESC &desc = work.desc.tri;
+ TRIANGLE_WORK_DESC& desc = work.desc.tri;
- desc.triFlags.frontFacing = 1;
- desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
+ desc.triFlags.frontFacing = 1;
+ desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
- desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+ desc.triFlags.viewportIndex = pViewportIndex[primIndex];
work.pfnWork = RasterizeLine;
SWR_ASSERT(pArena != nullptr);
// store active attribs
- desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
+ desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
desc.numAttribs = linkageCount;
pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
// store line vertex data
desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
- _mm_store_ps(&desc.pTriBuffer[0], vHorizX[primIndex]);
- _mm_store_ps(&desc.pTriBuffer[4], vHorizY[primIndex]);
- _mm_store_ps(&desc.pTriBuffer[8], vHorizZ[primIndex]);
+ _mm_store_ps(&desc.pTriBuffer[0], vHorizX[primIndex]);
+ _mm_store_ps(&desc.pTriBuffer[4], vHorizY[primIndex]);
+ _mm_store_ps(&desc.pTriBuffer[8], vHorizZ[primIndex]);
_mm_store_ps(&desc.pTriBuffer[12], vHorizW[primIndex]);
// store user clip distances
{
uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 2 * sizeof(float));
- ProcessUserClipDist<2>(state.backendState, pa, primIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
+ ProcessUserClipDist<2>(
+ state.backendState, pa, primIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
}
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
{
for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
/// @param primID - Primitive ID for each line.
/// @param viewportIdx - Viewport Array Index for each line.
template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void SIMDCALL BinLinesImpl(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- Vec4<SIMD_T> prim[3],
- uint32_t primMask,
- Integer<SIMD_T> const &primID,
- Integer<SIMD_T> const &viewportIdx,
- Integer<SIMD_T> const & rtIdx)
+void SIMDCALL BinLinesImpl(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ Vec4<SIMD_T> prim[3],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primID,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
- const API_STATE& state = GetApiState(pDC);
- const SWR_RASTSTATE& rastState = state.rastState;
- const SWR_FRONTEND_STATE& feState = state.frontendState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
+ const SWR_FRONTEND_STATE& feState = state.frontendState;
- Float<SIMD_T> vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) };
+ Float<SIMD_T> vRecipW[2] = {SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f)};
if (!feState.vpTransformDisable)
{
prim[1].y = SIMD_T::add_ps(prim[1].y, offset);
BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
- pDC,
- pa,
- workerId,
- prim,
- vRecipW,
- primMask,
- primID,
- viewportIdx,
- rtIdx);
+ pDC, pa, workerId, prim, vRecipW, primMask, primID, viewportIdx, rtIdx);
}
-void BinLines(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simdvector prim[],
- uint32_t primMask,
- simdscalari const &primID,
- simdscalari const &viewportIdx,
- simdscalari const &rtIdx)
+void BinLines(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prim[],
+ uint32_t primMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
- BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
+ BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(
+ pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#if USE_SIMD16_FRONTEND
-void SIMDCALL BinLines_simd16(
- DRAW_CONTEXT *pDC,
- PA_STATE &pa,
- uint32_t workerId,
- simd16vector prim[3],
- uint32_t primMask,
- simd16scalari const &primID,
- simd16scalari const &viewportIdx,
- simd16scalari const &rtIdx)
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prim[3],
+ uint32_t primMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
- BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
+ BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(
+ pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
}
#endif
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file binner.h
-*
-* @brief Declaration for the macrotile binner
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file binner.h
+ *
+ * @brief Declaration for the macrotile binner
+ *
+ ******************************************************************************/
#include "state.h"
#include "conservativeRast.h"
#include "utils.h"
};
//////////////////////////////////////////////////////////////////////////
-/// @brief Convert the X,Y coords of a triangle to the requested Fixed
+/// @brief Convert the X,Y coords of a triangle to the requested Fixed
/// Point precision from FP32.
template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
-INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T> &vIn)
+INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T>& vIn)
{
return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
}
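// Scalar reference for the conversion above, assuming the default Fixed_16_8
// traits (ScaleT::value = 256). E.g. 1.5f -> 384 (0x180) and -0.25f -> -64.
// Note cvtps_epi32 rounds to nearest-even while lroundf rounds ties away from
// zero, so exact .5 results can differ; this sketch is illustrative only.
static inline int32_t fpToFixed_16_8_Scalar(float v) // requires <cmath>
{
    return static_cast<int32_t>(std::lroundf(v * 256.0f));
}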
//////////////////////////////////////////////////////////////////////////
-/// @brief Helper function to set the X,Y coords of a triangle to the
+/// @brief Helper function to set the X,Y coords of a triangle to the
/// requested Fixed Point precision from FP32.
/// @param tri: simdvector[3] of FP triangle verts
/// @param vXi: fixed point X coords of tri verts
/// @param vYi: fixed point Y coords of tri verts
template <typename SIMD_T>
-INLINE static void FPToFixedPoint(const Vec4<SIMD_T> *const tri, Integer<SIMD_T>(&vXi)[3], Integer<SIMD_T>(&vYi)[3])
+INLINE static void
+FPToFixedPoint(const Vec4<SIMD_T>* const tri, Integer<SIMD_T> (&vXi)[3], Integer<SIMD_T> (&vYi)[3])
{
vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
/// @param vX: fixed point X position for triangle verts
/// @param vY: fixed point Y position for triangle verts
/// @param bbox: fixed point bbox
-/// *Note*: expects vX, vY to be in the correct precision for the type
+/// *Note*: expects vX, vY to be in the correct precision for the type
/// of rasterization. This avoids unnecessary FP->fixed conversions.
template <typename SIMD_T, typename CT>
-INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T>(&vX)[3], const Integer<SIMD_T>(&vY)[3], SIMDBBOX_T<SIMD_T> &bbox)
+INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T> (&vX)[3],
+ const Integer<SIMD_T> (&vY)[3],
+ SIMDBBOX_T<SIMD_T>& bbox)
{
Integer<SIMD_T> vMinX = vX[0];
if (CT::BoundingBoxOffsetT::value != 0)
{
- /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
- /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
+        /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for
+        /// conservative rasterization. Expand bbox by 1/256; coverage will be
+        /// correctly handled in the rasterizer.
const Integer<SIMD_T> value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
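        // Sketch of the expansion (symmetric min/max application assumed):
        // subtracting `value` from each min and adding it to each max widens
        // the bbox by 1/512 per side, i.e. 1/256 overall, before the snap to
        // 16.8 fixed point.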
/// @param scisYmax - output vector of per-primitive scissor rect Ymax data.
//
/// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
-static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
- simdscalari &scisXmin, simdscalari &scisYmin, simdscalari &scisXmax, simdscalari &scisYmax)
+static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
+ const uint32_t* pViewportIndex,
+ simdscalari& scisXmin,
+ simdscalari& scisYmin,
+ simdscalari& scisXmax,
+ simdscalari& scisYmax)
{
- scisXmin = _simd_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[7]].xmin,
- pScissorsInFixedPoint[pViewportIndex[6]].xmin,
- pScissorsInFixedPoint[pViewportIndex[5]].xmin,
- pScissorsInFixedPoint[pViewportIndex[4]].xmin,
- pScissorsInFixedPoint[pViewportIndex[3]].xmin,
- pScissorsInFixedPoint[pViewportIndex[2]].xmin,
- pScissorsInFixedPoint[pViewportIndex[1]].xmin,
- pScissorsInFixedPoint[pViewportIndex[0]].xmin);
- scisYmin = _simd_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[7]].ymin,
- pScissorsInFixedPoint[pViewportIndex[6]].ymin,
- pScissorsInFixedPoint[pViewportIndex[5]].ymin,
- pScissorsInFixedPoint[pViewportIndex[4]].ymin,
- pScissorsInFixedPoint[pViewportIndex[3]].ymin,
- pScissorsInFixedPoint[pViewportIndex[2]].ymin,
- pScissorsInFixedPoint[pViewportIndex[1]].ymin,
- pScissorsInFixedPoint[pViewportIndex[0]].ymin);
- scisXmax = _simd_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[7]].xmax,
- pScissorsInFixedPoint[pViewportIndex[6]].xmax,
- pScissorsInFixedPoint[pViewportIndex[5]].xmax,
- pScissorsInFixedPoint[pViewportIndex[4]].xmax,
- pScissorsInFixedPoint[pViewportIndex[3]].xmax,
- pScissorsInFixedPoint[pViewportIndex[2]].xmax,
- pScissorsInFixedPoint[pViewportIndex[1]].xmax,
- pScissorsInFixedPoint[pViewportIndex[0]].xmax);
- scisYmax = _simd_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[7]].ymax,
- pScissorsInFixedPoint[pViewportIndex[6]].ymax,
- pScissorsInFixedPoint[pViewportIndex[5]].ymax,
- pScissorsInFixedPoint[pViewportIndex[4]].ymax,
- pScissorsInFixedPoint[pViewportIndex[3]].ymax,
- pScissorsInFixedPoint[pViewportIndex[2]].ymax,
- pScissorsInFixedPoint[pViewportIndex[1]].ymax,
- pScissorsInFixedPoint[pViewportIndex[0]].ymax);
+ scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[0]].xmin);
+ scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[0]].ymin);
+ scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[0]].xmax);
+ scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[0]].ymax);
}
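// Usage note for the gather above (indices illustrative): with
// pViewportIndex = {2, 0, ...}, lane 0 of scisXmin receives rect 2's xmin and
// lane 1 receives rect 0's. _simd_set_epi32 takes its arguments from the
// highest lane down to lane 0, which is why index [7] is listed first.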
-static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
- simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax)
+static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
+ const uint32_t* pViewportIndex,
+ simd16scalari& scisXmin,
+ simd16scalari& scisYmin,
+ simd16scalari& scisXmax,
+ simd16scalari& scisYmax)
{
- scisXmin = _simd16_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[15]].xmin,
- pScissorsInFixedPoint[pViewportIndex[14]].xmin,
- pScissorsInFixedPoint[pViewportIndex[13]].xmin,
- pScissorsInFixedPoint[pViewportIndex[12]].xmin,
- pScissorsInFixedPoint[pViewportIndex[11]].xmin,
- pScissorsInFixedPoint[pViewportIndex[10]].xmin,
- pScissorsInFixedPoint[pViewportIndex[9]].xmin,
- pScissorsInFixedPoint[pViewportIndex[8]].xmin,
- pScissorsInFixedPoint[pViewportIndex[7]].xmin,
- pScissorsInFixedPoint[pViewportIndex[6]].xmin,
- pScissorsInFixedPoint[pViewportIndex[5]].xmin,
- pScissorsInFixedPoint[pViewportIndex[4]].xmin,
- pScissorsInFixedPoint[pViewportIndex[3]].xmin,
- pScissorsInFixedPoint[pViewportIndex[2]].xmin,
- pScissorsInFixedPoint[pViewportIndex[1]].xmin,
- pScissorsInFixedPoint[pViewportIndex[0]].xmin);
-
- scisYmin = _simd16_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[15]].ymin,
- pScissorsInFixedPoint[pViewportIndex[14]].ymin,
- pScissorsInFixedPoint[pViewportIndex[13]].ymin,
- pScissorsInFixedPoint[pViewportIndex[12]].ymin,
- pScissorsInFixedPoint[pViewportIndex[11]].ymin,
- pScissorsInFixedPoint[pViewportIndex[10]].ymin,
- pScissorsInFixedPoint[pViewportIndex[9]].ymin,
- pScissorsInFixedPoint[pViewportIndex[8]].ymin,
- pScissorsInFixedPoint[pViewportIndex[7]].ymin,
- pScissorsInFixedPoint[pViewportIndex[6]].ymin,
- pScissorsInFixedPoint[pViewportIndex[5]].ymin,
- pScissorsInFixedPoint[pViewportIndex[4]].ymin,
- pScissorsInFixedPoint[pViewportIndex[3]].ymin,
- pScissorsInFixedPoint[pViewportIndex[2]].ymin,
- pScissorsInFixedPoint[pViewportIndex[1]].ymin,
- pScissorsInFixedPoint[pViewportIndex[0]].ymin);
-
- scisXmax = _simd16_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[15]].xmax,
- pScissorsInFixedPoint[pViewportIndex[14]].xmax,
- pScissorsInFixedPoint[pViewportIndex[13]].xmax,
- pScissorsInFixedPoint[pViewportIndex[12]].xmax,
- pScissorsInFixedPoint[pViewportIndex[11]].xmax,
- pScissorsInFixedPoint[pViewportIndex[10]].xmax,
- pScissorsInFixedPoint[pViewportIndex[9]].xmax,
- pScissorsInFixedPoint[pViewportIndex[8]].xmax,
- pScissorsInFixedPoint[pViewportIndex[7]].xmax,
- pScissorsInFixedPoint[pViewportIndex[6]].xmax,
- pScissorsInFixedPoint[pViewportIndex[5]].xmax,
- pScissorsInFixedPoint[pViewportIndex[4]].xmax,
- pScissorsInFixedPoint[pViewportIndex[3]].xmax,
- pScissorsInFixedPoint[pViewportIndex[2]].xmax,
- pScissorsInFixedPoint[pViewportIndex[1]].xmax,
- pScissorsInFixedPoint[pViewportIndex[0]].xmax);
-
- scisYmax = _simd16_set_epi32(
- pScissorsInFixedPoint[pViewportIndex[15]].ymax,
- pScissorsInFixedPoint[pViewportIndex[14]].ymax,
- pScissorsInFixedPoint[pViewportIndex[13]].ymax,
- pScissorsInFixedPoint[pViewportIndex[12]].ymax,
- pScissorsInFixedPoint[pViewportIndex[11]].ymax,
- pScissorsInFixedPoint[pViewportIndex[10]].ymax,
- pScissorsInFixedPoint[pViewportIndex[9]].ymax,
- pScissorsInFixedPoint[pViewportIndex[8]].ymax,
- pScissorsInFixedPoint[pViewportIndex[7]].ymax,
- pScissorsInFixedPoint[pViewportIndex[6]].ymax,
- pScissorsInFixedPoint[pViewportIndex[5]].ymax,
- pScissorsInFixedPoint[pViewportIndex[4]].ymax,
- pScissorsInFixedPoint[pViewportIndex[3]].ymax,
- pScissorsInFixedPoint[pViewportIndex[2]].ymax,
- pScissorsInFixedPoint[pViewportIndex[1]].ymax,
- pScissorsInFixedPoint[pViewportIndex[0]].ymax);
+ scisXmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[14]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[13]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[12]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[11]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[10]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[9]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[8]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[7]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmin,
+ pScissorsInFixedPoint[pViewportIndex[0]].xmin);
+
+ scisYmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[14]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[13]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[12]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[11]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[10]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[9]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[8]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[7]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymin,
+ pScissorsInFixedPoint[pViewportIndex[0]].ymin);
+
+ scisXmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[14]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[13]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[12]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[11]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[10]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[9]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[8]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[7]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[6]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[5]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[4]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[3]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[2]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[1]].xmax,
+ pScissorsInFixedPoint[pViewportIndex[0]].xmax);
+
+ scisYmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[14]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[13]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[12]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[11]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[10]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[9]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[8]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[7]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[6]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[5]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[4]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[3]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[2]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[1]].ymax,
+ pScissorsInFixedPoint[pViewportIndex[0]].ymax);
}
\ No newline at end of file
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend.cpp
-*
-* @brief Implementation for blending operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend.cpp
+ *
+ * @brief Implementation for blending operations.
+ *
+ ******************************************************************************/
#include "state.h"
-template<bool Color, bool Alpha>
-INLINE
-void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
+template <bool Color, bool Alpha>
+INLINE void GenerateBlendFactor(SWR_BLEND_FACTOR func,
+ simdvector& constantColor,
+ simdvector& src,
+ simdvector& src1,
+ simdvector& dst,
+ simdvector& out)
{
simdvector result;
switch (func)
{
- case BLENDFACTOR_ZERO:
+ case BLENDFACTOR_ZERO:
result.x = _simd_setzero_ps();
result.y = _simd_setzero_ps();
result.z = _simd_setzero_ps();
result.w = _simd_setzero_ps();
break;
- case BLENDFACTOR_ONE:
+ case BLENDFACTOR_ONE:
result.x = _simd_set1_ps(1.0);
result.y = _simd_set1_ps(1.0);
result.z = _simd_set1_ps(1.0);
result.w = _simd_set1_ps(1.0);
break;
- case BLENDFACTOR_SRC_COLOR:
+ case BLENDFACTOR_SRC_COLOR:
result = src;
break;
- case BLENDFACTOR_DST_COLOR:
+ case BLENDFACTOR_DST_COLOR:
result = dst;
break;
- case BLENDFACTOR_INV_SRC_COLOR:
+ case BLENDFACTOR_INV_SRC_COLOR:
result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
break;
- case BLENDFACTOR_INV_DST_COLOR:
+ case BLENDFACTOR_INV_DST_COLOR:
result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
break;
-    case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
+    case BLENDFACTOR_SRC_ALPHA:
+        result.x = src.w;
        result.y = src.w;
        result.z = src.w;
        result.w = src.w;
        break;
case BLENDFACTOR_INV_SRC_ALPHA:
{
simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
- result.x = oneMinusSrcA;
- result.y = oneMinusSrcA;
- result.z = oneMinusSrcA;
- result.w = oneMinusSrcA;
+ result.x = oneMinusSrcA;
+ result.y = oneMinusSrcA;
+ result.z = oneMinusSrcA;
+ result.w = oneMinusSrcA;
break;
}
-    case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
+    case BLENDFACTOR_DST_ALPHA:
+        result.x = dst.w;
        result.y = dst.w;
        result.z = dst.w;
        result.w = dst.w;
        break;
case BLENDFACTOR_INV_DST_ALPHA:
{
simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
- result.x = oneMinusDstA;
- result.y = oneMinusDstA;
- result.z = oneMinusDstA;
- result.w = oneMinusDstA;
+ result.x = oneMinusDstA;
+ result.y = oneMinusDstA;
+ result.z = oneMinusDstA;
+ result.w = oneMinusDstA;
break;
}
case BLENDFACTOR_SRC_ALPHA_SATURATE:
{
simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
- result.x = sat;
- result.y = sat;
- result.z = sat;
- result.w = _simd_set1_ps(1.0);
+ result.x = sat;
+ result.y = sat;
+ result.z = sat;
+ result.w = _simd_set1_ps(1.0);
break;
}
case BLENDFACTOR_INV_CONST_ALPHA:
{
- result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
+ result.x = result.y = result.z = result.w =
+ _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
break;
}
result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
break;
- default: SWR_INVALID("Unimplemented blend factor: %d", func);
+ default:
+ SWR_INVALID("Unimplemented blend factor: %d", func);
}
if (Color)
{
out.w = result.w;
}
-
}
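// The factors produced above feed the blend equation evaluated in BlendFunc
// below. E.g. classic alpha blending (SRC_ALPHA / INV_SRC_ALPHA with
// BLENDOP_ADD) reduces per channel to out = src * srcA + dst * (1 - srcA).
// A scalar sketch under those assumptions (names are illustrative):
static inline float BlendAlphaScalar(float src, float dst, float srcAlpha)
{
    // lerp toward src by source alpha
    return src * srcAlpha + dst * (1.0f - srcAlpha);
}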
-template<bool Color, bool Alpha>
-INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
+template <bool Color, bool Alpha>
+INLINE void BlendFunc(SWR_BLEND_OP blendOp,
+ simdvector& src,
+ simdvector& srcFactor,
+ simdvector& dst,
+ simdvector& dstFactor,
+ simdvector& out)
{
simdvector result;
result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
break;
-
+
case BLENDOP_MIN:
result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
break;
-
+
case BLENDOP_MAX:
result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
break;
-
+
default:
SWR_INVALID("Unimplemented blend function: %d", blendOp);
}
}
}
-template<SWR_TYPE type>
-INLINE void Clamp(simdvector &src)
+template <SWR_TYPE type>
+INLINE void Clamp(simdvector& src)
{
switch (type)
{
}
}
-template<SWR_TYPE type>
-void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result)
+template <SWR_TYPE type>
+void Blend(const SWR_BLEND_STATE* pBlendState,
+ const SWR_RENDER_TARGET_BLEND_STATE* pState,
+ simdvector& src,
+ simdvector& src1,
+ uint8_t* pDst,
+ simdvector& result)
{
// load render target
simdvector dst;
simdvector srcFactor, dstFactor;
if (pBlendState->independentAlphaBlendEnable)
{
- GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
-
- GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
- GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
-
- BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
+ GenerateBlendFactor<true, false>(
+ (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
+ GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor,
+ constColor,
+ src,
+ src1,
+ dst,
+ srcFactor);
+
+ GenerateBlendFactor<true, false>(
+ (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
+ GenerateBlendFactor<false, true>(
+ (SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
+
+ BlendFunc<true, false>(
+ (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+ BlendFunc<false, true>(
+ (SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
}
else
{
- GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
+ GenerateBlendFactor<true, true>(
+ (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
+ GenerateBlendFactor<true, true>(
+ (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
- BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+ BlendFunc<true, true>(
+ (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
}
}
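// A minimal usage sketch of the per-channel equation Blend<> evaluates,
//     out = srcFactor * src  (blendOp)  dstFactor * dst,
// shown for the common non-premultiplied "over" configuration (BLENDOP_ADD with
// SRC_ALPHA / INV_SRC_ALPHA factors); the scalar helper is illustrative only.
static float BlendChannelOverSketch(float srcC, float dstC, float srcA)
{
    // srcFactor = srcA (SRC_ALPHA), dstFactor = 1 - srcA (INV_SRC_ALPHA)
    return srcC * srcA + dstC * (1.0f - srcA);
}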
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file clip.cpp
-*
-* @brief Implementation for clipping
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file clip.cpp
+ *
+ * @brief Implementation for clipping
+ *
+ ******************************************************************************/
#include <assert.h>
return (boundaryCoord0 / (boundaryCoord0 - boundaryCoord1));
}
-template<SWR_CLIPCODES ClippingPlane>
+template <SWR_CLIPCODES ClippingPlane>
inline void intersect(
- int s, // index to first edge vertex v0 in pInPts.
- int p, // index to second edge vertex v1 in pInPts.
- const float *pInPts, // array of all the input positions.
- const float *pInAttribs, // array of all attributes for all vertex. All the attributes for each vertex is contiguous.
- int numInAttribs, // number of attributes per vertex.
- int i, // output index.
- float *pOutPts, // array of output positions. We'll write our new intersection point at i*4.
- float *pOutAttribs) // array of output attributes. We'll write our new attributes at i*numInAttribs.
+ int s, // index to first edge vertex v0 in pInPts.
+ int p, // index to second edge vertex v1 in pInPts.
+ const float* pInPts, // array of all the input positions.
+    const float* pInAttribs, // array of all attributes for all vertices. The attributes
+                             // for each vertex are contiguous.
+ int numInAttribs, // number of attributes per vertex.
+ int i, // output index.
+ float* pOutPts, // array of output positions. We'll write our new intersection point at i*4.
+ float* pOutAttribs) // array of output attributes. We'll write our new attributes at
+ // i*numInAttribs.
{
float t;
// Find the parameter of the intersection.
// t = (v1.w - v1.x) / ((v2.x - v1.x) - (v2.w - v1.w)) for x = w (RIGHT) plane, etc.
- const float *v1 = &pInPts[s*4];
- const float *v2 = &pInPts[p*4];
+ const float* v1 = &pInPts[s * 4];
+ const float* v2 = &pInPts[p * 4];
switch (ClippingPlane)
{
- case FRUSTUM_LEFT: t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]); break;
- case FRUSTUM_RIGHT: t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]); break;
- case FRUSTUM_TOP: t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]); break;
- case FRUSTUM_BOTTOM: t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]); break;
- case FRUSTUM_NEAR: t = ComputeInterpFactor(v1[2], v2[2]); break;
- case FRUSTUM_FAR: t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]); break;
- default: SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
+ case FRUSTUM_LEFT:
+ t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]);
+ break;
+ case FRUSTUM_RIGHT:
+ t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]);
+ break;
+ case FRUSTUM_TOP:
+ t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]);
+ break;
+ case FRUSTUM_BOTTOM:
+ t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]);
+ break;
+ case FRUSTUM_NEAR:
+ t = ComputeInterpFactor(v1[2], v2[2]);
+ break;
+ case FRUSTUM_FAR:
+ t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]);
+ break;
+ default:
+ SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
};
-    const float *a1 = &pInAttribs[s*numInAttribs];
-    const float *a2 = &pInAttribs[p*numInAttribs];
-
-    float *pOutP = &pOutPts[i*4];
-    float *pOutA = &pOutAttribs[i*numInAttribs];
+    const float* a1 = &pInAttribs[s * numInAttribs];
+    const float* a2 = &pInAttribs[p * numInAttribs];
+
+    float* pOutP = &pOutPts[i * 4];
+    float* pOutA = &pOutAttribs[i * numInAttribs];
// Interpolate new position.
- for(int j = 0; j < 4; ++j)
+ for (int j = 0; j < 4; ++j)
{
- pOutP[j] = v1[j] + (v2[j]-v1[j])*t;
+ pOutP[j] = v1[j] + (v2[j] - v1[j]) * t;
}
// Interpolate Attributes
- for(int attr = 0; attr < numInAttribs; ++attr)
+ for (int attr = 0; attr < numInAttribs; ++attr)
{
- pOutA[attr] = a1[attr] + (a2[attr]-a1[attr])*t;
+ pOutA[attr] = a1[attr] + (a2[attr] - a1[attr]) * t;
}
}
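// A worked example of the interpolation factor above, with hypothetical values:
// edge from v1 = (2, 0, 0, 1) to v2 = (0, 0, 0, 1) against FRUSTUM_RIGHT (x = w).
//   bc0 = v1.w - v1.x = 1 - 2 = -1   (v1 outside)
//   bc1 = v2.w - v2.x = 1 - 0 =  1   (v2 inside)
//   t   = bc0 / (bc0 - bc1) = -1 / (-1 - 1) = 0.5
// Interpolating position and attributes at t = 0.5 yields the midpoint
// (1, 0, 0, 1), which lies exactly on the x = w plane.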
-
// Checks whether vertex v lies inside clipping plane
// in homogeneous coords check -w < {x,y,z} < w;
//
-template<SWR_CLIPCODES ClippingPlane>
+template <SWR_CLIPCODES ClippingPlane>
inline int inside(const float v[4])
{
switch (ClippingPlane)
{
- case FRUSTUM_LEFT : return (v[0]>=-v[3]);
- case FRUSTUM_RIGHT : return (v[0]<= v[3]);
- case FRUSTUM_TOP : return (v[1]>=-v[3]);
- case FRUSTUM_BOTTOM : return (v[1]<= v[3]);
- case FRUSTUM_NEAR : return (v[2]>=0.0f);
- case FRUSTUM_FAR : return (v[2]<= v[3]);
+ case FRUSTUM_LEFT:
+ return (v[0] >= -v[3]);
+ case FRUSTUM_RIGHT:
+ return (v[0] <= v[3]);
+ case FRUSTUM_TOP:
+ return (v[1] >= -v[3]);
+ case FRUSTUM_BOTTOM:
+ return (v[1] <= v[3]);
+ case FRUSTUM_NEAR:
+ return (v[2] >= 0.0f);
+ case FRUSTUM_FAR:
+ return (v[2] <= v[3]);
default:
SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
return 0;
}
}
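// These comparisons are the clip-space form of the NDC bounds: for w > 0,
// x >= -w is exactly x/w >= -1 after the perspective divide, so the test needs
// no division. Hypothetical example: v = (-3, 0, 0, 2) against FRUSTUM_LEFT
// checks -3 >= -2, which fails -- the vertex is outside (NDC x/w = -1.5 < -1).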
-
// Clips a polygon in homogeneous coordinates to a particular clipping plane.
// Takes in vertices of the polygon (InPts) and the clipping plane
// Puts the vertices of the clipped polygon in OutPts
// Returns number of points in clipped polygon
//
-template<SWR_CLIPCODES ClippingPlane>
-int ClipTriToPlane( const float *pInPts, int numInPts,
- const float *pInAttribs, int numInAttribs,
- float *pOutPts, float *pOutAttribs)
+template <SWR_CLIPCODES ClippingPlane>
+int ClipTriToPlane(const float* pInPts,
+ int numInPts,
+ const float* pInAttribs,
+ int numInAttribs,
+ float* pOutPts,
+ float* pOutAttribs)
{
- int i=0; // index number of OutPts, # of vertices in OutPts = i div 4;
+    int i = 0; // count of vertices written to OutPts; vertex i lands at float offset i*4
for (int j = 0; j < numInPts; ++j)
{
int s = j;
int p = (j + 1) % numInPts;
- int s_in = inside<ClippingPlane>(&pInPts[s*4]);
- int p_in = inside<ClippingPlane>(&pInPts[p*4]);
+ int s_in = inside<ClippingPlane>(&pInPts[s * 4]);
+ int p_in = inside<ClippingPlane>(&pInPts[p * 4]);
// test if vertex is to be added to output vertices
- if (s_in != p_in) // edge crosses clipping plane
+ if (s_in != p_in) // edge crosses clipping plane
{
// find point of intersection
- intersect<ClippingPlane>(s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
+ intersect<ClippingPlane>(
+ s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
i++;
}
if (p_in) // 2nd vertex is inside clipping volume, add it to output
{
// Copy 2nd vertex position of edge over to output.
- for(int k = 0; k < 4; ++k)
+ for (int k = 0; k < 4; ++k)
{
- pOutPts[i*4 + k] = pInPts[p*4 + k];
+ pOutPts[i * 4 + k] = pInPts[p * 4 + k];
}
// Copy 2nd vertex attributes of edge over to output.
- for(int attr = 0; attr < numInAttribs; ++attr)
+ for (int attr = 0; attr < numInAttribs; ++attr)
{
- pOutAttribs[i*numInAttribs+attr] = pInAttribs[p*numInAttribs+attr];
+ pOutAttribs[i * numInAttribs + attr] = pInAttribs[p * numInAttribs + attr];
}
i++;
}
return i;
}
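// A minimal sketch of chaining ClipTriToPlane across planes Sutherland-Hodgman
// style, ping-ponging scratch buffers; kMaxClipVerts/kMaxAttrs and the
// two-plane chain are illustrative assumptions, not rasterizer code.
static int ClipTriNearFarSketch(const float* pPts,   // 3 input verts, xyzw each
                                const float* pAttrs, // numAttrs floats per vert
                                int numAttrs,        // must be <= kMaxAttrs
                                float* pOutPts,      // caller-sized outputs
                                float* pOutAttribs)
{
    constexpr int kMaxClipVerts = 8;  // each plane adds at most one vertex
    constexpr int kMaxAttrs     = 32;
    float ptsA[kMaxClipVerts * 4];
    float attrA[kMaxClipVerts * kMaxAttrs];

    int n = ClipTriToPlane<FRUSTUM_NEAR>(pPts, 3, pAttrs, numAttrs, ptsA, attrA);
    n     = ClipTriToPlane<FRUSTUM_FAR>(ptsA, n, attrA, numAttrs, pOutPts, pOutAttribs);
    return n; // 0 means the triangle was clipped away entirely
}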
-void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
- simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipRectangles(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
RDTSC_END(FEClipRectangles, 1);
}
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
- simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipTriangles(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
Clipper<SIMD256, 3> clipper(workerId, pDC);
RDTSC_END(FEClipTriangles, 1);
}
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
- simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipLines(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipLines, pDC->drawId);
Clipper<SIMD256, 2> clipper(workerId, pDC);
RDTSC_END(FEClipLines, 1);
}
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
- simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipPoints(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx)
{
RDTSC_BEGIN(FEClipPoints, pDC->drawId);
Clipper<SIMD256, 1> clipper(workerId, pDC);
}
#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
- simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
- enum { VERTS_PER_PRIM = 3 };
+ enum
+ {
+ VERTS_PER_PRIM = 3
+ };
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
RDTSC_END(FEClipRectangles, 1);
}
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
- simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
- enum { VERTS_PER_PRIM = 3 };
+ enum
+ {
+ VERTS_PER_PRIM = 3
+ };
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
RDTSC_END(FEClipTriangles, 1);
}
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
- simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipLines, pDC->drawId);
- enum { VERTS_PER_PRIM = 2 };
+ enum
+ {
+ VERTS_PER_PRIM = 2
+ };
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
RDTSC_END(FEClipLines, 1);
}
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
- simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx)
{
RDTSC_BEGIN(FEClipPoints, pDC->drawId);
- enum { VERTS_PER_PRIM = 1 };
+ enum
+ {
+ VERTS_PER_PRIM = 1
+ };
Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file clip.h
-*
-* @brief Definitions for clipping
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file clip.h
+ *
+ * @brief Definitions for clipping
+ *
+ ******************************************************************************/
#pragma once
#include "common/simdintrin.h"
enum SWR_CLIPCODES
{
- // Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
- // Guardband is able to use a single high-bit with 4 separate LSBs, because it computes a union, rather than intersection, of clipcodes.
+// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
+// Guardband is able to use a single high-bit with 4 separate LSBs, because it computes a union,
+// rather than intersection, of clipcodes.
#define CLIPCODE_SHIFT 23
- FRUSTUM_LEFT = (0x01 << CLIPCODE_SHIFT),
- FRUSTUM_TOP = (0x02 << CLIPCODE_SHIFT),
- FRUSTUM_RIGHT = (0x04 << CLIPCODE_SHIFT),
- FRUSTUM_BOTTOM = (0x08 << CLIPCODE_SHIFT),
+ FRUSTUM_LEFT = (0x01 << CLIPCODE_SHIFT),
+ FRUSTUM_TOP = (0x02 << CLIPCODE_SHIFT),
+ FRUSTUM_RIGHT = (0x04 << CLIPCODE_SHIFT),
+ FRUSTUM_BOTTOM = (0x08 << CLIPCODE_SHIFT),
- FRUSTUM_NEAR = (0x10 << CLIPCODE_SHIFT),
- FRUSTUM_FAR = (0x20 << CLIPCODE_SHIFT),
+ FRUSTUM_NEAR = (0x10 << CLIPCODE_SHIFT),
+ FRUSTUM_FAR = (0x20 << CLIPCODE_SHIFT),
- NEGW = (0x40 << CLIPCODE_SHIFT),
+ NEGW = (0x40 << CLIPCODE_SHIFT),
GUARDBAND_LEFT = (0x80 << CLIPCODE_SHIFT | 0x1),
    GUARDBAND_TOP = (0x80 << CLIPCODE_SHIFT | 0x2),
    GUARDBAND_RIGHT = (0x80 << CLIPCODE_SHIFT | 0x4),
    GUARDBAND_BOTTOM = (0x80 << CLIPCODE_SHIFT | 0x8)
};
-#define GUARDBAND_CLIP_MASK (FRUSTUM_NEAR|FRUSTUM_FAR|GUARDBAND_LEFT|GUARDBAND_TOP|GUARDBAND_RIGHT|GUARDBAND_BOTTOM|NEGW)
-#define FRUSTUM_CLIP_MASK (FRUSTUM_NEAR|FRUSTUM_FAR|FRUSTUM_LEFT|FRUSTUM_RIGHT|FRUSTUM_TOP|FRUSTUM_BOTTOM)
-
-template<typename SIMD_T>
-void ComputeClipCodes(const API_STATE &state, const Vec4<SIMD_T> &vertex, Float<SIMD_T> &clipCodes, Integer<SIMD_T> const &viewportIndexes)
+#define GUARDBAND_CLIP_MASK \
+ (FRUSTUM_NEAR | FRUSTUM_FAR | GUARDBAND_LEFT | GUARDBAND_TOP | GUARDBAND_RIGHT | \
+ GUARDBAND_BOTTOM | NEGW)
+#define FRUSTUM_CLIP_MASK \
+ (FRUSTUM_NEAR | FRUSTUM_FAR | FRUSTUM_LEFT | FRUSTUM_RIGHT | FRUSTUM_TOP | FRUSTUM_BOTTOM)
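// A sketch of the Cohen-Sutherland style whole-primitive decisions these masks
// enable, given per-vertex codes c0..c2 from ComputeClipCodes (scalar view;
// the helper names are illustrative):
static inline bool TriviallyRejectSketch(uint32_t c0, uint32_t c1, uint32_t c2)
{
    // intersection nonzero: every vertex is outside the same frustum plane
    return ((c0 & c1 & c2) & FRUSTUM_CLIP_MASK) != 0;
}

static inline bool NeedsClipSketch(uint32_t c0, uint32_t c1, uint32_t c2)
{
    // union nonzero: some vertex crosses a guardband/near/far/negW plane
    return ((c0 | c1 | c2) & GUARDBAND_CLIP_MASK) != 0;
}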
+
+template <typename SIMD_T>
+void ComputeClipCodes(const API_STATE& state,
+ const Vec4<SIMD_T>& vertex,
+ Float<SIMD_T>& clipCodes,
+ Integer<SIMD_T> const& viewportIndexes)
{
clipCodes = SIMD_T::setzero_ps();
// -w
- Float<SIMD_T> vNegW = SIMD_T::mul_ps(vertex.w,SIMD_T::set1_ps(-1.0f));
+ Float<SIMD_T> vNegW = SIMD_T::mul_ps(vertex.w, SIMD_T::set1_ps(-1.0f));
// FRUSTUM_LEFT
Float<SIMD_T> vRes = SIMD_T::cmplt_ps(vertex.x, vNegW);
- clipCodes = SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_LEFT)));
+ clipCodes = SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_LEFT)));
// FRUSTUM_TOP
- vRes = SIMD_T::cmplt_ps(vertex.y, vNegW);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_TOP))));
+ vRes = SIMD_T::cmplt_ps(vertex.y, vNegW);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_TOP))));
// FRUSTUM_RIGHT
- vRes = SIMD_T::cmpgt_ps(vertex.x, vertex.w);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_RIGHT))));
+ vRes = SIMD_T::cmpgt_ps(vertex.x, vertex.w);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_RIGHT))));
// FRUSTUM_BOTTOM
- vRes = SIMD_T::cmpgt_ps(vertex.y, vertex.w);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_BOTTOM))));
+ vRes = SIMD_T::cmpgt_ps(vertex.y, vertex.w);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_BOTTOM))));
if (state.rastState.depthClipEnable)
{
{
vRes = SIMD_T::cmplt_ps(vertex.z, vNegW);
}
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_NEAR))));
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_NEAR))));
// FRUSTUM_FAR
- vRes = SIMD_T::cmpgt_ps(vertex.z, vertex.w);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_FAR))));
+ vRes = SIMD_T::cmpgt_ps(vertex.z, vertex.w);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_FAR))));
}
// NEGW
vRes = SIMD_T::cmple_ps(vertex.w, SIMD_T::setzero_ps());
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(NEGW))));
+ clipCodes =
+ SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(NEGW))));
// GUARDBAND_LEFT
- Float<SIMD_T> gbMult = SIMD_T::mul_ps(vNegW, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.left[0], viewportIndexes));
- vRes = SIMD_T::cmplt_ps(vertex.x, gbMult);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_LEFT))));
+ Float<SIMD_T> gbMult = SIMD_T::mul_ps(vNegW,
+ SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+ &state.gbState.left[0], viewportIndexes));
+ vRes = SIMD_T::cmplt_ps(vertex.x, gbMult);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_LEFT))));
// GUARDBAND_TOP
- gbMult = SIMD_T::mul_ps(vNegW, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.top[0], viewportIndexes));
- vRes = SIMD_T::cmplt_ps(vertex.y, gbMult);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_TOP))));
+ gbMult = SIMD_T::mul_ps(vNegW,
+ SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+ &state.gbState.top[0], viewportIndexes));
+ vRes = SIMD_T::cmplt_ps(vertex.y, gbMult);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_TOP))));
// GUARDBAND_RIGHT
- gbMult = SIMD_T::mul_ps(vertex.w, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.right[0], viewportIndexes));
- vRes = SIMD_T::cmpgt_ps(vertex.x, gbMult);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_RIGHT))));
+ gbMult = SIMD_T::mul_ps(vertex.w,
+ SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+ &state.gbState.right[0], viewportIndexes));
+ vRes = SIMD_T::cmpgt_ps(vertex.x, gbMult);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_RIGHT))));
// GUARDBAND_BOTTOM
- gbMult = SIMD_T::mul_ps(vertex.w, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.bottom[0], viewportIndexes));
- vRes = SIMD_T::cmpgt_ps(vertex.y, gbMult);
- clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_BOTTOM))));
+ gbMult = SIMD_T::mul_ps(vertex.w,
+ SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+ &state.gbState.bottom[0], viewportIndexes));
+ vRes = SIMD_T::cmpgt_ps(vertex.y, gbMult);
+ clipCodes = SIMD_T::or_ps(
+ clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_BOTTOM))));
}
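// Scalar equivalent of one SIMD lane above, x/y plane tests only; gbLeft and
// gbRight stand for that viewport's guardband scales (illustrative helper,
// not rasterizer code):
static inline uint32_t ClipCodeScalarSketch(float x, float y, float w, float gbLeft, float gbRight)
{
    uint32_t code = 0;
    if (x < -w)          code |= FRUSTUM_LEFT;
    if (y < -w)          code |= FRUSTUM_TOP;
    if (x > w)           code |= FRUSTUM_RIGHT;
    if (y > w)           code |= FRUSTUM_BOTTOM;
    if (w <= 0.0f)       code |= NEGW;
    if (x < -w * gbLeft) code |= GUARDBAND_LEFT;
    if (x > w * gbRight) code |= GUARDBAND_RIGHT;
    return code;
}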
-template<typename SIMD_T>
+template <typename SIMD_T>
struct BinnerChooser
{
};
-template<>
+template <>
struct BinnerChooser<SIMD256>
{
PFN_PROCESS_PRIMS pfnBinFunc;
BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
- :pfnBinFunc(nullptr)
+ :
+ pfnBinFunc(nullptr)
{
if (numVertsPerPrim == 3)
{
}
BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
- :pfnBinFunc(nullptr)
+ :
+ pfnBinFunc(nullptr)
{
switch (topology)
{
};
}
- void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx, SIMD256::Integer &rtIdx)
+ void BinFunc(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ SIMD256::Vec4 prims[],
+ uint32_t primMask,
+ SIMD256::Integer const& primID,
+ SIMD256::Integer& viewportIdx,
+ SIMD256::Integer& rtIdx)
{
SWR_ASSERT(pfnBinFunc != nullptr);
};
#if USE_SIMD16_FRONTEND
-template<>
+template <>
struct BinnerChooser<SIMD512>
{
PFN_PROCESS_PRIMS_SIMD16 pfnBinFunc;
BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
- :pfnBinFunc(nullptr)
+ :
+ pfnBinFunc(nullptr)
{
if (numVertsPerPrim == 3)
{
}
BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
- :pfnBinFunc(nullptr)
+ :
+ pfnBinFunc(nullptr)
{
switch (topology)
{
};
}
- void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx, SIMD512::Integer &rtIdx)
+ void BinFunc(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ SIMD512::Vec4 prims[],
+ uint32_t primMask,
+ SIMD512::Integer const& primID,
+ SIMD512::Integer& viewportIdx,
+ SIMD512::Integer& rtIdx)
{
SWR_ASSERT(pfnBinFunc != nullptr);
};
#endif
-template<typename SIMD_T>
+template <typename SIMD_T>
struct SimdHelper
{
};
-template<>
+template <>
struct SimdHelper<SIMD256>
{
- static SIMD256::Float insert_lo_ps(SIMD256::Float a)
- {
- return a;
- }
+ static SIMD256::Float insert_lo_ps(SIMD256::Float a) { return a; }
static SIMD256::Mask cmpeq_ps_mask(SIMD256::Float a, SIMD256::Float b)
{
};
#if USE_SIMD16_FRONTEND
-template<>
+template <>
struct SimdHelper<SIMD512>
{
static SIMD512::Float insert_lo_ps(SIMD256::Float a)
#endif
// Temp storage used by the clipper
-template<typename SIMD_T>
+template <typename SIMD_T>
struct ClipHelper
{
};
-template<>
+template <>
struct ClipHelper<SIMD256>
{
- static SIMDVERTEX_T<SIMD256> *GetTempVertices()
- {
- return tlsTempVertices;
- }
+ static SIMDVERTEX_T<SIMD256>* GetTempVertices() { return tlsTempVertices; }
};
#if USE_SIMD16_FRONTEND
-template<>
+template <>
struct ClipHelper<SIMD512>
{
- static SIMDVERTEX_T<SIMD512> *GetTempVertices()
- {
- return tlsTempVertices_simd16;
- }
+ static SIMDVERTEX_T<SIMD512>* GetTempVertices() { return tlsTempVertices_simd16; }
};
#endif
-template<typename SIMD_T, uint32_t NumVertsPerPrim>
+template <typename SIMD_T, uint32_t NumVertsPerPrim>
class Clipper
{
public:
static_assert(NumVertsPerPrim >= 1 && NumVertsPerPrim <= 3, "Invalid NumVertsPerPrim");
}
- void ComputeClipCodes(Vec4<SIMD_T> vertex[], const Integer<SIMD_T> &viewportIndexes)
+ void ComputeClipCodes(Vec4<SIMD_T> vertex[], const Integer<SIMD_T>& viewportIndexes)
{
for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
{
{
Float<SIMD_T> clipUnion = ComputeClipCodeUnion();
- clipUnion = SIMD_T::and_ps(clipUnion, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_CLIP_MASK)));
+ clipUnion =
+ SIMD_T::and_ps(clipUnion, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_CLIP_MASK)));
return SIMD_T::movemask_ps(SIMD_T::cmpneq_ps(clipUnion, SIMD_T::setzero_ps()));
}
for (uint32_t e = 0; e < NumVertsPerPrim; ++e)
{
- Float<SIMD_T> vNan01 = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[0], prim[e].v[1]);
+ Float<SIMD_T> vNan01 =
+ SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[0], prim[e].v[1]);
vNanMask = SIMD_T::or_ps(vNanMask, vNan01);
- Float<SIMD_T> vNan23 = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[2], prim[e].v[3]);
+ Float<SIMD_T> vNan23 =
+ SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[2], prim[e].v[3]);
vNanMask = SIMD_T::or_ps(vNanMask, vNan23);
}
return SIMD_T::movemask_ps(vNanMask);
}
- int ComputeUserClipCullMask(PA_STATE &pa, Vec4<SIMD_T> prim[])
+ int ComputeUserClipCullMask(PA_STATE& pa, Vec4<SIMD_T> prim[])
{
- uint8_t cullMask = state.backendState.cullDistanceMask;
+ uint8_t cullMask = state.backendState.cullDistanceMask;
uint32_t vertexClipCullOffset = state.backendState.vertexClipCullOffset;
Float<SIMD_T> vClipCullMask = SIMD_T::setzero_ps();
while (_BitScanForward(&index, cullMask))
{
cullMask &= ~(1 << index);
- uint32_t slot = index >> 2;
+ uint32_t slot = index >> 2;
uint32_t component = index & 0x3;
Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
}
// cull if cull distance < 0 || NAN
- Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(SIMD_T::setzero_ps(), vCullComp);
+ Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
+ SIMD_T::setzero_ps(), vCullComp);
vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
}
vClipCullMask = SIMD_T::or_ps(vClipCullMask, vCullMaskElem);
while (_BitScanForward(&index, clipMask))
{
clipMask &= ~(1 << index);
- uint32_t slot = index >> 2;
+ uint32_t slot = index >> 2;
uint32_t component = index & 0x3;
Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
vClipComp = vClipCullDistHi[e][component];
}
- Float<SIMD_T> vClip = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(vClipComp, vClipComp);
- Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(SIMD_T::setzero_ps(), vClipComp);
+ Float<SIMD_T> vClip =
+ SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(vClipComp, vClipComp);
+ Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
+ SIMD_T::setzero_ps(), vClipComp);
vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
vClipCullMask = SIMD_T::or_ps(vClipCullMask, vClip);
}
return SIMD_T::movemask_ps(vClipCullMask);
}
- void ClipSimd(const Vec4<SIMD_T> prim[], const Float<SIMD_T> &vPrimMask, const Float<SIMD_T> &vClipMask, PA_STATE &pa,
- const Integer<SIMD_T> &vPrimId, const Integer<SIMD_T> &vViewportIdx, const Integer<SIMD_T> &vRtIdx)
+ void ClipSimd(const Vec4<SIMD_T> prim[],
+ const Float<SIMD_T>& vPrimMask,
+ const Float<SIMD_T>& vClipMask,
+ PA_STATE& pa,
+ const Integer<SIMD_T>& vPrimId,
+ const Integer<SIMD_T>& vViewportIdx,
+ const Integer<SIMD_T>& vRtIdx)
{
// input/output vertex store for clipper
SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
uint32_t constantInterpMask = state.backendState.constantInterpolationMask;
- uint32_t provokingVertex = 0;
+ uint32_t provokingVertex = 0;
if (pa.binTopology == TOP_TRIANGLE_FAN)
{
provokingVertex = state.frontendState.provokingVertex.triFan;
for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
{
// Compute absolute attrib slot in vertex array
- uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
- maxSlot = std::max<int32_t>(maxSlot, mapSlot);
+ uint32_t mapSlot =
+ backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
+ maxSlot = std::max<int32_t>(maxSlot, mapSlot);
uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
pa.Assemble(inputSlot, tmpVector);
uint32_t numAttribs = maxSlot + 1;
- Integer<SIMD_T> vNumClippedVerts = ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
+ Integer<SIMD_T> vNumClippedVerts =
+ ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
- BinnerChooser<SIMD_T> binner(NumVertsPerPrim, pa.pDC->pState->state.rastState.conservativeRast);
+ BinnerChooser<SIMD_T> binner(NumVertsPerPrim,
+ pa.pDC->pState->state.rastState.conservativeRast);
// set up new PA for binning clipped primitives
PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
SWR_ASSERT(0 && "Unexpected points in clipper.");
}
- const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
- const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
- const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
- const uint32_t *pRtIdx = reinterpret_cast<const uint32_t *>(&vRtIdx);
-
- const SIMD256::Integer vOffsets = SIMD256::set_epi32(
- 0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
- 6 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 5 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 4 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 3 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 2 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 1 * sizeof(SIMDVERTEX_T<SIMD_T>),
- 0 * sizeof(SIMDVERTEX_T<SIMD_T>));
+ const uint32_t* pVertexCount = reinterpret_cast<const uint32_t*>(&vNumClippedVerts);
+ const uint32_t* pPrimitiveId = reinterpret_cast<const uint32_t*>(&vPrimId);
+ const uint32_t* pViewportIdx = reinterpret_cast<const uint32_t*>(&vViewportIdx);
+ const uint32_t* pRtIdx = reinterpret_cast<const uint32_t*>(&vRtIdx);
+
+ const SIMD256::Integer vOffsets =
+ SIMD256::set_epi32(0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
+ 6 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 5 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 4 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 3 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 2 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 1 * sizeof(SIMDVERTEX_T<SIMD_T>),
+ 0 * sizeof(SIMDVERTEX_T<SIMD_T>));
// only need to gather 7 verts
// @todo dynamic mask based on actual # of verts generated per lane
// for triangle fan
#if defined(_DEBUG)
- // TODO: need to increase stack size, allocating SIMD16-widened transposedPrims causes stack overflow in debug builds
- SIMDVERTEX_T<SIMD_T> *transposedPrims = reinterpret_cast<SIMDVERTEX_T<SIMD_T> *>(AlignedMalloc(sizeof(SIMDVERTEX_T<SIMD_T>) * 2, 64));
+ // TODO: need to increase stack size, allocating SIMD16-widened transposedPrims causes stack
+ // overflow in debug builds
+ SIMDVERTEX_T<SIMD_T>* transposedPrims = reinterpret_cast<SIMDVERTEX_T<SIMD_T>*>(
+ AlignedMalloc(sizeof(SIMDVERTEX_T<SIMD_T>) * 2, 64));
#else
- SIMDVERTEX_T<SIMD_T> transposedPrims[2];
+ SIMDVERTEX_T<SIMD_T> transposedPrims[2];
#endif
- uint32_t numInputPrims = pa.NumPrims();
+ uint32_t numInputPrims = pa.NumPrims();
for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim)
{
uint32_t numEmittedVerts = pVertexCount[inputPrim];
// for triangle fan
// transpose pos
- uint8_t *pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[VERTEX_POSITION_SLOT]) + sizeof(float) * inputPrim;
+ uint8_t* pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[VERTEX_POSITION_SLOT]) +
+ sizeof(float) * inputPrim;
#if 0
// TEMPORARY WORKAROUND for bizarre VS2015 code-gen bug
#endif
for (uint32_t c = 0; c < 4; ++c)
{
- SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
- transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+ SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD256::setzero_ps(), reinterpret_cast<const float*>(pBase), vOffsets, vMask);
+ transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] =
+ SimdHelper<SIMD_T>::insert_lo_ps(temp);
pBase += sizeof(Float<SIMD_T>);
}
// transpose attribs
- pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
+ pBase =
+ reinterpret_cast<uint8_t*>(&vertices[0].attrib[backendState.vertexAttribOffset]) +
+ sizeof(float) * inputPrim;
for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
{
for (uint32_t c = 0; c < 4; ++c)
{
- SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
- transposedPrims[0].attrib[attribSlot][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+ SIMD256::Float temp =
+ SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD256::setzero_ps(),
+ reinterpret_cast<const float*>(pBase),
+ vOffsets,
+ vMask);
+ transposedPrims[0].attrib[attribSlot][c] =
+ SimdHelper<SIMD_T>::insert_lo_ps(temp);
pBase += sizeof(Float<SIMD_T>);
}
}
uint32_t vertexClipCullSlot = backendState.vertexClipCullOffset;
if (state.backendState.clipDistanceMask & 0x0f)
{
- pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[vertexClipCullSlot]) + sizeof(float) * inputPrim;
+ pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[vertexClipCullSlot]) +
+ sizeof(float) * inputPrim;
for (uint32_t c = 0; c < 4; ++c)
{
- SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
- transposedPrims[0].attrib[vertexClipCullSlot][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+ SIMD256::Float temp =
+ SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD256::setzero_ps(),
+ reinterpret_cast<const float*>(pBase),
+ vOffsets,
+ vMask);
+ transposedPrims[0].attrib[vertexClipCullSlot][c] =
+ SimdHelper<SIMD_T>::insert_lo_ps(temp);
pBase += sizeof(Float<SIMD_T>);
}
}
if (state.backendState.clipDistanceMask & 0xf0)
{
- pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[vertexClipCullSlot + 1]) + sizeof(float) * inputPrim;
+ pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[vertexClipCullSlot + 1]) +
+ sizeof(float) * inputPrim;
for (uint32_t c = 0; c < 4; ++c)
{
- SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
- transposedPrims[0].attrib[vertexClipCullSlot + 1][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+ SIMD256::Float temp =
+ SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD256::setzero_ps(),
+ reinterpret_cast<const float*>(pBase),
+ vOffsets,
+ vMask);
+ transposedPrims[0].attrib[vertexClipCullSlot + 1][c] =
+ SimdHelper<SIMD_T>::insert_lo_ps(temp);
pBase += sizeof(Float<SIMD_T>);
}
}
- PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology);
+ PA_STATE_OPT clipPA(pDC,
+ numEmittedPrims,
+ reinterpret_cast<uint8_t*>(&transposedPrims[0]),
+ numEmittedVerts,
+ SWR_VTX_NUM_SLOTS,
+ true,
+ NumVertsPerPrim,
+ clipTopology);
clipPA.viewportArrayActive = pa.viewportArrayActive;
- clipPA.rtArrayActive = pa.rtArrayActive;
+ clipPA.rtArrayActive = pa.rtArrayActive;
- static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f };
+ static const uint32_t primMaskMap[] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f};
const uint32_t primMask = primMaskMap[numEmittedPrims];
- const Integer<SIMD_T> primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
+ const Integer<SIMD_T> primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
const Integer<SIMD_T> viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
- const Integer<SIMD_T> rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
-
+ const Integer<SIMD_T> rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
while (clipPA.GetNextStreamOutput())
{
if (assemble)
{
- binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
+ binner.pfnBinFunc(
+ pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
}
} while (clipPA.NextPrim());
UPDATE_STAT_FE(CPrimitives, numClippedPrims);
}
- void ExecuteStage(PA_STATE &pa, Vec4<SIMD_T> prim[], uint32_t primMask,
- Integer<SIMD_T> const &primId, Integer<SIMD_T> const &viewportIdx, Integer<SIMD_T> const &rtIdx)
+ void ExecuteStage(PA_STATE& pa,
+ Vec4<SIMD_T> prim[],
+ uint32_t primMask,
+ Integer<SIMD_T> const& primId,
+ Integer<SIMD_T> const& viewportIdx,
+ Integer<SIMD_T> const& rtIdx)
{
SWR_ASSERT(pa.pDC != nullptr);
- BinnerChooser<SIMD_T> binner(pa.binTopology, pa.pDC->pState->state.rastState.conservativeRast);
+ BinnerChooser<SIMD_T> binner(pa.binTopology,
+ pa.pDC->pState->state.rastState.conservativeRast);
// update clipper invocations pipeline stat
uint32_t numInvoc = _mm_popcnt_u32(primMask);
// cull prims with NAN coords
primMask &= ~ComputeNaNMask(prim);
- // user cull distance cull
+ // user cull distance cull
if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask)
{
primMask &= ~ComputeUserClipCullMask(pa, prim);
Float<SIMD_T> clipIntersection = ComputeClipCodeIntersection();
// Mask out non-frustum codes
- clipIntersection = SIMD_T::and_ps(clipIntersection, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_CLIP_MASK)));
+ clipIntersection = SIMD_T::and_ps(clipIntersection,
+ SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_CLIP_MASK)));
// cull prims outside view frustum
- int validMask = primMask & SimdHelper<SIMD_T>::cmpeq_ps_mask(clipIntersection, SIMD_T::setzero_ps());
+ int validMask =
+ primMask & SimdHelper<SIMD_T>::cmpeq_ps_mask(clipIntersection, SIMD_T::setzero_ps());
// skip clipping for points
uint32_t clipMask = 0;
RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(prim, SIMD_T::vmask_ps(validMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx);
+ ClipSimd(prim,
+ SIMD_T::vmask_ps(validMask),
+ SIMD_T::vmask_ps(clipMask),
+ pa,
+ primId,
+ viewportIdx,
+ rtIdx);
RDTSC_END(FEGuardbandClip, 1);
}
else if (validMask)
UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
// forward valid prims directly to binner
- binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
+ binner.pfnBinFunc(
+ this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
}
}
private:
- Float<SIMD_T> ComputeInterpFactor(Float<SIMD_T> const &boundaryCoord0, Float<SIMD_T> const &boundaryCoord1)
+ Float<SIMD_T> ComputeInterpFactor(Float<SIMD_T> const& boundaryCoord0,
+ Float<SIMD_T> const& boundaryCoord1)
{
return SIMD_T::div_ps(boundaryCoord0, SIMD_T::sub_ps(boundaryCoord0, boundaryCoord1));
}
- Integer<SIMD_T> ComputeOffsets(uint32_t attrib, Integer<SIMD_T> const &vIndices, uint32_t component)
+ Integer<SIMD_T>
+ ComputeOffsets(uint32_t attrib, Integer<SIMD_T> const& vIndices, uint32_t component)
{
const uint32_t simdVertexStride = sizeof(SIMDVERTEX_T<SIMD_T>);
const uint32_t componentStride = sizeof(Float<SIMD_T>);
const uint32_t attribStride = sizeof(Vec4<SIMD_T>);
- static const OSALIGNSIMD16(uint32_t) elemOffset[16] =
- {
+ static const OSALIGNSIMD16(uint32_t) elemOffset[16] = {
0 * sizeof(float),
1 * sizeof(float),
2 * sizeof(float),
15 * sizeof(float),
};
- static_assert(sizeof(Integer<SIMD_T>) <= sizeof(elemOffset), "Clipper::ComputeOffsets, Increase number of element offsets.");
+ static_assert(sizeof(Integer<SIMD_T>) <= sizeof(elemOffset),
+ "Clipper::ComputeOffsets, Increase number of element offsets.");
- Integer<SIMD_T> vElemOffset = SIMD_T::loadu_si(reinterpret_cast<const Integer<SIMD_T> *>(elemOffset));
+ Integer<SIMD_T> vElemOffset =
+ SIMD_T::loadu_si(reinterpret_cast<const Integer<SIMD_T>*>(elemOffset));
// step to the simdvertex
- Integer<SIMD_T> vOffsets = SIMD_T::mullo_epi32(vIndices, SIMD_T::set1_epi32(simdVertexStride));
+ Integer<SIMD_T> vOffsets =
+ SIMD_T::mullo_epi32(vIndices, SIMD_T::set1_epi32(simdVertexStride));
// step to the attribute and component
- vOffsets = SIMD_T::add_epi32(vOffsets, SIMD_T::set1_epi32(attribStride * attrib + componentStride * component));
+ vOffsets = SIMD_T::add_epi32(
+ vOffsets, SIMD_T::set1_epi32(attribStride * attrib + componentStride * component));
// step to the lane
vOffsets = SIMD_T::add_epi32(vOffsets, vElemOffset);
return vOffsets;
}
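    // Scalar model of the per-lane byte offset assembled above (illustrative):
    //
    //   offset = vertIndex * sizeof(SIMDVERTEX_T<SIMD_T>)  // step to the simdvertex
    //          + attrib    * sizeof(Vec4<SIMD_T>)          // step to the attribute
    //          + component * sizeof(Float<SIMD_T>)         // step to the component
    //          + lane      * sizeof(float);                // step to the lane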
- Float<SIMD_T> GatherComponent(const float* pBuffer, uint32_t attrib, Float<SIMD_T> const &vMask, Integer<SIMD_T> const &vIndices, uint32_t component)
+ Float<SIMD_T> GatherComponent(const float* pBuffer,
+ uint32_t attrib,
+ Float<SIMD_T> const& vMask,
+ Integer<SIMD_T> const& vIndices,
+ uint32_t component)
{
Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
- Float<SIMD_T> vSrc = SIMD_T::setzero_ps();
+ Float<SIMD_T> vSrc = SIMD_T::setzero_ps();
- return SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(vSrc, pBuffer, vOffsets, vMask);
+ return SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ vSrc, pBuffer, vOffsets, vMask);
}
- void ScatterComponent(const float* pBuffer, uint32_t attrib, Float<SIMD_T> const &vMask, Integer<SIMD_T> const &vIndices, uint32_t component, Float<SIMD_T> const &vSrc)
+ void ScatterComponent(const float* pBuffer,
+ uint32_t attrib,
+ Float<SIMD_T> const& vMask,
+ Integer<SIMD_T> const& vIndices,
+ uint32_t component,
+ Float<SIMD_T> const& vSrc)
{
Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
- const uint32_t *pOffsets = reinterpret_cast<const uint32_t *>(&vOffsets);
- const float *pSrc = reinterpret_cast<const float *>(&vSrc);
- uint32_t mask = SIMD_T::movemask_ps(vMask);
- DWORD lane;
+ const uint32_t* pOffsets = reinterpret_cast<const uint32_t*>(&vOffsets);
+ const float* pSrc = reinterpret_cast<const float*>(&vSrc);
+ uint32_t mask = SIMD_T::movemask_ps(vMask);
+ DWORD lane;
while (_BitScanForward(&lane, mask))
{
mask &= ~(1 << lane);
- const uint8_t *pBuf = reinterpret_cast<const uint8_t *>(pBuffer) + pOffsets[lane];
- *(float *)pBuf = pSrc[lane];
+ const uint8_t* pBuf = reinterpret_cast<const uint8_t*>(pBuffer) + pOffsets[lane];
+ *(float*)pBuf = pSrc[lane];
}
}
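    // The bit-scan loop above emulates a masked scatter -- AVX/AVX2 provide
    // gathers but no scatter instruction -- by peeling off active lanes one at
    // a time. Scalar model of the store pattern (illustrative):
    //
    //   for (lane = 0; lane < simdWidth; ++lane)
    //       if (mask & (1 << lane))
    //           *(float*)((uint8_t*)pBuffer + pOffsets[lane]) = pSrc[lane];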
- template<SWR_CLIPCODES ClippingPlane>
- void intersect(
- const Float<SIMD_T> &vActiveMask, // active lanes to operate on
- const Integer<SIMD_T> &s, // index to first edge vertex v0 in pInPts.
- const Integer<SIMD_T> &p, // index to second edge vertex v1 in pInPts.
- const Vec4<SIMD_T> &v1, // vertex 0 position
- const Vec4<SIMD_T> &v2, // vertex 1 position
- Integer<SIMD_T> &outIndex, // output index.
- const float *pInVerts, // array of all the input positions.
- uint32_t numInAttribs, // number of attributes per vertex.
- float *pOutVerts) // array of output positions. We'll write our new intersection point at i*4.
+ template <SWR_CLIPCODES ClippingPlane>
+ void intersect(const Float<SIMD_T>& vActiveMask, // active lanes to operate on
+ const Integer<SIMD_T>& s, // index to first edge vertex v0 in pInPts.
+ const Integer<SIMD_T>& p, // index to second edge vertex v1 in pInPts.
+ const Vec4<SIMD_T>& v1, // vertex 0 position
+ const Vec4<SIMD_T>& v2, // vertex 1 position
+ Integer<SIMD_T>& outIndex, // output index.
+ const float* pInVerts, // array of all the input positions.
+ uint32_t numInAttribs, // number of attributes per vertex.
+ float* pOutVerts) // array of output positions. We'll write our new intersection
+ // point at i*4.
{
- uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+ uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
uint32_t vertexClipCullOffset = this->state.backendState.vertexClipCullOffset;
// compute interpolation factor
Float<SIMD_T> t;
switch (ClippingPlane)
{
- case FRUSTUM_LEFT: t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[0]), SIMD_T::add_ps(v2[3], v2[0])); break;
- case FRUSTUM_RIGHT: t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[0]), SIMD_T::sub_ps(v2[3], v2[0])); break;
- case FRUSTUM_TOP: t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[1]), SIMD_T::add_ps(v2[3], v2[1])); break;
- case FRUSTUM_BOTTOM: t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[1]), SIMD_T::sub_ps(v2[3], v2[1])); break;
+ case FRUSTUM_LEFT:
+ t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[0]), SIMD_T::add_ps(v2[3], v2[0]));
+ break;
+ case FRUSTUM_RIGHT:
+ t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[0]), SIMD_T::sub_ps(v2[3], v2[0]));
+ break;
+ case FRUSTUM_TOP:
+ t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[1]), SIMD_T::add_ps(v2[3], v2[1]));
+ break;
+ case FRUSTUM_BOTTOM:
+ t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[1]), SIMD_T::sub_ps(v2[3], v2[1]));
+ break;
case FRUSTUM_NEAR:
// DX Znear plane is 0, GL is -w
if (this->state.rastState.clipHalfZ)
t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[2]), SIMD_T::add_ps(v2[3], v2[2]));
}
break;
- case FRUSTUM_FAR: t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[2]), SIMD_T::sub_ps(v2[3], v2[2])); break;
- default: SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
+ case FRUSTUM_FAR:
+ t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[2]), SIMD_T::sub_ps(v2[3], v2[2]));
+ break;
+ default:
+ SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
};
// interpolate position and store
{
Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+ Float<SIMD_T> vOutAttrib =
+ SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
}
}
{
Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+ Float<SIMD_T> vOutAttrib =
+ SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
}
}
{
Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
- Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+ Float<SIMD_T> vOutAttrib =
+ SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
}
}
}
- template<SWR_CLIPCODES ClippingPlane>
- Float<SIMD_T> inside(const Vec4<SIMD_T> &v)
+ template <SWR_CLIPCODES ClippingPlane>
+ Float<SIMD_T> inside(const Vec4<SIMD_T>& v)
{
switch (ClippingPlane)
{
- case FRUSTUM_LEFT: return SIMD_T::cmpge_ps(v[0], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_RIGHT: return SIMD_T::cmple_ps(v[0], v[3]);
- case FRUSTUM_TOP: return SIMD_T::cmpge_ps(v[1], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_BOTTOM: return SIMD_T::cmple_ps(v[1], v[3]);
- case FRUSTUM_NEAR: return SIMD_T::cmpge_ps(v[2], this->state.rastState.clipHalfZ ? SIMD_T::setzero_ps() : SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
- case FRUSTUM_FAR: return SIMD_T::cmple_ps(v[2], v[3]);
+ case FRUSTUM_LEFT:
+ return SIMD_T::cmpge_ps(v[0], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+ case FRUSTUM_RIGHT:
+ return SIMD_T::cmple_ps(v[0], v[3]);
+ case FRUSTUM_TOP:
+ return SIMD_T::cmpge_ps(v[1], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+ case FRUSTUM_BOTTOM:
+ return SIMD_T::cmple_ps(v[1], v[3]);
+ case FRUSTUM_NEAR:
+ return SIMD_T::cmpge_ps(v[2],
+ this->state.rastState.clipHalfZ
+ ? SIMD_T::setzero_ps()
+ : SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+ case FRUSTUM_FAR:
+ return SIMD_T::cmple_ps(v[2], v[3]);
default:
SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
return SIMD_T::setzero_ps();
}
}
- template<SWR_CLIPCODES ClippingPlane>
- Integer<SIMD_T> ClipTriToPlane(const float *pInVerts, const Integer<SIMD_T> &vNumInPts, uint32_t numInAttribs, float *pOutVerts)
+ template <SWR_CLIPCODES ClippingPlane>
+ Integer<SIMD_T> ClipTriToPlane(const float* pInVerts,
+ const Integer<SIMD_T>& vNumInPts,
+ uint32_t numInAttribs,
+ float* pOutVerts)
{
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
- Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
- Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
- Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
+ Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
+ Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
+ Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
while (!SIMD_T::testz_ps(vActiveMask, vActiveMask)) // loop until activeMask is empty
{
- Integer<SIMD_T> s = vCurIndex;
- Integer<SIMD_T> p = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
+ Integer<SIMD_T> s = vCurIndex;
+ Integer<SIMD_T> p = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
Integer<SIMD_T> underFlowMask = SIMD_T::cmpgt_epi32(vNumInPts, p);
- p = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::setzero_ps(), SIMD_T::castsi_ps(p), SIMD_T::castsi_ps(underFlowMask)));
+ p = SIMD_T::castps_si(SIMD_T::blendv_ps(
+ SIMD_T::setzero_ps(), SIMD_T::castsi_ps(p), SIMD_T::castsi_ps(underFlowMask)));
// gather position
Vec4<SIMD_T> vInPos0, vInPos1;
// compute intersection mask (s_in != p_in)
Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
- intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
+ intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
// store s if inside
s_in = SIMD_T::and_ps(s_in, vActiveMask);
// store position
for (uint32_t c = 0; c < 4; ++c)
{
- ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
+ ScatterComponent(
+ pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
}
// store attribs
}
// increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
+ vOutIndex = SIMD_T::blendv_epi32(
+ vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
}
// compute and store intersection
if (!SIMD_T::testz_ps(intersectMask, intersectMask))
{
- intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
+ intersect<ClippingPlane>(intersectMask,
+ s,
+ p,
+ vInPos0,
+ vInPos1,
+ vOutIndex,
+ pInVerts,
+ numInAttribs,
+ pOutVerts);
// increment outIndex for active lanes
- vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
+ vOutIndex = SIMD_T::blendv_epi32(
+ vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
}
// increment loop index and update active mask
- vCurIndex = SIMD_T::add_epi32(vCurIndex, SIMD_T::set1_epi32(1));
+ vCurIndex = SIMD_T::add_epi32(vCurIndex, SIMD_T::set1_epi32(1));
vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
}
return vOutIndex;
}
- template<SWR_CLIPCODES ClippingPlane>
- Integer<SIMD_T> ClipLineToPlane(const float *pInVerts, const Integer<SIMD_T> &vNumInPts, uint32_t numInAttribs, float *pOutVerts)
+ template <SWR_CLIPCODES ClippingPlane>
+ Integer<SIMD_T> ClipLineToPlane(const float* pInVerts,
+ const Integer<SIMD_T>& vNumInPts,
+ uint32_t numInAttribs,
+ float* pOutVerts)
{
uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
- Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
- Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
- Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
+ Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
+ Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
+ Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
if (!SIMD_T::testz_ps(vActiveMask, vActiveMask))
{
// compute intersection mask (s_in != p_in)
Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
- intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
+ intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
// store s if inside
s_in = SIMD_T::and_ps(s_in, vActiveMask);
{
for (uint32_t c = 0; c < 4; ++c)
{
- ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
+ ScatterComponent(
+ pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
}
// interpolate attributes and store
}
// increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
+ vOutIndex = SIMD_T::blendv_epi32(
+ vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
}
// compute and store intersection
if (!SIMD_T::testz_ps(intersectMask, intersectMask))
{
- intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
+ intersect<ClippingPlane>(intersectMask,
+ s,
+ p,
+ vInPos0,
+ vInPos1,
+ vOutIndex,
+ pInVerts,
+ numInAttribs,
+ pOutVerts);
// increment outIndex for active lanes
- vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
+ vOutIndex = SIMD_T::blendv_epi32(
+ vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
}
// store p if inside
{
for (uint32_t c = 0; c < 4; ++c)
{
- ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
+ ScatterComponent(
+ pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
}
// interpolate attributes and store
}
// increment outIndex
- vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), p_in);
+ vOutIndex = SIMD_T::blendv_epi32(
+ vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), p_in);
}
}
return vOutIndex;
}
- Integer<SIMD_T> ClipPrims(float *pVertices, const Float<SIMD_T> &vPrimMask, const Float<SIMD_T> &vClipMask, int numAttribs)
+ Integer<SIMD_T> ClipPrims(float* pVertices,
+ const Float<SIMD_T>& vPrimMask,
+ const Float<SIMD_T>& vClipMask,
+ int numAttribs)
{
// temp storage
- float *pTempVerts = reinterpret_cast<float *>(ClipHelper<SIMD_T>::GetTempVertices());
+ float* pTempVerts = reinterpret_cast<float*>(ClipHelper<SIMD_T>::GetTempVertices());
// zero out num input verts for non-active lanes
Integer<SIMD_T> vNumInPts = SIMD_T::set1_epi32(NumVertsPerPrim);
{
vNumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
vNumOutPts = ClipTriToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts = ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts = ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts = ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+ vNumOutPts =
+ ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+ vNumOutPts =
+ ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+ vNumOutPts =
+ ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
vNumOutPts = ClipTriToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
}
else
{
SWR_ASSERT(NumVertsPerPrim == 2);
- vNumOutPts = ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
- vNumOutPts = ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts = ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts = ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
- vNumOutPts = ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
- vNumOutPts = ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+ vNumOutPts =
+ ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
}
// restore num verts for non-clipped, active lanes
Float<SIMD_T> vNonClippedMask = SIMD_T::andnot_ps(vClipMask, vPrimMask);
- vNumOutPts = SIMD_T::blendv_epi32(vNumOutPts, SIMD_T::set1_epi32(NumVertsPerPrim), vNonClippedMask);
+ vNumOutPts =
+ SIMD_T::blendv_epi32(vNumOutPts, SIMD_T::set1_epi32(NumVertsPerPrim), vNonClippedMask);
return vNumOutPts;
}
- const uint32_t workerId{ 0 };
- DRAW_CONTEXT *pDC{ nullptr };
- const API_STATE &state;
- Float<SIMD_T> clipCodes[NumVertsPerPrim];
+ const uint32_t workerId{0};
+ DRAW_CONTEXT* pDC{nullptr};
+ const API_STATE& state;
+ Float<SIMD_T> clipCodes[NumVertsPerPrim];
};
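// Illustrative only (not part of SWR): a scalar sketch of what the elided
// intersect<ClippingPlane>() helper computes for one SIMD lane. The signed
// distances d0 and d1 of the edge endpoints to the clip boundary give the
// parametric crossing point; the position (and, likewise, each attribute)
// is lerped to that point.
static inline void IntersectScalarSketch(
    const float v0[4], const float v1[4], float d0, float d1, float out[4])
{
    float t = d0 / (d0 - d1); // where the edge crosses the plane, in [0..1]
    for (uint32_t c = 0; c < 4; ++c)
    {
        out[c] = v0[c] + t * (v1[c] - v0[c]);
    }
}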
-
// pipeline stage functions
-void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipRectangles(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
+void ClipTriangles(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
+void ClipLines(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
+void ClipPoints(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primId,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
#if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primId,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
#endif
-
/****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file conservativerast.h
-*
-******************************************************************************/
+ * Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file conservativerast.h
+ *
+ ******************************************************************************/
#pragma once
#include <type_traits>
#include "common/simdintrin.h"
//////////////////////////////////////////////////////////////////////////
/// @brief convenience typedefs for supported Fixed Point precisions
typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
-typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
-typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
-typedef std::integral_constant<uint32_t, _X_16> Fixed_X_16;
+typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
+typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
+typedef std::integral_constant<uint32_t, _X_16> Fixed_X_16;
//////////////////////////////////////////////////////////////////////////
/// @struct FixedPointTraits
-/// @brief holds constants relating to converting between FP and Fixed point
+/// @brief holds constants relating to converting between FP and Fixed point
/// @tparam FT: fixed precision type
-template<typename FT>
-struct FixedPointTraits{};
+template <typename FT>
+struct FixedPointTraits
+{
+};
//////////////////////////////////////////////////////////////////////////
/// @brief Fixed_16_8 specialization of FixedPointTraits
-template<>
+template <>
struct FixedPointTraits<Fixed_16_8>
{
/// multiplier to go from FP32 to Fixed Point 16.8
typedef std::integral_constant<uint32_t, 256> ScaleT;
/// number of bits to shift to go from 16.8 fixed => int32
typedef std::integral_constant<uint32_t, 8> BitsT;
- typedef Fixed_16_8 TypeT;
+ typedef Fixed_16_8 TypeT;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Fixed_16_9 specialization of FixedPointTraits
-template<>
+template <>
struct FixedPointTraits<Fixed_16_9>
{
/// multiplier to go from FP32 to Fixed Point 16.9
typedef std::integral_constant<uint32_t, 512> ScaleT;
/// number of bits to shift to go from 16.9 fixed => int32
typedef std::integral_constant<uint32_t, 9> BitsT;
- typedef Fixed_16_9 TypeT;
+ typedef Fixed_16_9 TypeT;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Fixed_16_9 specialization of FixedPointTraits
-template<>
+template <>
struct FixedPointTraits<Fixed_X_16>
{
/// multiplier to go from FP32 to Fixed Point X.16
typedef std::integral_constant<uint32_t, 65536> ScaleT;
/// number of bits to shift to go from X.16 fixed => int32
typedef std::integral_constant<uint32_t, 16> BitsT;
- typedef Fixed_X_16 TypeT;
+ typedef Fixed_X_16 TypeT;
};
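// Illustrative only: round-tripping a value through 16.8 fixed point with the
// traits above - multiply by ScaleT to enter fixed space, shift right by BitsT
// to recover the integer part (shown for non-negative inputs).
static inline int32_t FloatToFixed_16_8(float x)
{
    return static_cast<int32_t>(x * FixedPointTraits<Fixed_16_8>::ScaleT::value);
}
static inline int32_t FixedIntPart_16_8(int32_t fixedVal)
{
    return fixedVal >> FixedPointTraits<Fixed_16_8>::BitsT::value;
}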
//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for conservative rasterization modes
+/// @brief convenience typedefs for conservative rasterization modes
typedef std::false_type StandardRastT;
-typedef std::true_type ConservativeRastT;
+typedef std::true_type ConservativeRastT;
//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for Input Coverage rasterization modes
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
+/// @brief convenience typedefs for Input Coverage rasterization modes
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
+ InnerConservativeCoverageT;
//////////////////////////////////////////////////////////////////////////
/// @struct ConservativeRastTraits
/// @brief primary ConservativeRastTraits template. Shouldn't be instantiated
/// @tparam ConservativeT: type of conservative rasterization
template <typename ConservativeT>
-struct ConservativeRastFETraits {};
+struct ConservativeRastFETraits
+{
+};
//////////////////////////////////////////////////////////////////////////
/// @brief StandardRast specialization of ConservativeRastTraits
template <>
struct ConservativeRastFETraits<StandardRastT>
{
- typedef std::false_type IsConservativeT;
+ typedef std::false_type IsConservativeT;
typedef std::integral_constant<uint32_t, 0> BoundingBoxOffsetT;
};
template <>
struct ConservativeRastFETraits<ConservativeRastT>
{
- typedef std::true_type IsConservativeT;
+ typedef std::true_type IsConservativeT;
typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
};
//////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for ConservativeRastFETraits
-typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
+/// @brief convenience typedefs for ConservativeRastFETraits
+typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
//////////////////////////////////////////////////////////////////////////
/// @tparam ConservativeT: type of conservative rasterization
/// @tparam InputCoverageT: type of input coverage requested, if any
template <typename ConservativeT, typename _InputCoverageT>
-struct ConservativeRastBETraits {
- typedef std::false_type IsConservativeT;
- typedef _InputCoverageT InputCoverageT;
- typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
+struct ConservativeRastBETraits
+{
+ typedef std::false_type IsConservativeT;
+ typedef _InputCoverageT InputCoverageT;
+ typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
};
template <typename _InputCoverageT>
struct ConservativeRastBETraits<StandardRastT, _InputCoverageT>
{
- typedef std::false_type IsConservativeT;
- typedef _InputCoverageT InputCoverageT;
- typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
+ typedef std::false_type IsConservativeT;
+ typedef _InputCoverageT InputCoverageT;
+ typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
};
template <>
struct ConservativeRastBETraits<ConservativeRastT, NoInputCoverageT>
{
- typedef std::true_type IsConservativeT;
+ typedef std::true_type IsConservativeT;
typedef NoInputCoverageT InputCoverageT;
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
- /// of having to compare individual edges to pixel corners to check if any part of the triangle
- /// intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point,
+    /// instead of having to compare individual edges to pixel corners to check if any part of
+    /// the triangle intersects a pixel
+ typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+ ConservativeEdgeOffsetT;
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
};
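// Worked example (illustrative): with Fixed_16_9, ScaleT::value is 512, so the
// offset above evaluates to (512 / 2) + 1 = 257 fixed-point units, i.e.
// 257/512 ~ 0.502 of a pixel: half a pixel plus the 1/512 guard.
static_assert(ConservativeRastBETraits<ConservativeRastT,
                                       NoInputCoverageT>::ConservativeEdgeOffsetT::value == 257,
              "expected 1/2 pixel + 1/512 in 16.9 fixed point");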
template <>
struct ConservativeRastBETraits<ConservativeRastT, OuterConservativeCoverageT>
{
- typedef std::true_type IsConservativeT;
+ typedef std::true_type IsConservativeT;
typedef OuterConservativeCoverageT InputCoverageT;
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
- /// of having to compare individual edges to pixel corners to check if any part of the triangle
- /// intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point,
+    /// instead of having to compare individual edges to pixel corners to check if any part of
+    /// the triangle intersects a pixel
+ typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+ ConservativeEdgeOffsetT;
typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-
};
//////////////////////////////////////////////////////////////////////////
template <>
struct ConservativeRastBETraits<ConservativeRastT, InnerConservativeCoverageT>
{
- typedef std::true_type IsConservativeT;
+ typedef std::true_type IsConservativeT;
typedef InnerConservativeCoverageT InputCoverageT;
typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
/// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
- /// of having to compare individual edges to pixel corners to check if any part of the triangle
- /// intersects a pixel
- typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
-
- /// undo the outer conservative offset and offset edge towards from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
- /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of
- /// of having to compare individual edges to pixel corners to check if a pixel is fully covered by a triangle
- typedef std::integral_constant<int32_t, static_cast<int32_t>(-((ConservativePrecisionT::ScaleT::value/2) + 1) - ConservativeEdgeOffsetT::value)> InnerConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point,
+    /// instead of having to compare individual edges to pixel corners to check if any part of
+    /// the triangle intersects a pixel
+ typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+ ConservativeEdgeOffsetT;
+
+    /// undo the outer conservative offset and offset the edge toward the pixel center by 1/2
+    /// pixel + 1/512, in Fixed 16.9 precision. This allows the rasterizer to do the 3 edge
+    /// coverage tests against a single point, instead of having to compare individual edges to
+    /// pixel corners to check if a pixel is fully covered by a triangle
+ typedef std::integral_constant<int32_t,
+ static_cast<int32_t>(
+ -((ConservativePrecisionT::ScaleT::value / 2) + 1) -
+ ConservativeEdgeOffsetT::value)>
+ InnerConservativeEdgeOffsetT;
};
\ No newline at end of file
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file context.h
-*
-* @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
-* The SWR_CONTEXT is our global context and contains the DC ring,
-* thread state, etc.
-*
-* The DRAW_CONTEXT contains all state associated with a draw operation.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file context.h
+ *
+ * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
+ * The SWR_CONTEXT is our global context and contains the DC ring,
+ * thread state, etc.
+ *
+ * The DRAW_CONTEXT contains all state associated with a draw operation.
+ *
+ ******************************************************************************/
#pragma once
#include <condition_variable>
{
uint32_t frontFacing : 1;
uint32_t yMajor : 1;
- uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
+    uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
- float pointSize;
+ float pointSize;
uint32_t renderTargetArrayIndex;
uint32_t viewportIndex;
};
float OneOverW[3];
float recipDet;
- float *pRecipW;
- float *pAttribs;
- float *pPerspAttribs;
- float *pSamplePos;
- float *pUserClipBuffer;
+ float* pRecipW;
+ float* pAttribs;
+ float* pPerspAttribs;
+ float* pSamplePos;
+ float* pUserClipBuffer;
uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
- uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if entire pixel is covered
+ uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if
+ // entire pixel is covered
uint64_t anyCoveredSamples;
TRI_FLAGS triFlags;
struct TRIANGLE_WORK_DESC
{
- float *pTriBuffer;
- float *pAttribs;
- float *pUserClipBuffer;
- uint32_t numAttribs;
+ float* pTriBuffer;
+ float* pAttribs;
+ float* pUserClipBuffer;
+ uint32_t numAttribs;
TRI_FLAGS triFlags;
};
SWR_RECT rect;
uint32_t attachmentMask;
uint32_t renderTargetArrayIndex;
- float clearRTColor[4]; // RGBA_32F
- float clearDepth; // [0..1]
- uint8_t clearStencil;
+ float clearRTColor[4]; // RGBA_32F
+ float clearDepth; // [0..1]
+ uint8_t clearStencil;
};
struct DISCARD_INVALIDATE_TILES_DESC
{
- uint32_t attachmentMask;
- SWR_RECT rect;
+ uint32_t attachmentMask;
+ SWR_RECT rect;
SWR_TILE_STATE newTileState;
- bool createNewTiles;
- bool fullTilesOnly;
+ bool createNewTiles;
+ bool fullTilesOnly;
};
struct SYNC_DESC
{
PFN_CALLBACK_FUNC pfnCallbackFunc;
- uint64_t userData;
- uint64_t userData2;
- uint64_t userData3;
+ uint64_t userData;
+ uint64_t userData2;
+ uint64_t userData3;
};
struct STORE_TILES_DESC
{
- uint32_t attachmentMask;
+ uint32_t attachmentMask;
SWR_TILE_STATE postStoreTileState;
- SWR_RECT rect;
+ SWR_RECT rect;
};
struct COMPUTE_DESC
uint32_t threadGroupCountZ;
};
-typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
+typedef void (*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroTile,
+ void* pDesc);
enum WORK_TYPE
{
OSALIGNSIMD(struct) BE_WORK
{
- WORK_TYPE type;
+ WORK_TYPE type;
PFN_WORK_FUNC pfnWork;
union
{
- SYNC_DESC sync;
- TRIANGLE_WORK_DESC tri;
- CLEAR_DESC clear;
+ SYNC_DESC sync;
+ TRIANGLE_WORK_DESC tri;
+ CLEAR_DESC clear;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
- STORE_TILES_DESC storeTiles;
+ STORE_TILES_DESC storeTiles;
} desc;
};
struct DRAW_WORK
{
- DRAW_CONTEXT* pDC;
+ DRAW_CONTEXT* pDC;
union
{
- uint32_t numIndices; // DrawIndexed: Number of indices for draw.
- uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
+ uint32_t numIndices; // DrawIndexed: Number of indices for draw.
+ uint32_t numVerts; // Draw: Number of verts (triangles, lines, etc)
};
union
{
- gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices
- uint32_t startVertex; // Draw: Starting vertex in VB to render from.
+ gfxptr_t xpIB; // DrawIndexed: App supplied int32 indices
+ uint32_t startVertex; // Draw: Starting vertex in VB to render from.
};
- int32_t baseVertex;
- uint32_t numInstances; // Number of instances
- uint32_t startInstance; // Instance offset
- uint32_t startPrimID; // starting primitiveID for this draw batch
- uint32_t startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
- SWR_FORMAT type; // index buffer type
+ int32_t baseVertex;
+ uint32_t numInstances; // Number of instances
+ uint32_t startInstance; // Instance offset
+ uint32_t startPrimID; // starting primitiveID for this draw batch
+ uint32_t
+ startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
+ SWR_FORMAT type; // index buffer type
};
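// Illustrative only: how a simple non-indexed draw might populate DRAW_WORK
// (the real setup lives in the API layer, which this excerpt omits).
inline void ExampleInitDrawWork(DRAW_WORK& work, DRAW_CONTEXT* pDC, uint32_t vertCount)
{
    work.pDC           = pDC;
    work.numVerts      = vertCount; // non-indexed: vertex count, not indices
    work.startVertex   = 0;         // render from the start of the vertex buffer
    work.baseVertex    = 0;
    work.numInstances  = 1;
    work.startInstance = 0;
    work.startPrimID   = 0;
    work.startVertexID = 0;         // only meaningful for non-indexed draws
}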
-typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
+typedef void (*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ void* pDesc);
struct FE_WORK
{
- WORK_TYPE type;
+ WORK_TYPE type;
PFN_FE_WORK_FUNC pfnWork;
union
{
- SYNC_DESC sync;
- DRAW_WORK draw;
- CLEAR_DESC clear;
+ SYNC_DESC sync;
+ DRAW_WORK draw;
+ CLEAR_DESC clear;
DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
- STORE_TILES_DESC storeTiles;
+ STORE_TILES_DESC storeTiles;
} desc;
};
struct PA_STATE;
// function signature for pipeline stages that execute after primitive assembly
-typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[],
- uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+typedef void (*PFN_PROCESS_PRIMS)(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[],
+ uint32_t primMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
#if ENABLE_AVX512_SIMD16
// function signature for pipeline stages that execute after primitive assembly
-typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
- uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+typedef void(SIMDCALL* PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[],
+ uint32_t primMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
#endif
OSALIGNLINE(struct) API_STATE
SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
// GS - Geometry Shader State
- SWR_GS_STATE gsState;
- PFN_GS_FUNC pfnGsFunc;
+ SWR_GS_STATE gsState;
+ PFN_GS_FUNC pfnGsFunc;
// FS - Fetch Shader State
- PFN_FETCH_FUNC pfnFetchFunc;
+ PFN_FETCH_FUNC pfnFetchFunc;
// VS - Vertex Shader State
- PFN_VERTEX_FUNC pfnVertexFunc;
+ PFN_VERTEX_FUNC pfnVertexFunc;
// Index Buffer
- SWR_INDEX_BUFFER_STATE indexBuffer;
+ SWR_INDEX_BUFFER_STATE indexBuffer;
// CS - Compute Shader
- PFN_CS_FUNC pfnCsFunc;
- uint32_t totalThreadsInGroup;
- uint32_t totalSpillFillSize;
- uint32_t scratchSpaceSize;
- uint32_t scratchSpaceNumInstances;
+ PFN_CS_FUNC pfnCsFunc;
+ uint32_t totalThreadsInGroup;
+ uint32_t totalSpillFillSize;
+ uint32_t scratchSpaceSize;
+ uint32_t scratchSpaceNumInstances;
// FE - Frontend State
- SWR_FRONTEND_STATE frontendState;
+ SWR_FRONTEND_STATE frontendState;
// SOS - Streamout Shader State
- PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
+ PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
// Streamout state
- SWR_STREAMOUT_STATE soState;
+ SWR_STREAMOUT_STATE soState;
mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
// Tessellation State
- PFN_HS_FUNC pfnHsFunc;
- PFN_DS_FUNC pfnDsFunc;
- SWR_TS_STATE tsState;
+ PFN_HS_FUNC pfnHsFunc;
+ PFN_DS_FUNC pfnDsFunc;
+ SWR_TS_STATE tsState;
// Number of attributes used by the frontend (vs, so, gs)
- uint32_t feNumAttributes;
-
+ uint32_t feNumAttributes;
// RS - Rasterizer State
- SWR_RASTSTATE rastState;
+ SWR_RASTSTATE rastState;
// floating point multisample offsets
float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
- GUARDBANDS gbState;
+ GUARDBANDS gbState;
- SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_VIEWPORT_MATRICES vpMatrices;
+ SWR_VIEWPORT vp[KNOB_NUM_VIEWPORTS_SCISSORS];
+ SWR_VIEWPORT_MATRICES vpMatrices;
- SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
- SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
- bool scissorsTileAligned;
+ SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
+ SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
+ bool scissorsTileAligned;
- bool forceFront;
- PRIMITIVE_TOPOLOGY topology;
+ bool forceFront;
+ PRIMITIVE_TOPOLOGY topology;
// Backend state
OSALIGNLINE(SWR_BACKEND_STATE) backendState;
- SWR_DEPTH_BOUNDS_STATE depthBoundsState;
+ SWR_DEPTH_BOUNDS_STATE depthBoundsState;
// PS - Pixel shader state
- SWR_PS_STATE psState;
+ SWR_PS_STATE psState;
SWR_DEPTH_STENCIL_STATE depthStencilState;
// OM - Output Merger State
- SWR_BLEND_STATE blendState;
- PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
+ SWR_BLEND_STATE blendState;
+ PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
struct
{
- uint32_t enableStatsFE : 1; // Enable frontend pipeline stats
- uint32_t enableStatsBE : 1; // Enable backend pipeline stats
- uint32_t colorHottileEnable : 8; // Bitmask of enabled color hottiles
- uint32_t depthHottileEnable: 1; // Enable depth buffer hottile
- uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
+ uint32_t enableStatsFE : 1; // Enable frontend pipeline stats
+ uint32_t enableStatsBE : 1; // Enable backend pipeline stats
+ uint32_t colorHottileEnable : 8; // Bitmask of enabled color hottiles
+ uint32_t depthHottileEnable : 1; // Enable depth buffer hottile
+ uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
};
- PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
+ PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
};
class MacroTileMgr;
};
// pipeline function pointer types
-typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
-typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
- const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar const &);
-typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
-typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
-typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
- simdscalar const &, simdscalar const &);
+typedef void (*PFN_BACKEND_FUNC)(
+ DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
+typedef void (*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT&,
+ uint8_t* (&)[SWR_NUM_RENDERTARGETS],
+ uint32_t,
+ const SWR_BLEND_STATE*,
+ const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS],
+ simdscalar&,
+ simdscalar const&);
+typedef void (*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
+typedef void (*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
+typedef void (*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&,
+ SWR_PS_CONTEXT&,
+ const uint64_t* const,
+ const uint32_t,
+ simdscalar const&,
+ simdscalar const&);
struct BACKEND_FUNCS
{
{
API_STATE state;
- void* pPrivateState; // Its required the driver sets this up for each draw.
+    void* pPrivateState; // It's required that the driver set this up for each draw.
// pipeline function pointers, filled in by API thread when setting up the draw
- BACKEND_FUNCS backendFuncs;
+ BACKEND_FUNCS backendFuncs;
PFN_PROCESS_PRIMS pfnProcessPrims;
#if USE_SIMD16_FRONTEND
PFN_PROCESS_PRIMS_SIMD16 pfnProcessPrims_simd16;
#endif
- CachingArena* pArena; // This should only be used by API thread.
+ CachingArena* pArena; // This should only be used by API thread.
};
struct DRAW_DYNAMIC_STATE
uint32_t SoWriteOffset[4];
bool SoWriteOffsetDirty[4];
- SWR_STATS_FE statsFE; // Only one FE thread per DC.
+ SWR_STATS_FE statsFE; // Only one FE thread per DC.
SWR_STATS* pStats;
};
// This draw context maintains all of the state needed for the draw operation.
struct DRAW_CONTEXT
{
- SWR_CONTEXT* pContext;
+ SWR_CONTEXT* pContext;
union
{
- MacroTileMgr* pTileMgr;
- DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
+ MacroTileMgr* pTileMgr;
+ DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
};
- DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
- CachingArena* pArena;
+ DRAW_STATE* pState; // Read-only state. Core should not update this outside of API thread.
+ CachingArena* pArena;
- uint32_t drawId;
- bool dependentFE; // Frontend work is dependent on all previous FE
- bool dependent; // Backend work is dependent on all previous BE
- bool isCompute; // Is this DC a compute context?
- bool cleanupState; // True if this is the last draw using an entry in the state ring.
+ uint32_t drawId;
+ bool dependentFE; // Frontend work is dependent on all previous FE
+ bool dependent; // Backend work is dependent on all previous BE
+ bool isCompute; // Is this DC a compute context?
+ bool cleanupState; // True if this is the last draw using an entry in the state ring.
- FE_WORK FeWork;
+ FE_WORK FeWork;
- SYNC_DESC retireCallback; // Call this func when this DC is retired.
+ SYNC_DESC retireCallback; // Call this func when this DC is retired.
DRAW_DYNAMIC_STATE dynState;
- volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
- volatile OSALIGNLINE(uint32_t) FeLock;
- volatile OSALIGNLINE(uint32_t) threadsDone;
+ volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
+ volatile OSALIGNLINE(uint32_t) FeLock;
+ volatile OSALIGNLINE(uint32_t) threadsDone;
};
static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
struct SWR_CONTEXT
{
// Draw Context Ring
- // Each draw needs its own state in order to support mulitple draws in flight across multiple threads.
- // We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
- // of draws that can be in flight at any given time.
+    // Each draw needs its own state in order to support multiple draws in flight across multiple
+    // threads. We maintain N draw contexts configured as a ring. The size of the ring limits the
+    // maximum number of draws that can be in flight at any given time (a stand-alone sketch of
+    // this protocol follows the struct).
//
// Description:
// 1. State - When an application first sets state we'll request a new draw context to use.
- // a. If there are no available draw contexts then we'll have to wait until one becomes free.
- // b. If one is available then set pCurDrawContext to point to it and mark it in use.
+    //    a. If there are no available draw contexts then we'll have to wait until one becomes
+    //       free.
+    //    b. If one is available then set pCurDrawContext to point to it and mark it in use.
// c. All state calls set state on pCurDrawContext.
    // 2. Draw - Creates and submits a work item that is associated with the current draw context.
// a. Set pPrevDrawContext = pCurDrawContext
// b. State is copied from prev draw context to current.
RingBuffer<DRAW_CONTEXT> dcRing;
- DRAW_CONTEXT *pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
- DRAW_CONTEXT *pPrevDrawContext; // This points to DC entry for the previous context submitted that we can copy state from.
+ DRAW_CONTEXT* pCurDrawContext; // This points to DC entry in ring for an unsubmitted draw.
+ DRAW_CONTEXT* pPrevDrawContext; // This points to DC entry for the previous context submitted
+ // that we can copy state from.
- MacroTileMgr* pMacroTileManagerArray;
+ MacroTileMgr* pMacroTileManagerArray;
DispatchQueue* pDispatchQueueArray;
// Draw State Ring
// to reference a single entry in the DS ring.
RingBuffer<DRAW_STATE> dsRing;
- uint32_t curStateId; // Current index to the next available entry in the DS ring.
+ uint32_t curStateId; // Current index to the next available entry in the DS ring.
uint32_t NumWorkerThreads;
uint32_t NumFEThreads;
uint32_t NumBEThreads;
- THREAD_POOL threadPool; // Thread pool associated with this context
- SWR_THREADING_INFO threadInfo;
- SWR_API_THREADING_INFO apiThreadInfo;
+ THREAD_POOL threadPool; // Thread pool associated with this context
+ SWR_THREADING_INFO threadInfo;
+ SWR_API_THREADING_INFO apiThreadInfo;
SWR_WORKER_PRIVATE_STATE workerPrivateState;
uint32_t MAX_DRAWS_IN_FLIGHT;
std::condition_variable FifosNotEmpty;
- std::mutex WaitLock;
+ std::mutex WaitLock;
uint32_t privateStateSize;
- HotTileMgr *pHotTileMgr;
+ HotTileMgr* pHotTileMgr;
// Callback functions, passed in at create context time
- PFN_LOAD_TILE pfnLoadTile;
- PFN_STORE_TILE pfnStoreTile;
- PFN_CLEAR_TILE pfnClearTile;
- PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
- PFN_UPDATE_STATS pfnUpdateStats;
- PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
+ PFN_LOAD_TILE pfnLoadTile;
+ PFN_STORE_TILE pfnStoreTile;
+ PFN_CLEAR_TILE pfnClearTile;
+ PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+ PFN_UPDATE_STATS pfnUpdateStats;
+ PFN_UPDATE_STATS_FE pfnUpdateStatsFE;
// Global Stats
// Scratch space for workers.
uint8_t** ppScratch;
- volatile OSALIGNLINE(uint32_t) drawsOutstandingFE;
+ volatile OSALIGNLINE(uint32_t) drawsOutstandingFE;
OSALIGNLINE(CachingAllocator) cachingArenaAllocator;
uint32_t frameCount;
HANDLE* pArContext;
};
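// Illustrative only: a stand-alone mini version of the DC ring protocol
// described above (the real code uses RingBuffer<DRAW_CONTEXT>, whose API is
// not shown in this excerpt). N entries cap the number of draws in flight.
struct MiniDcRingSketch
{
    static const uint32_t N = 8; // ring size == max draws in flight
    DRAW_CONTEXT entries[N];
    uint32_t     head    = 0; // next entry to hand out
    uint32_t     retired = 0; // advanced as draws complete

    DRAW_CONTEXT* Acquire()
    {
        if (head - retired >= N)
        {
            return nullptr; // step 1a above: all contexts busy; caller must wait
        }
        return &entries[head++ % N];
    }
    void Retire() { ++retired; }
};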
-#define UPDATE_STAT_BE(name, count) if (GetApiState(pDC).enableStatsBE) { pDC->dynState.pStats[workerId].name += count; }
-#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStatsFE) { pDC->dynState.statsFE.name += count; }
+#define UPDATE_STAT_BE(name, count) \
+ if (GetApiState(pDC).enableStatsBE) \
+ { \
+ pDC->dynState.pStats[workerId].name += count; \
+ }
+#define UPDATE_STAT_FE(name, count) \
+ if (GetApiState(pDC).enableStatsFE) \
+ { \
+ pDC->dynState.statsFE.name += count; \
+ }
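// Example usage (illustrative; "DepthPassCount" is assumed here to be a member
// of SWR_STATS - substitute any real stat field):
//
//     UPDATE_STAT_BE(DepthPassCount, numSamplesPassed);
//
// which increments the per-worker backend counter only when enableStatsBE is
// set in this draw's API state.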
// ArchRast instrumentation framework
-#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
-#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
+#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
+#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
#ifdef KNOB_ENABLE_RDTSC
#define RDTSC_BEGIN(type, drawid) RDTSC_START(type)
-#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
+#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
#else
#define RDTSC_BEGIN(type, count)
#define RDTSC_END(type, count)
#endif
#ifdef KNOB_ENABLE_AR
- #define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
- #define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
+#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
+#define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
#else
- #define _AR_EVENT(ctx, event)
- #define _AR_FLUSH(ctx, id)
+#define _AR_EVENT(ctx, event)
+#define _AR_FLUSH(ctx, id)
#endif
// Use these macros for api thread.
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file depthstencil.h
-*
-* @brief Implements depth/stencil functionality
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file depthstencil.h
+ *
+ * @brief Implements depth/stencil functionality
+ *
+ ******************************************************************************/
#pragma once
#include "common/os.h"
#include "format_conversion.h"
INLINE
-void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
+void StencilOp(SWR_STENCILOP op,
+ simdscalar const& mask,
+ simdscalar const& stencilRefps,
+ simdscalar& stencilps)
{
simdscalari stencil = _simd_castps_si(stencilps);
case STENCILOP_INCRSAT:
{
simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
+ stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
break;
}
case STENCILOP_DECRSAT:
{
simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
+ stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
break;
}
case STENCILOP_INCR:
{
simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
+ stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
break;
}
case STENCILOP_DECR:
{
simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
- stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
+ stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
break;
}
case STENCILOP_INVERT:
{
- simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
+ simdscalar stencilinvert =
+ _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
break;
}
}
}
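// Illustrative only: scalar equivalent of the saturating-increment path above.
// INCRSAT clamps at 0xFF (what _simd_adds_epu8 provides), while plain INCR wraps.
static inline uint8_t StencilIncrSatScalar(uint8_t s)
{
    return (s == 0xFF) ? (uint8_t)0xFF : (uint8_t)(s + 1);
}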
-
-template<SWR_FORMAT depthFormatT>
-simdscalar QuantizeDepth(simdscalar const &depth)
+template <SWR_FORMAT depthFormatT>
+simdscalar QuantizeDepth(simdscalar const& depth)
{
SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
- uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
+ uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
    if (depthType == SWR_TYPE_FLOAT)
    {
        return depth;
    }
    // should be unorm depth if not float
    SWR_ASSERT(depthType == SWR_TYPE_UNORM);
- float quantize = (float)((1 << depthBpc) - 1);
- simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
- result = _simd_add_ps(result, _simd_set1_ps(0.5f));
- result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
-
+ float quantize = (float)((1 << depthBpc) - 1);
+ simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
+ result = _simd_add_ps(result, _simd_set1_ps(0.5f));
+ result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
+
if (depthBpc > 16)
{
result = _simd_div_ps(result, _simd_set1_ps(quantize));
}
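    // Worked example (illustrative): for a 24-bit unorm depth format,
    // quantize = (1 << 24) - 1 = 16777215. An input depth of 0.5 becomes
    // round_to_zero(0.5 * 16777215 + 0.5) = 8388608, and since depthBpc > 16
    // the divide above returns 8388608.0f / 16777215.0f.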
INLINE
-simdscalar DepthStencilTest(const API_STATE* pState,
- bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
- uint8_t *pStencilBase, simdscalar* pStencilMask)
+simdscalar DepthStencilTest(const API_STATE* pState,
+ bool frontFacing,
+ uint32_t viewportIndex,
+ simdscalar const& iZ,
+ uint8_t* pDepthBase,
+ simdscalar const& coverageMask,
+ uint8_t* pStencilBase,
+ simdscalar* pStencilMask)
{
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
- const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
- const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
+ const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
+ const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
simdscalar depthResult = _simd_set1_ps(-1.0f);
simdscalar zbuf;
// clamp Z to viewport [minZ..maxZ]
- simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
- simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
+ simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
+ simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
-
+
if (pDSState->depthTestEnable)
{
switch (pDSState->depthTestFunc)
{
- case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break;
- case ZFUNC_ALWAYS: break;
+ case ZFUNC_NEVER:
+ depthResult = _simd_setzero_ps();
+ break;
+ case ZFUNC_ALWAYS:
+ break;
default:
zbuf = _simd_load_ps((const float*)pDepthBase);
}
switch (pDSState->depthTestFunc)
{
- case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
- case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
- case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
- case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
- case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
- case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
+ case ZFUNC_LE:
+ depthResult = _simd_cmple_ps(interpZ, zbuf);
+ break;
+ case ZFUNC_LT:
+ depthResult = _simd_cmplt_ps(interpZ, zbuf);
+ break;
+ case ZFUNC_GT:
+ depthResult = _simd_cmpgt_ps(interpZ, zbuf);
+ break;
+ case ZFUNC_GE:
+ depthResult = _simd_cmpge_ps(interpZ, zbuf);
+ break;
+ case ZFUNC_EQ:
+ depthResult = _simd_cmpeq_ps(interpZ, zbuf);
+ break;
+ case ZFUNC_NE:
+ depthResult = _simd_cmpneq_ps(interpZ, zbuf);
+ break;
}
}
if (pDSState->stencilTestEnable)
{
- uint8_t stencilRefValue;
+ uint8_t stencilRefValue;
uint32_t stencilTestFunc;
- uint8_t stencilTestMask;
+ uint8_t stencilTestMask;
if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
{
stencilRefValue = pDSState->stencilRefValue;
simdvector sbuf;
simdscalar stencilWithMask;
simdscalar stencilRef;
- switch(stencilTestFunc)
+ switch (stencilTestFunc)
{
- case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break;
- case ZFUNC_ALWAYS: break;
+ case ZFUNC_NEVER:
+ stencilMask = _simd_setzero_ps();
+ break;
+ case ZFUNC_ALWAYS:
+ break;
default:
LoadSOA<R8_UINT>(pStencilBase, sbuf);
-
+
// apply stencil read mask
- stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
+ stencilWithMask = _simd_castsi_ps(
+ _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
// do stencil compare in float to avoid simd integer emulation in AVX1
stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
break;
}
- switch(stencilTestFunc)
+ switch (stencilTestFunc)
{
- case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
- case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
- case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
- case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
- case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
- case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
+ case ZFUNC_LE:
+ stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask);
+ break;
+ case ZFUNC_LT:
+ stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask);
+ break;
+ case ZFUNC_GT:
+ stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask);
+ break;
+ case ZFUNC_GE:
+ stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask);
+ break;
+ case ZFUNC_EQ:
+ stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask);
+ break;
+ case ZFUNC_NE:
+ stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask);
+ break;
}
}
simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
- depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
+ depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
*pStencilMask = stencilMask;
return depthWriteMask;
}
INLINE
-void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
- bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
- uint8_t *pStencilBase, const simdscalar& stencilMask)
+void DepthStencilWrite(const SWR_VIEWPORT* pViewport,
+ const SWR_DEPTH_STENCIL_STATE* pDSState,
+ bool frontFacing,
+ simdscalar const& iZ,
+ uint8_t* pDepthBase,
+ const simdscalar& depthMask,
+ const simdscalar& coverageMask,
+ uint8_t* pStencilBase,
+ const simdscalar& stencilMask)
{
if (pDSState->depthWriteEnable)
{
// clamp Z to viewport [minZ..maxZ]
- simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
- simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
+ simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
+ simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
LoadSOA<R8_UINT>(pStencilBase, sbuf);
simdscalar stencilbuf = sbuf.v[0];
- uint8_t stencilRefValue;
+ uint8_t stencilRefValue;
uint32_t stencilFailOp;
uint32_t stencilPassDepthPassOp;
uint32_t stencilPassDepthFailOp;
- uint8_t stencilWriteMask;
+ uint8_t stencilWriteMask;
if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
{
- stencilRefValue = pDSState->stencilRefValue;
- stencilFailOp = pDSState->stencilFailOp;
+ stencilRefValue = pDSState->stencilRefValue;
+ stencilFailOp = pDSState->stencilFailOp;
stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
- stencilWriteMask = pDSState->stencilWriteMask;
+ stencilWriteMask = pDSState->stencilWriteMask;
}
else
{
- stencilRefValue = pDSState->backfaceStencilRefValue;
- stencilFailOp = pDSState->backfaceStencilFailOp;
+ stencilRefValue = pDSState->backfaceStencilRefValue;
+ stencilFailOp = pDSState->backfaceStencilFailOp;
stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
- stencilWriteMask = pDSState->backfaceStencilWriteMask;
+ stencilWriteMask = pDSState->backfaceStencilWriteMask;
}
- simdscalar stencilps = stencilbuf;
+ simdscalar stencilps = stencilbuf;
simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
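+    // Partition the SIMD lanes by stencil outcome: stencil fail, stencil pass
+    // with depth fail, and stencil pass with depth pass; each set gets its own
+    // stencil op applied below.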
- simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
+ simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
- simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
+ simdscalar stencilPassDepthFailMask =
+ _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
simdscalar origStencil = stencilps;
StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
- StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
- StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);
+ StencilOp((SWR_STENCILOP)stencilPassDepthFailOp,
+ stencilPassDepthFailMask,
+ stencilRefps,
+ stencilps);
+ StencilOp((SWR_STENCILOP)stencilPassDepthPassOp,
+ stencilPassDepthPassMask,
+ stencilRefps,
+ stencilps);
// apply stencil write mask
simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
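+    // Merge: bits inside the write mask take the new stencil value, bits
+    // outside it keep the original buffer contents.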
- stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
- stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
+ stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
+ stencilps =
+ _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
simdvector stencilResult;
stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
StoreSOA<R8_UINT>(stencilResult, pStencilBase);
}
-
}
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fifo.hpp
-*
-* @brief Definitions for our fifos used for thread communication.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fifo.hpp
+ *
+ * @brief Definitions for our fifos used for thread communication.
+ *
+ ******************************************************************************/
#pragma once
-
#include "common/os.h"
#include "arena.h"
#include <vector>
#include <cassert>
-template<class T>
+template <class T>
struct QUEUE
{
- OSALIGNLINE(volatile uint32_t) mLock{ 0 };
- OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
+ OSALIGNLINE(volatile uint32_t) mLock{0};
+ OSALIGNLINE(volatile uint32_t) mNumEntries{0};
std::vector<T*> mBlocks;
- T* mCurBlock{ nullptr };
- uint32_t mHead{ 0 };
- uint32_t mTail{ 0 };
- uint32_t mCurBlockIdx{ 0 };
+ T* mCurBlock{nullptr};
+ uint32_t mHead{0};
+ uint32_t mTail{0};
+ uint32_t mCurBlockIdx{0};
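+    // mHead is a global element index across all blocks; mTail is the write
+    // offset within the current tail block and wraps to 0 when a block fills.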
// power of 2
static const uint32_t mBlockSizeShift = 6;
- static const uint32_t mBlockSize = 1 << mBlockSizeShift;
+ static const uint32_t mBlockSize = 1 << mBlockSizeShift;
template <typename ArenaT>
void clear(ArenaT& arena)
mHead = 0;
mTail = 0;
mBlocks.clear();
- T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
+ T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
mBlocks.push_back(pNewBlock);
- mCurBlock = pNewBlock;
+ mCurBlock = pNewBlock;
mCurBlockIdx = 0;
- mNumEntries = 0;
- mLock = 0;
+ mNumEntries = 0;
+ mLock = 0;
}
- uint32_t getNumQueued()
- {
- return mNumEntries;
- }
+ uint32_t getNumQueued() { return mNumEntries; }
bool tryLock()
{
return (initial == 0);
}
- void unlock()
- {
- mLock = 0;
- }
+ void unlock() { mLock = 0; }
T* peek()
{
return nullptr;
}
uint32_t block = mHead >> mBlockSizeShift;
- return &mBlocks[block][mHead & (mBlockSize-1)];
+ return &mBlocks[block][mHead & (mBlockSize - 1)];
}
void dequeue_noinc()
{
- mHead ++;
- mNumEntries --;
+ mHead++;
+ mNumEntries--;
}
template <typename ArenaT>
bool enqueue_try_nosync(ArenaT& arena, const T* entry)
{
const float* pSrc = (const float*)entry;
- float* pDst = (float*)&mCurBlock[mTail];
+ float* pDst = (float*)&mCurBlock[mTail];
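+        // Copy the entry one SIMD register at a time using non-temporal
+        // (streaming) stores; the producer never re-reads the entry, so there
+        // is no point pulling it into the cache.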
- auto lambda = [&](int32_t i)
- {
- __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
- _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
+ auto lambda = [&](int32_t i) {
+ __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH);
+ _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc);
};
- const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
+ const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4);
static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
- "FIFO element size should be multiple of SIMD width.");
+                      "FIFO element size must be a multiple of the SIMD register size.");
UnrollerL<0, numSimdLines, 1>::step(lambda);
- mTail ++;
+ mTail++;
if (mTail == mBlockSize)
{
if (++mCurBlockIdx < mBlocks.size())
}
else
{
- T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
+ T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
SWR_ASSERT(newBlock);
mBlocks.push_back(newBlock);
mTail = 0;
}
- mNumEntries ++;
+ mNumEntries++;
return true;
}
- void destroy()
- {
- }
-
+ void destroy() {}
};
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file format_conversion.h
-*
-* @brief API implementation
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file format_conversion.h
+ *
 + * @brief Format conversion helpers for SOA pixel data.
+ *
+ ******************************************************************************/
#include "format_types.h"
#include "format_traits.h"
/// SOA RGBA32_FLOAT format.
/// @param pSrc - source data in SOA form
/// @param dst - output data in SOA form
-template<SWR_FORMAT SrcFormat>
-INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
+template <SWR_FORMAT SrcFormat>
+INLINE void LoadSOA(const uint8_t* pSrc, simdvector& dst)
{
// fast path for float32
- if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
+ if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+ (FormatTraits<SrcFormat>::GetBPC(0) == 32))
{
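+        // 32-bit float components are already in the working layout, so each
+        // SIMD register is loaded directly and only swizzled.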
- auto lambda = [&](int comp)
- {
- simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar)));
+ auto lambda = [&](int comp) {
+ simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp * sizeof(simdscalar)));
dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
};
return;
}
- auto lambda = [&](int comp)
- {
+ auto lambda = [&](int comp) {
// load SIMD components
simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc);
}
//////////////////////////////////////////////////////////////////////////
-/// @brief Clamps the given component based on the requirements on the
+/// @brief Clamps the given component based on the requirements of the
/// Format template arg
/// @param vComp - SIMD vector of floats
/// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simdscalar Clamp(simdscalar const& vC, uint32_t Component)
{
simdscalar vComp = vC;
if (FormatTraits<Format>::isNormalized(Component))
{
if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
{
- int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
- int iMin = 0;
+ int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
+ int iMin = 0;
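+            // e.g. an 8-bit component gives iMax = 255, iMin = 0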
simdscalari vCompi = _simd_castps_si(vComp);
- vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
- vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
- vComp = _simd_castsi_ps(vCompi);
+ vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
+ vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
+ vComp = _simd_castsi_ps(vCompi);
}
else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
{
- int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
- int iMin = -1 - iMax;
+ int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
+ int iMin = -1 - iMax;
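+            // e.g. an 8-bit component gives iMax = 127, iMin = -128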
simdscalari vCompi = _simd_castps_si(vComp);
- vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
- vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
- vComp = _simd_castsi_ps(vCompi);
+ vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
+ vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
+ vComp = _simd_castsi_ps(vCompi);
}
}
/// Format template arg
/// @param vComp - SIMD vector of floats
/// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simdscalar Normalize(simdscalar const& vC, uint32_t Component)
{
simdscalar vComp = vC;
if (FormatTraits<Format>::isNormalized(Component))
/// RGBA32_FLOAT to SOA format
/// @param src - source data in SOA form
/// @param dst - output data in SOA form
-template<SWR_FORMAT DstFormat>
-INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
+template <SWR_FORMAT DstFormat>
+INLINE void StoreSOA(const simdvector& src, uint8_t* pDst)
{
// fast path for float32
- if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
+ if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+ (FormatTraits<DstFormat>::GetBPC(0) == 32))
{
for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
{
// Gamma-correct
if (FormatTraits<DstFormat>::isSRGB)
{
- if (comp < 3) // Input format is always RGBA32_FLOAT.
+ if (comp < 3) // Input format is always RGBA32_FLOAT.
{
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
}
}
- _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp);
+ _simd_store_ps((float*)(pDst + comp * sizeof(simdscalar)), vComp);
}
return;
}
- auto lambda = [&](int comp)
- {
+ auto lambda = [&](int comp) {
simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
// Gamma-correct
if (FormatTraits<DstFormat>::isSRGB)
{
- if (comp < 3) // Input format is always RGBA32_FLOAT.
+ if (comp < 3) // Input format is always RGBA32_FLOAT.
{
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
}
/// SOA RGBA32_FLOAT format.
/// @param pSrc - source data in SOA form
/// @param dst - output data in SOA form
-template<SWR_FORMAT SrcFormat>
-INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst)
+template <SWR_FORMAT SrcFormat>
+INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simd16vector& dst)
{
// fast path for float32
- if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
+ if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+ (FormatTraits<SrcFormat>::GetBPC(0) == 32))
{
- auto lambda = [&](int comp)
- {
- simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar)));
+ auto lambda = [&](int comp) {
+ simd16scalar vComp =
+ _simd16_load_ps(reinterpret_cast<const float*>(pSrc + comp * sizeof(simd16scalar)));
dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
};
return;
}
- auto lambda = [&](int comp)
- {
+ auto lambda = [&](int comp) {
// load SIMD components
simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc);
}
//////////////////////////////////////////////////////////////////////////
-/// @brief Clamps the given component based on the requirements on the
+/// @brief Clamps the given component based on the requirements of the
/// Format template arg
/// @param vComp - SIMD vector of floats
/// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simd16scalar SIMDCALL Clamp(simd16scalar const& v, uint32_t Component)
{
simd16scalar vComp = v;
if (FormatTraits<Format>::isNormalized(Component))
{
if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
{
- int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
- int iMin = 0;
+ int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
+ int iMin = 0;
simd16scalari vCompi = _simd16_castps_si(vComp);
- vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
- vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
- vComp = _simd16_castsi_ps(vCompi);
+ vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
+ vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
+ vComp = _simd16_castsi_ps(vCompi);
}
else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
{
- int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
- int iMin = -1 - iMax;
+ int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
+ int iMin = -1 - iMax;
simd16scalari vCompi = _simd16_castps_si(vComp);
- vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
- vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
- vComp = _simd16_castsi_ps(vCompi);
+ vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
+ vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
+ vComp = _simd16_castsi_ps(vCompi);
}
}
/// Format template arg
/// @param vComp - SIMD vector of floats
/// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simd16scalar SIMDCALL Normalize(simd16scalar const& vComp, uint32_t Component)
{
simd16scalar r = vComp;
if (FormatTraits<Format>::isNormalized(Component))
/// RGBA32_FLOAT to SOA format
/// @param src - source data in SOA form
/// @param dst - output data in SOA form
-template<SWR_FORMAT DstFormat>
-INLINE void SIMDCALL StoreSOA(const simd16vector &src, uint8_t *pDst)
+template <SWR_FORMAT DstFormat>
+INLINE void SIMDCALL StoreSOA(const simd16vector& src, uint8_t* pDst)
{
// fast path for float32
- if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
+ if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+ (FormatTraits<DstFormat>::GetBPC(0) == 32))
{
for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
{
// Gamma-correct
if (FormatTraits<DstFormat>::isSRGB)
{
- if (comp < 3) // Input format is always RGBA32_FLOAT.
+ if (comp < 3) // Input format is always RGBA32_FLOAT.
{
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
}
}
- _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst + comp * sizeof(simd16scalar)), vComp);
}
return;
}
- auto lambda = [&](int comp)
- {
+ auto lambda = [&](int comp) {
simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
// Gamma-correct
if (FormatTraits<DstFormat>::isSRGB)
{
- if (comp < 3) // Input format is always RGBA32_FLOAT.
+ if (comp < 3) // Input format is always RGBA32_FLOAT.
{
vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
}
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file format_traits.h
-*
-* @brief Format Traits. auto-generated file
-*
-* DO NOT EDIT
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file format_traits.h
+ *
+ * @brief Format Traits. auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
#pragma once
#include "format_types.h"
//////////////////////////////////////////////////////////////////////////
/// FormatSwizzle - Component swizzle selects
//////////////////////////////////////////////////////////////////////////
-template<uint32_t comp0 = 0, uint32_t comp1 = 0, uint32_t comp2 = 0, uint32_t comp3 = 0>
+template <uint32_t comp0 = 0, uint32_t comp1 = 0, uint32_t comp2 = 0, uint32_t comp3 = 0>
struct FormatSwizzle
{
// Return swizzle select for component.
INLINE static uint32_t swizzle(uint32_t c)
{
- static const uint32_t s[4] = { comp0, comp1, comp2, comp3 };
+ static const uint32_t s[4] = {comp0, comp1, comp2, comp3};
return s[c];
}
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits - Format traits
//////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT format>
-struct FormatTraits :
- ComponentTraits<SWR_TYPE_UNKNOWN, 0>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0>
+template <SWR_FORMAT format>
+struct FormatTraits : ComponentTraits<SWR_TYPE_UNKNOWN, 0>, FormatSwizzle<0>, Defaults<0, 0, 0, 0>
{
- static const uint32_t bpp{ 0 };
- static const uint32_t numComps{ 0 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
+ static const uint32_t bpp{0};
+ static const uint32_t numComps{0};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_FLOAT> - Format traits specialization for R32G32B32A32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_FLOAT,
+ 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_SINT> - Format traits specialization for R32G32B32A32_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_UINT> - Format traits specialization for R32G32B32A32_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R64G64_FLOAT> - Format traits specialization for R64G64_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose64_64 TransposeT;
typedef Format2<64, 64> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32X32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_FLOAT,
+ 32,
+ SWR_TYPE_UNUSED,
+ 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_SSCALED> - Format traits specialization for R32G32B32A32_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+ 32,
+ SWR_TYPE_SSCALED,
+ 32,
+ SWR_TYPE_SSCALED,
+ 32,
+ SWR_TYPE_SSCALED,
+ 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_USCALED> - Format traits specialization for R32G32B32A32_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 32,
+ SWR_TYPE_USCALED,
+ 32,
+ SWR_TYPE_USCALED,
+ 32,
+ SWR_TYPE_USCALED,
+ 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32A32_SFIXED> - Format traits specialization for R32G32B32A32_SFIXED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SFIXED> :
- ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED,
+ 32,
+ SWR_TYPE_SFIXED,
+ 32,
+ SWR_TYPE_SFIXED,
+ 32,
+ SWR_TYPE_SFIXED,
+ 32>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32_32 TransposeT;
typedef Format4<32, 32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_FLOAT> - Format traits specialization for R32G32B32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_SINT> - Format traits specialization for R32G32B32_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_UINT> - Format traits specialization for R32G32B32_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_SSCALED> - Format traits specialization for R32G32B32_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_USCALED> - Format traits specialization for R32G32B32_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32B32_SFIXED> - Format traits specialization for R32G32B32_SFIXED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SFIXED> :
- ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 96 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SFIXED>
+ : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{96};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32_32 TransposeT;
typedef Format3<32, 32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_UNORM> - Format traits specialization for R16G16B16A16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNORM,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
typedef Format4<16, 16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_SNORM> - Format traits specialization for R16G16B16A16_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+ 16,
+ SWR_TYPE_SNORM,
+ 16,
+ SWR_TYPE_SNORM,
+ 16,
+ SWR_TYPE_SNORM,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
typedef Format4<16, 16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_SINT> - Format traits specialization for R16G16B16A16_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
typedef Format4<16, 16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_UINT> - Format traits specialization for R16G16B16A16_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
typedef Format4<16, 16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_FLOAT> - Format traits specialization for R16G16B16A16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_FLOAT,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
typedef Format4<16, 16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_FLOAT> - Format traits specialization for R32G32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
typedef Format2<32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_SINT> - Format traits specialization for R32G32_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SINT> : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
typedef Format2<32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_UINT> - Format traits specialization for R32G32_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_UINT> : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_FLOAT_X8X24_TYPELESS> - Format traits specialization for R32_FLOAT_X8X24_TYPELESS
+/// FormatTraits<R32_FLOAT_X8X24_TYPELESS> - Format traits specialization for
+/// R32_FLOAT_X8X24_TYPELESS
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_FLOAT_X8X24_TYPELESS> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
+template <>
+struct FormatTraits<R32_FLOAT_X8X24_TYPELESS>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
typedef Format2<32, 32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<X32_TYPELESS_G8X24_UINT> - Format traits specialization for X32_TYPELESS_G8X24_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<X32_TYPELESS_G8X24_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UNUSED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<X32_TYPELESS_G8X24_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UNUSED, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L32A32_FLOAT> - Format traits specialization for L32A32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32A32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 64>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<64> TransposeT;
    typedef Format1<64> FormatT;
};
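//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of this patch): the fourth Defaults<>
// argument is the component default expressed as a raw bit pattern;
// 0x3f800000 is IEEE-754 single-precision 1.0f, while the integer
// formats above use 0x1. A minimal check of the float case:
//
//     #include <cstring>
//     inline float DefaultAlpha()
//     {
//         const uint32_t bits = 0x3f800000;
//         float f;
//         std::memcpy(&f, &bits, sizeof(f)); // f == 1.0f
//         return f;
//     }
//////////////////////////////////////////////////////////////////////////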
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16X16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNUSED, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16X16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNORM,
+ 16,
+ SWR_TYPE_UNUSED,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
    typedef Format4<16, 16, 16, 16> FormatT;
};
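//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of this patch): TransposeT names the
// helper that rearranges this format's interleaved (AoS) components into
// per-component (SoA) lanes, and FormatT describes the packed bit layout.
// Generic code can pull either from the traits; the alias below assumes
// the primary template is parameterized on the SWR_FORMAT enum:
//
//     template <SWR_FORMAT Fmt>
//     using TransposeOf = typename FormatTraits<Fmt>::TransposeT;
//     // TransposeOf<R16G16B16X16_UNORM> is Transpose16_16_16_16
//////////////////////////////////////////////////////////////////////////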
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16X16_FLOAT> - Format traits specialization for R16G16B16X16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16X16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_UNUSED, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16X16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_FLOAT,
+ 16,
+ SWR_TYPE_UNUSED,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
    typedef Format4<16, 16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L32X32_FLOAT> - Format traits specialization for L32X32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32X32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I32X32_FLOAT> - Format traits specialization for I32X32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I32X32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_SSCALED> - Format traits specialization for R16G16B16A16_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+ 16,
+ SWR_TYPE_SSCALED,
+ 16,
+ SWR_TYPE_SSCALED,
+ 16,
+ SWR_TYPE_SSCALED,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
    typedef Format4<16, 16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16A16_USCALED> - Format traits specialization for R16G16B16A16_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 16,
+ SWR_TYPE_USCALED,
+ 16,
+ SWR_TYPE_USCALED,
+ 16,
+ SWR_TYPE_USCALED,
+ 16>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16_16 TransposeT;
    typedef Format4<16, 16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_SSCALED> - Format traits specialization for R32G32_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_USCALED> - Format traits specialization for R32G32_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32G32_SFIXED> - Format traits specialization for R32G32_SFIXED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SFIXED> :
- ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose32_32 TransposeT;
    typedef Format2<32, 32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B8G8R8A8_UNORM> - Format traits specialization for B8G8R8A8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8A8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8A8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
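//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of this patch): FormatSwizzle<> gives,
// for each stored component, the logical RGBA channel it feeds, so
// <2, 1, 0, 3> encodes a BGRA layout: stored comp 0 carries blue (2),
// comp 1 green (1), comp 2 red (0), comp 3 alpha (3). As a plain table:
//
//     constexpr uint32_t bgraToRgba[4] = {2, 1, 0, 3}; // stored comp -> RGBA channel
//     static_assert(bgraToRgba[0] == 2, "comp 0 carries blue");
//////////////////////////////////////////////////////////////////////////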
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B8G8R8A8_UNORM_SRGB> - Format traits specialization for B8G8R8A8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8A8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8A8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_UNORM> - Format traits specialization for R10G10B10A2_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_UNORM_SRGB> - Format traits specialization for R10G10B10A2_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_UINT> - Format traits specialization for R10G10B10A2_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_UNORM> - Format traits specialization for R8G8B8A8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_UNORM_SRGB> - Format traits specialization for R8G8B8A8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_SNORM> - Format traits specialization for R8G8B8A8_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_SINT> - Format traits specialization for R8G8B8A8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_UINT> - Format traits specialization for R8G8B8A8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_UNORM> - Format traits specialization for R16G16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_SNORM> - Format traits specialization for R16G16_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_SINT> - Format traits specialization for R16G16_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SINT> : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_UINT> - Format traits specialization for R16G16_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_UINT> : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_FLOAT> - Format traits specialization for R16G16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_UNORM> - Format traits specialization for B10G10R10A2_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_UNORM_SRGB> - Format traits specialization for B10G10R10A2_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R11G11B10_FLOAT> - Format traits specialization for R11G11B10_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R11G11B10_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 10>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R11G11B10_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 10>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose11_11_10 TransposeT;
typedef Format3<11, 11, 10> FormatT;
};
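//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of this patch): Format3<11, 11, 10>
// packs R in bits 0..10, G in bits 11..21, and B in bits 22..31; the
// 11- and 10-bit channels are unsigned small floats with no sign bit.
// Field extraction from a packed dword:
//
//     constexpr uint32_t R11(uint32_t p) { return p & 0x7FF; }
//     constexpr uint32_t G11(uint32_t p) { return (p >> 11) & 0x7FF; }
//     constexpr uint32_t B10(uint32_t p) { return (p >> 22) & 0x3FF; }
//////////////////////////////////////////////////////////////////////////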
//////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10_FLOAT_A2_UNORM> - Format traits specialization for R10G10B10_FLOAT_A2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> :
- ComponentTraits<SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_UNORM, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+/// FormatTraits<R10G10B10_FLOAT_A2_UNORM> - Format traits specialization for
+/// R10G10B10_FLOAT_A2_UNORM
+//////////////////////////////////////////////////////////////////////////
+template <>
+struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> : ComponentTraits<SWR_TYPE_FLOAT,
+ 10,
+ SWR_TYPE_FLOAT,
+ 10,
+ SWR_TYPE_FLOAT,
+ 10,
+ SWR_TYPE_UNORM,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_SINT> - Format traits specialization for R32_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_UINT> - Format traits specialization for R32_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_FLOAT> - Format traits specialization for R32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R24_UNORM_X8_TYPELESS> - Format traits specialization for R24_UNORM_X8_TYPELESS
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R24_UNORM_X8_TYPELESS> :
- ComponentTraits<SWR_TYPE_UNORM, 24>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R24_UNORM_X8_TYPELESS>
+ : ComponentTraits<SWR_TYPE_UNORM, 24>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<24> FormatT;
};
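//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (not part of this patch): this is a padded depth
// layout - bpp is 32, but only the low 24 bits (Format1<24>) carry a
// UNORM value and the upper 8 bits are typeless padding. Normalization
// divides by the 24-bit maximum:
//
//     constexpr float Unorm24ToFloat(uint32_t v)
//     {
//         return (v & 0xFFFFFF) / float((1u << 24) - 1);
//     }
//////////////////////////////////////////////////////////////////////////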
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<X24_TYPELESS_G8_UINT> - Format traits specialization for X24_TYPELESS_G8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<X24_TYPELESS_G8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 32>,
- FormatSwizzle<1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<X24_TYPELESS_G8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<1>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L32_UNORM> - Format traits specialization for L32_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L16A16_UNORM> - Format traits specialization for L16A16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16A16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I24X8_UNORM> - Format traits specialization for I24X8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I24X8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose24_8 TransposeT;
    typedef Format2<24, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L24X8_UNORM> - Format traits specialization for L24X8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L24X8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose24_8 TransposeT;
    typedef Format2<24, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I32_FLOAT> - Format traits specialization for I32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I32_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L32_FLOAT> - Format traits specialization for L32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A32_FLOAT> - Format traits specialization for A32_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A32_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 32>,
- FormatSwizzle<3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A32_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B8G8R8X8_UNORM> - Format traits specialization for B8G8R8X8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8X8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8X8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B8G8R8X8_UNORM_SRGB> - Format traits specialization for B8G8R8X8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8X8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8X8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
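// Illustrative sketch (not part of this change): isSRGB{true} marks formats whose
// color channels are gamma-encoded and need the standard sRGB-to-linear curve
// applied after UNORM decode (alpha, when present, stays linear). The reference
// piecewise function:
#include <cmath>

static inline float SrgbToLinear(float c) // c in [0, 1]
{
    // Linear segment below the 0.04045 breakpoint, 2.4-gamma curve above it.
    return (c <= 0.04045f) ? (c / 12.92f) : std::pow((c + 0.055f) / 1.055f, 2.4f);
}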
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8X8_UNORM> - Format traits specialization for R8G8B8X8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8X8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8X8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8X8_UNORM_SRGB> - Format traits specialization for R8G8B8X8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8X8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8X8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R9G9B9E5_SHAREDEXP> - Format traits specialization for R9G9B9E5_SHAREDEXP
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R9G9B9E5_SHAREDEXP> :
- ComponentTraits<SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 5>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R9G9B9E5_SHAREDEXP>
+ : ComponentTraits<SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 5>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose9_9_9_5 TransposeT;
    typedef Format4<9, 9, 9, 5> FormatT;
};
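// Illustrative sketch (not part of this change): R9G9B9E5_SHAREDEXP packs three
// 9-bit mantissas with one shared 5-bit exponent, which is why the traits above
// declare four raw SWR_TYPE_UINT components and hasAlpha{false}. A standalone
// decode under the usual shared-exponent convention (bias 15, 9 mantissa bits):
#include <cmath>
#include <cstdint>

static inline void DecodeR9G9B9E5(uint32_t packed, float rgb[3])
{
    uint32_t r = (packed >> 0) & 0x1FF;
    uint32_t g = (packed >> 9) & 0x1FF;
    uint32_t b = (packed >> 18) & 0x1FF;
    uint32_t e = (packed >> 27) & 0x1F;
    // One scale shared by all three channels: 2^(e - bias - mantissaBits).
    float scale = std::ldexp(1.0f, (int)e - 15 - 9);
    rgb[0] = r * scale;
    rgb[1] = g * scale;
    rgb[2] = b * scale;
}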
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10X2_UNORM> - Format traits specialization for B10G10R10X2_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10X2_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNUSED, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10X2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNORM,
+ 10,
+ SWR_TYPE_UNUSED,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L16A16_FLOAT> - Format traits specialization for L16A16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16A16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10X2_USCALED> - Format traits specialization for R10G10B10X2_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10X2_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_UNUSED, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10X2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_UNUSED,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
    typedef Format4<10, 10, 10, 2> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_SSCALED> - Format traits specialization for R8G8B8A8_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+ 8,
+ SWR_TYPE_SSCALED,
+ 8,
+ SWR_TYPE_SSCALED,
+ 8,
+ SWR_TYPE_SSCALED,
+ 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
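// Illustrative sketch (not part of this change): SSCALED/USCALED channels convert
// the raw integer directly to float with no normalization, unlike SNORM/UNORM,
// which divide by the maximum magnitude. For an 8-bit signed channel:
#include <cstdint>

static inline float DecodeSscaled8(int8_t v)
{
    return (float)v; // -128..127 passes through unchanged
}

static inline float DecodeSnorm8(int8_t v)
{
    float f = (float)v / 127.0f;    // map to approximately [-1, 1]
    return (f < -1.0f) ? -1.0f : f; // the usual convention clamps -128
}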
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8A8_USCALED> - Format traits specialization for R8G8B8A8_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 8,
+ SWR_TYPE_USCALED,
+ 8,
+ SWR_TYPE_USCALED,
+ 8,
+ SWR_TYPE_USCALED,
+ 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_SSCALED> - Format traits specialization for R16G16_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16_USCALED> - Format traits specialization for R16G16_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16 TransposeT;
    typedef Format2<16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_SSCALED> - Format traits specialization for R32_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_USCALED> - Format traits specialization for R32_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
    typedef Format1<32> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G6R5_UNORM> - Format traits specialization for B5G6R5_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G6R5_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G6R5_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
+ FormatSwizzle<2, 1, 0>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_6_5 TransposeT;
    typedef Format3<5, 6, 5> FormatT;
};
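// Illustrative sketch (not part of this change): for UNORM formats each n-bit
// field maps to [0, 1] as value / (2^n - 1), matching the 5/6/5 widths in
// FormatT above. Assuming the common 565 layout (red in the top five bits):
#include <cstdint>

static inline void UnpackB5G6R5(uint16_t px, float rgba[4])
{
    rgba[0] = ((px >> 11) & 0x1F) / 31.0f; // 5-bit red
    rgba[1] = ((px >> 5) & 0x3F) / 63.0f;  // 6-bit green
    rgba[2] = (px & 0x1F) / 31.0f;         // 5-bit blue
    rgba[3] = 1.0f; // no stored alpha; Defaults supplies 1.0f (0x3f800000)
}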
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G6R5_UNORM_SRGB> - Format traits specialization for B5G6R5_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G6R5_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G6R5_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
+ FormatSwizzle<2, 1, 0>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_6_5 TransposeT;
    typedef Format3<5, 6, 5> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G5R5A1_UNORM> - Format traits specialization for B5G5R5A1_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5A1_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5A1_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_5_5_1 TransposeT;
    typedef Format4<5, 5, 5, 1> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G5R5A1_UNORM_SRGB> - Format traits specialization for B5G5R5A1_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5A1_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5A1_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_5_5_1 TransposeT;
    typedef Format4<5, 5, 5, 1> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B4G4R4A4_UNORM> - Format traits specialization for B4G4R4A4_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B4G4R4A4_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B4G4R4A4_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose4_4_4_4 TransposeT;
    typedef Format4<4, 4, 4, 4> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B4G4R4A4_UNORM_SRGB> - Format traits specialization for B4G4R4A4_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B4G4R4A4_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B4G4R4A4_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose4_4_4_4 TransposeT;
    typedef Format4<4, 4, 4, 4> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_UNORM> - Format traits specialization for R8G8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_SNORM> - Format traits specialization for R8G8_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_SINT> - Format traits specialization for R8G8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_UINT> - Format traits specialization for R8G8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_UNORM> - Format traits specialization for R16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_SNORM> - Format traits specialization for R16_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_SINT> - Format traits specialization for R16_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_UINT> - Format traits specialization for R16_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_FLOAT> - Format traits specialization for R16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I16_UNORM> - Format traits specialization for I16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I16_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L16_UNORM> - Format traits specialization for L16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A16_UNORM> - Format traits specialization for A16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16>,
- FormatSwizzle<3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A16_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8A8_UNORM> - Format traits specialization for L8A8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I16_FLOAT> - Format traits specialization for I16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I16_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L16_FLOAT> - Format traits specialization for L16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A16_FLOAT> - Format traits specialization for A16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A16_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8A8_UNORM_SRGB> - Format traits specialization for L8A8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G5R5X1_UNORM> - Format traits specialization for B5G5R5X1_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5X1_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5X1_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_5_5_1 TransposeT;
    typedef Format4<5, 5, 5, 1> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B5G5R5X1_UNORM_SRGB> - Format traits specialization for B5G5R5X1_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5X1_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5X1_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose5_5_5_1 TransposeT;
    typedef Format4<5, 5, 5, 1> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_SSCALED> - Format traits specialization for R8G8_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8_USCALED> - Format traits specialization for R8G8_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0, 1>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
+ FormatSwizzle<0, 1>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_SSCALED> - Format traits specialization for R16_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16_USCALED> - Format traits specialization for R16_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<16> TransposeT;
    typedef Format1<16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A1B5G5R5_UNORM> - Format traits specialization for A1B5G5R5_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A1B5G5R5_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 1, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5>,
- FormatSwizzle<3, 2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A1B5G5R5_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 1, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5>,
+ FormatSwizzle<3, 2, 1, 0>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose1_5_5_5 TransposeT;
    typedef Format4<1, 5, 5, 5> FormatT;
};
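// Aside (illustrative sketch, not part of this patch): for a packed format the
// per-component bit widths in ComponentTraits sum to bpp, and alphaComp names
// the alpha channel's slot after FormatSwizzle is applied. Assuming this header
// is in scope, both facts can be checked at compile time:
static_assert(FormatTraits<A1B5G5R5_UNORM>::bpp == 1 + 5 + 5 + 5,
              "packed component widths sum to bpp");
static_assert(FormatTraits<A1B5G5R5_UNORM>::hasAlpha &&
                  FormatTraits<A1B5G5R5_UNORM>::alphaComp == 3,
              "alpha is stored in swizzled component slot 3");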
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A4B4G4R4_UNORM> - Format traits specialization for A4B4G4R4_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A4B4G4R4_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
- FormatSwizzle<3, 2, 1, 0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A4B4G4R4_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+ FormatSwizzle<3, 2, 1, 0>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose4_4_4_4 TransposeT;
    typedef Format4<4, 4, 4, 4> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8A8_UINT> - Format traits specialization for L8A8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8A8_SINT> - Format traits specialization for L8A8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 16 };
- static const uint32_t numComps{ 2 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 1 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+ FormatSwizzle<0, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{16};
+ static const uint32_t numComps{2};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{1};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8 TransposeT;
    typedef Format2<8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_UNORM> - Format traits specialization for R8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
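// Aside (illustrative sketch, not part of this patch): linear formats describe
// a single pixel, so bpp is bits per pixel and the bc* fields degenerate to a
// 1x1 "block". A compile-time sanity check, assuming this header is in scope:
static_assert(FormatTraits<R8_UNORM>::bpp == 8 && FormatTraits<R8_UNORM>::numComps == 1,
              "R8_UNORM stores one 8-bit component per pixel");
static_assert(!FormatTraits<R8_UNORM>::isBC && FormatTraits<R8_UNORM>::bcWidth == 1 &&
                  FormatTraits<R8_UNORM>::bcHeight == 1,
              "non-compressed formats use a 1x1 block");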
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_SNORM> - Format traits specialization for R8_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_SINT> - Format traits specialization for R8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_UINT> - Format traits specialization for R8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<A8_UNORM> - Format traits specialization for A8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I8_UNORM> - Format traits specialization for I8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8_UNORM> - Format traits specialization for L8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_SSCALED> - Format traits specialization for R8_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8_USCALED> - Format traits specialization for R8_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8_UNORM_SRGB> - Format traits specialization for L8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8_UINT> - Format traits specialization for L8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<L8_SINT> - Format traits specialization for L8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I8_UINT> - Format traits specialization for I8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<I8_SINT> - Format traits specialization for I8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<DXT1_RGB_SRGB> - Format traits specialization for DXT1_RGB_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<DXT1_RGB_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<DXT1_RGB_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<YCRCB_SWAPUVY> - Format traits specialization for YCRCB_SWAPUVY
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<YCRCB_SWAPUVY> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ true };
- static const uint32_t bcWidth{ 2 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<YCRCB_SWAPUVY>
+ : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{true};
+ static const uint32_t bcWidth{2};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
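// Aside (illustrative sketch, not part of this patch): subsampled YCrCb formats
// reuse the block fields, so bpp{32} here spans a bcWidth x bcHeight = 2x1
// pixel pair, i.e. an effective 16 bits per pixel. Assuming this header is in
// scope:
constexpr uint32_t kYCrCbSwapUVYBitsPerPixel =
    FormatTraits<YCRCB_SWAPUVY>::bpp /
    (FormatTraits<YCRCB_SWAPUVY>::bcWidth * FormatTraits<YCRCB_SWAPUVY>::bcHeight);
static_assert(kYCrCbSwapUVYBitsPerPixel == 16, "2:1 horizontally subsampled");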
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC1_UNORM> - Format traits specialization for BC1_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC1_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC1_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
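// Aside (illustrative sketch, not part of this patch): for block-compressed
// formats the traits describe one compressed block rather than one pixel, so
// bpp{64} for BC1 means 64 bits per 4x4 block. A hypothetical helper (not part
// of SWR) deriving a tightly packed row pitch, assuming FormatTraits is
// parameterized on the SWR_FORMAT enum as elsewhere in this header:
template <SWR_FORMAT format>
uint32_t TightRowPitchBytes(uint32_t widthInPixels)
{
    // Round up to whole blocks, then multiply by the per-block byte size.
    uint32_t blocksPerRow =
        (widthInPixels + FormatTraits<format>::bcWidth - 1) / FormatTraits<format>::bcWidth;
    return blocksPerRow * (FormatTraits<format>::bpp / 8);
}
// e.g. TightRowPitchBytes<BC1_UNORM>(256) == 64 blocks * 8 bytes == 512.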
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC2_UNORM> - Format traits specialization for BC2_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC2_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC2_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC3_UNORM> - Format traits specialization for BC3_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC3_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC3_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC4_UNORM> - Format traits specialization for BC4_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC4_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC4_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC5_UNORM> - Format traits specialization for BC5_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC5_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC5_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC1_UNORM_SRGB> - Format traits specialization for BC1_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC1_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC1_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC2_UNORM_SRGB> - Format traits specialization for BC2_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC2_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC2_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC3_UNORM_SRGB> - Format traits specialization for BC3_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC3_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC3_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<YCRCB_SWAPUV> - Format traits specialization for YCRCB_SWAPUV
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<YCRCB_SWAPUV> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ true };
- static const uint32_t bcWidth{ 2 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<YCRCB_SWAPUV>
+ : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{true};
+ static const uint32_t bcWidth{2};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8_8 TransposeT;
    typedef Format4<8, 8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<DXT1_RGB> - Format traits specialization for DXT1_RGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<DXT1_RGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<DXT1_RGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_UNORM> - Format traits specialization for R8G8B8_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
    typedef Format3<8, 8, 8> FormatT;
};
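// Aside (illustrative sketch, not part of this patch): the Defaults<> base
// appears to hold the raw bit patterns substituted for components a format
// does not store; 0x3f800000 is IEEE-754 1.0f (the conventional implicit
// alpha for normalized/float formats), while the integer formats above pass
// 0x1. Decoding that constant, assuming <cstring> has been included:
inline float DefaultAlphaAsFloat()
{
    const uint32_t bits = 0x3f800000; // fourth Defaults<> argument above
    float          value;
    std::memcpy(&value, &bits, sizeof(value)); // bit-cast; value == 1.0f
    return value;
}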
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_SNORM> - Format traits specialization for R8G8B8_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
    typedef Format3<8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_SSCALED> - Format traits specialization for R8G8B8_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
    typedef Format3<8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_USCALED> - Format traits specialization for R8G8B8_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
    typedef Format3<8, 8, 8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for R64G64B64A64_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64B64A64_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 256 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64B64A64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+ 64,
+ SWR_TYPE_FLOAT,
+ 64,
+ SWR_TYPE_FLOAT,
+ 64,
+ SWR_TYPE_FLOAT,
+ 64>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{256};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose64_64_64_64 TransposeT;
    typedef Format4<64, 64, 64, 64> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for R64G64B64_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64B64_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 192 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64B64_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{192};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose64_64_64 TransposeT;
    typedef Format3<64, 64, 64> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC4_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 64 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC4_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{64};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC5_SNORM> - Format traits specialization for BC5_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC5_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC5_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
    typedef Format1<8> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_FLOAT> - Format traits specialization for R16G16B16_FLOAT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_FLOAT> :
- ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_FLOAT>
+ : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
    typedef Format3<16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_UNORM> - Format traits specialization for R16G16B16_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
    typedef Format3<16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_SNORM> - Format traits specialization for R16G16B16_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SNORM>
+ : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
    typedef Format3<16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_SSCALED> - Format traits specialization for R16G16B16_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SSCALED>
+ : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
    typedef Format3<16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_USCALED> - Format traits specialization for R16G16B16_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_USCALED>
+ : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
    typedef Format3<16, 16, 16> FormatT;
};
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC6H_SF16> - Format traits specialization for BC6H_SF16
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC6H_SF16> :
- ComponentTraits<SWR_TYPE_SNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC6H_SF16>
+ : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
typedef Format1<8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC7_UNORM> - Format traits specialization for BC7_UNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC7_UNORM> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC7_UNORM>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
typedef Format1<8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC7_UNORM_SRGB> - Format traits specialization for BC7_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC7_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ true };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC7_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{true};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
typedef Format1<8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<BC6H_UF16> - Format traits specialization for BC6H_UF16
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC6H_UF16> :
- ComponentTraits<SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 128 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ true };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 4 };
- static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC6H_UF16>
+ : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{128};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{true};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{4};
+ static const uint32_t bcHeight{4};
typedef TransposeSingleComponent<8> TransposeT;
typedef Format1<8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_UNORM_SRGB> - Format traits specialization for R8G8B8_UNORM_SRGB
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UNORM_SRGB> :
- ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ true };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UNORM_SRGB>
+ : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{true};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
typedef Format3<8, 8, 8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_UINT> - Format traits specialization for R16G16B16_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
typedef Format3<16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R16G16B16_SINT> - Format traits specialization for R16G16B16_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 48 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{48};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose16_16_16 TransposeT;
typedef Format3<16, 16, 16> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R32_SFIXED> - Format traits specialization for R32_SFIXED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SFIXED> :
- ComponentTraits<SWR_TYPE_SFIXED, 32>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SFIXED>
+ : ComponentTraits<SWR_TYPE_SFIXED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<32> TransposeT;
typedef Format1<32> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_SNORM> - Format traits specialization for R10G10B10A2_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_USCALED> - Format traits specialization for R10G10B10A2_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_SSCALED> - Format traits specialization for R10G10B10A2_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R10G10B10A2_SINT> - Format traits specialization for R10G10B10A2_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
- FormatSwizzle<0, 1, 2, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
+ FormatSwizzle<0, 1, 2, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_SNORM> - Format traits specialization for B10G10R10A2_SNORM
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SNORM> :
- ComponentTraits<SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 10,
+ SWR_TYPE_SNORM,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_USCALED> - Format traits specialization for B10G10R10A2_USCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_USCALED> :
- ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 10,
+ SWR_TYPE_USCALED,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_SSCALED> - Format traits specialization for B10G10R10A2_SSCALED
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SSCALED> :
- ComponentTraits<SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x3f800000>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 10,
+ SWR_TYPE_SSCALED,
+ 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x3f800000>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_UINT> - Format traits specialization for B10G10R10A2_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<B10G10R10A2_SINT> - Format traits specialization for B10G10R10A2_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
- FormatSwizzle<2, 1, 0, 3>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 32 };
- static const uint32_t numComps{ 4 };
- static const bool hasAlpha{ true };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
+ FormatSwizzle<2, 1, 0, 3>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{32};
+ static const uint32_t numComps{4};
+ static const bool hasAlpha{true};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose10_10_10_2 TransposeT;
typedef Format4<10, 10, 10, 2> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_UINT> - Format traits specialization for R8G8B8_UINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UINT> :
- ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UINT>
+ : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
typedef Format3<8, 8, 8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<R8G8B8_SINT> - Format traits specialization for R8G8B8_SINT
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SINT> :
- ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
- FormatSwizzle<0, 1, 2>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 24 };
- static const uint32_t numComps{ 3 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 0 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SINT>
+ : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+ FormatSwizzle<0, 1, 2>,
+ Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{24};
+ static const uint32_t numComps{3};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{0};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef Transpose8_8_8 TransposeT;
typedef Format3<8, 8, 8> FormatT;
//////////////////////////////////////////////////////////////////////////
/// FormatTraits<RAW> - Format traits specialization for RAW
//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<RAW> :
- ComponentTraits<SWR_TYPE_UINT, 8>,
- FormatSwizzle<0>,
- Defaults<0, 0, 0, 0x1>
-{
- static const uint32_t bpp{ 8 };
- static const uint32_t numComps{ 1 };
- static const bool hasAlpha{ false };
- static const uint32_t alphaComp{ 3 };
- static const bool isSRGB{ false };
- static const bool isBC{ false };
- static const bool isSubsampled{ false };
- static const uint32_t bcWidth{ 1 };
- static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<RAW>
+ : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+ static const uint32_t bpp{8};
+ static const uint32_t numComps{1};
+ static const bool hasAlpha{false};
+ static const uint32_t alphaComp{3};
+ static const bool isSRGB{false};
+ static const bool isBC{false};
+ static const bool isSubsampled{false};
+ static const uint32_t bcWidth{1};
+ static const uint32_t bcHeight{1};
typedef TransposeSingleComponent<8> TransposeT;
typedef Format1<8> FormatT;
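// ---- Editorial aside (added for exposition; not part of the diff) ----
// One plausible consumer of the FormatTraits data listed above: computing a
// surface row pitch from bpp and bcWidth. Block-compressed formats (isBC)
// store bpp bits per bcWidth x bcHeight block rather than per texel. The
// helper name rowPitchBytes is hypothetical.
#include <cstdint>
constexpr uint32_t rowPitchBytes(uint32_t widthTexels, uint32_t bpp, uint32_t bcWidth)
{
    // Round the texel width up to whole compression blocks, then convert
    // the per-block bit count to bytes.
    const uint32_t blocksPerRow = (widthTexels + bcWidth - 1) / bcWidth;
    return blocksPerRow * (bpp / 8);
}
// BC7_UNORM (bpp 128, bcWidth 4): a 64-texel row is 16 blocks * 16B = 256B.
static_assert(rowPitchBytes(64, 128, 4) == 256, "BC row pitch");
// R8G8B8_UINT (bpp 24, bcWidth 1): a 64-texel row is 64 * 3B = 192B.
static_assert(rowPitchBytes(64, 24, 1) == 192, "linear row pitch");
// ---- end editorial aside ----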
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file formats.h
-*
-* @brief Definitions for SWR_FORMAT functions.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.h
+ *
+ * @brief Definitions for SWR_FORMAT functions.
+ *
+ ******************************************************************************/
#pragma once
#include "utils.h"
template <uint32_t NumBits, bool Signed = false>
struct PackTraits
{
- static const uint32_t MyNumBits = NumBits;
- static simdscalar loadSOA(const uint8_t *pSrc) = delete;
- static void storeSOA(uint8_t *pDst, simdscalar const &src) = delete;
- static simdscalar unpack(simdscalar &in) = delete;
- static simdscalar pack(simdscalar &in) = delete;
+ static const uint32_t MyNumBits = NumBits;
+ static simdscalar loadSOA(const uint8_t* pSrc) = delete;
+ static void storeSOA(uint8_t* pDst, simdscalar const& src) = delete;
+ static simdscalar unpack(simdscalar& in) = delete;
+ static simdscalar pack(simdscalar& in) = delete;
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc) = delete;
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) = delete;
- static simd16scalar unpack(simd16scalar &in) = delete;
- static simd16scalar pack(simd16scalar &in) = delete;
+ static simd16scalar loadSOA_16(const uint8_t* pSrc) = delete;
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) = delete;
+ static simd16scalar unpack(simd16scalar& in) = delete;
+ static simd16scalar pack(simd16scalar& in) = delete;
#endif
};
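// ---- Editorial aside (added for exposition; not part of the diff) ----
// The primary PackTraits template above deletes every member, so only the
// explicit specializations that follow are usable; an unsupported bit width
// fails at compile time rather than at run time. A tiny self-contained
// analogue of the pattern (DemoTraits is hypothetical):
#include <cstdint>
template <uint32_t NumBits>
struct DemoTraits
{
    static uint32_t mask() = delete; // no generic implementation, on purpose
};
template <>
struct DemoTraits<8>
{
    static uint32_t mask() { return 0xFFu; } // only specialized widths compile
};
// DemoTraits<8>::mask() is fine; DemoTraits<12>::mask() would be rejected
// by the compiler at the call site.
// ---- end editorial aside ----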
{
static const uint32_t MyNumBits = 0;
- static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_setzero_ps(); }
- static void storeSOA(uint8_t *pDst, simdscalar const &src) { return; }
- static simdscalar unpack(simdscalar &in) { return _simd_setzero_ps(); }
- static simdscalar pack(simdscalar &in) { return _simd_setzero_ps(); }
+ static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_setzero_ps(); }
+ static void storeSOA(uint8_t* pDst, simdscalar const& src) { return; }
+ static simdscalar unpack(simdscalar& in) { return _simd_setzero_ps(); }
+ static simdscalar pack(simdscalar& in) { return _simd_setzero_ps(); }
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc) { return _simd16_setzero_ps(); }
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { return; }
- static simd16scalar unpack(simd16scalar &in) { return _simd16_setzero_ps(); }
- static simd16scalar pack(simd16scalar &in) { return _simd16_setzero_ps(); }
+ static simd16scalar loadSOA_16(const uint8_t* pSrc) { return _simd16_setzero_ps(); }
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) { return; }
+ static simd16scalar unpack(simd16scalar& in) { return _simd16_setzero_ps(); }
+ static simd16scalar pack(simd16scalar& in) { return _simd16_setzero_ps(); }
#endif
};
{
static const uint32_t MyNumBits = 8;
- static simdscalar loadSOA(const uint8_t *pSrc)
+ static simdscalar loadSOA(const uint8_t* pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
+ __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
return _mm256_insertf128_ps(result, vLo, 0);
#else
#error Unsupported vector width
#endif
}
- static void storeSOA(uint8_t *pDst, simdscalar const &src)
+ static void storeSOA(uint8_t* pDst, simdscalar const& src)
{
// store simd bytes
#if KNOB_SIMD_WIDTH == 8
#endif
}
- static simdscalar unpack(simdscalar &in)
+ static simdscalar unpack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
#if KNOB_ARCH <= KNOB_ARCH_AVX
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+ __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
__m128i resLo = _mm_cvtepu8_epi32(src);
- __m128i resHi = _mm_shuffle_epi8(src,
- _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
+ __m128i resHi =
+ _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
__m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
- return simdscalar{ _mm256_castsi256_ps(result) };
+ result = _mm256_insertf128_si256(result, resHi, 1);
+ return simdscalar{_mm256_castsi256_ps(result)};
#else
- return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+ return _mm256_castsi256_ps(
+ _mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
#endif
#else
#error Unsupported vector width
#endif
}
- static simdscalar pack(simdscalar &in)
+ static simdscalar pack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src = _simd_castps_si(in);
- __m128i res16 = _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
+ __m128i res16 =
+ _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
__m128i res8 = _mm_packus_epi16(res16, _mm_undefined_si128());
return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
#else
}
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc)
+ static simd16scalar loadSOA_16(const uint8_t* pSrc)
{
- simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_setzero_ps();
+ simd16scalar result = _simd16_setzero_ps();
+ simdscalar resultlo = _simd_setzero_ps();
- const __m128 src = _mm_load_ps(reinterpret_cast<const float *>(pSrc));
+ const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
resultlo = _mm256_insertf128_ps(resultlo, src, 0);
- result = _simd16_insert_ps(result, resultlo, 0);
+ result = _simd16_insert_ps(result, resultlo, 0);
return result;
}
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
{
// store simd16 bytes
- _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
+ _mm_store_ps(reinterpret_cast<float*>(pDst),
+ _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
}
- static simd16scalar unpack(simd16scalar &in)
+ static simd16scalar unpack(simd16scalar& in)
{
- simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
+ simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
simd16scalari result = _simd16_cvtepu8_epi32(tmp);
return _simd16_castsi_ps(result);
}
- static simd16scalar pack(simd16scalar &in)
+ static simd16scalar pack(simd16scalar& in)
{
simd16scalari result = _simd16_setzero_si();
- simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
- simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
+ simdscalari inlo =
+ _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
+ simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
- simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
- simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
+ simdscalari permlo =
+ _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
+ simdscalari permhi =
+ _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
- simdscalari pack = _simd_packus_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
+ simdscalari pack = _simd_packus_epi32(
+ permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
const simdscalari zero = _simd_setzero_si();
- permlo = _simd_permute2f128_si(pack, zero, 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
- permhi = _simd_permute2f128_si(pack, zero, 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
+ permlo = _simd_permute2f128_si(
+ pack,
+ zero,
+ 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
+ permhi = _simd_permute2f128_si(
+ pack,
+ zero,
+ 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
- pack = _simd_packus_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
+ pack = _simd_packus_epi16(permlo,
+ permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00
+ // 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
result = _simd16_insert_si(result, pack, 0);
{
static const uint32_t MyNumBits = 8;
- static simdscalar loadSOA(const uint8_t *pSrc)
+ static simdscalar loadSOA(const uint8_t* pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
+ __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
return _mm256_insertf128_ps(result, vLo, 0);
#else
#error Unsupported vector width
#endif
}
- static void storeSOA(uint8_t *pDst, simdscalar const &src)
+ static void storeSOA(uint8_t* pDst, simdscalar const& src)
{
// store simd bytes
#if KNOB_SIMD_WIDTH == 8
#endif
}
- static simdscalar unpack(simdscalar &in)
+ static simdscalar unpack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
#if KNOB_ARCH <= KNOB_ARCH_AVX
SWR_INVALID("I think this may be incorrect.");
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+ __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
__m128i resLo = _mm_cvtepi8_epi32(src);
- __m128i resHi = _mm_shuffle_epi8(src,
- _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
+ __m128i resHi =
+ _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
__m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
+ result = _mm256_insertf128_si256(result, resHi, 1);
return _mm256_castsi256_ps(result);
#else
- return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+ return _mm256_castsi256_ps(
+ _mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
#endif
#else
#error Unsupported vector width
#endif
}
- static simdscalar pack(simdscalar &in)
+ static simdscalar pack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src = _simd_castps_si(in);
- __m128i res16 = _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
+ __m128i res16 =
+ _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
__m128i res8 = _mm_packs_epi16(res16, _mm_undefined_si128());
return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
#else
}
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc)
+ static simd16scalar loadSOA_16(const uint8_t* pSrc)
{
- simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_setzero_ps();
+ simd16scalar result = _simd16_setzero_ps();
+ simdscalar resultlo = _simd_setzero_ps();
- const __m128 src = _mm_load_ps(reinterpret_cast<const float *>(pSrc));
+ const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
resultlo = _mm256_insertf128_ps(resultlo, src, 0);
- result = _simd16_insert_ps(result, resultlo, 0);
+ result = _simd16_insert_ps(result, resultlo, 0);
return result;
}
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
{
// store simd16 bytes
- _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
+ _mm_store_ps(reinterpret_cast<float*>(pDst),
+ _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
}
- static simd16scalar unpack(simd16scalar &in)
+ static simd16scalar unpack(simd16scalar& in)
{
- simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
+ simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
simd16scalari result = _simd16_cvtepu8_epi32(tmp);
return _simd16_castsi_ps(result);
}
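    // Editorial note (not part of the diff): this signed-8 path widens with
    // _simd16_cvtepu8_epi32, a zero-extend; a sign-extending widen would be
    // expected for SINT data, consistent with the SWR_INVALID("I think this
    // may be incorrect.") flag on the AVX path above. Left as in the source
    // and flagged here for review.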
- static simd16scalar pack(simd16scalar &in)
+ static simd16scalar pack(simd16scalar& in)
{
simd16scalari result = _simd16_setzero_si();
- simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
- simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
+ simdscalari inlo =
+ _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
+ simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
- simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
- simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
+ simdscalari permlo =
+ _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
+ simdscalari permhi =
+ _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
- simdscalari pack = _simd_packs_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
+ simdscalari pack = _simd_packs_epi32(
+ permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
const simdscalari zero = _simd_setzero_si();
- permlo = _simd_permute2f128_si(pack, zero, 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
- permhi = _simd_permute2f128_si(pack, zero, 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
+ permlo = _simd_permute2f128_si(
+ pack,
+ zero,
+ 0x20); // (2, 0) // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
+ permhi = _simd_permute2f128_si(
+ pack,
+ zero,
+ 0x31); // (3, 1) // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
- pack = _simd_packs_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
+ pack =
+ _simd_packs_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00
+ // 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
result = _simd16_insert_si(result, pack, 0);
{
static const uint32_t MyNumBits = 16;
- static simdscalar loadSOA(const uint8_t *pSrc)
+ static simdscalar loadSOA(const uint8_t* pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_load_ps((const float*)pSrc);
+ __m128 vLo = _mm_load_ps((const float*)pSrc);
return _mm256_insertf128_ps(result, vLo, 0);
#else
#error Unsupported vector width
#endif
}
- static void storeSOA(uint8_t *pDst, simdscalar const &src)
+ static void storeSOA(uint8_t* pDst, simdscalar const& src)
{
#if KNOB_SIMD_WIDTH == 8
// store 16B (2B * 8)
#endif
}
- static simdscalar unpack(simdscalar &in)
+ static simdscalar unpack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
#if KNOB_ARCH <= KNOB_ARCH_AVX
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+ __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
__m128i resLo = _mm_cvtepu16_epi32(src);
- __m128i resHi = _mm_shuffle_epi8(src,
- _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
+ __m128i resHi =
+ _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
__m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
+ result = _mm256_insertf128_si256(result, resHi, 1);
return _mm256_castsi256_ps(result);
#else
- return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+ return _mm256_castsi256_ps(
+ _mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
#endif
#else
#error Unsupported vector width
#endif
}
- static simdscalar pack(simdscalar &in)
+ static simdscalar pack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src = _simd_castps_si(in);
- __m256i res = _mm256_castsi128_si256(_mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
+ __m256i res = _mm256_castsi128_si256(
+ _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
return _mm256_castsi256_ps(res);
#else
#error Unsupported vector width
}
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc)
+ static simd16scalar loadSOA_16(const uint8_t* pSrc)
{
simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float *>(pSrc));
+ simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
result = _simd16_insert_ps(result, resultlo, 0);
return result;
}
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
{
- _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0));
+ _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
}
- static simd16scalar unpack(simd16scalar &in)
+ static simd16scalar unpack(simd16scalar& in)
{
simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
return _simd16_castsi_ps(result);
}
- static simd16scalar pack(simd16scalar &in)
+ static simd16scalar pack(simd16scalar& in)
{
const simd16scalari zero = _simd16_setzero_si();
- simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
- simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
+ simd16scalari permlo = _simd16_permute2f128_si(
+ _simd16_castps_si(in),
+ zero,
+ 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
+ simd16scalari permhi = _simd16_permute2f128_si(
+ _simd16_castps_si(in),
+ zero,
+ 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
- simd16scalari result = _simd16_packus_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
+ simd16scalari result = _simd16_packus_epi32(
+ permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00
+ // 00 00 00 00 00 00 00 00 00 (16b)
return _simd16_castsi_ps(result);
}
{
static const uint32_t MyNumBits = 16;
- static simdscalar loadSOA(const uint8_t *pSrc)
+ static simdscalar loadSOA(const uint8_t* pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
- __m128 vLo = _mm_load_ps((const float*)pSrc);
+ __m128 vLo = _mm_load_ps((const float*)pSrc);
return _mm256_insertf128_ps(result, vLo, 0);
#else
#error Unsupported vector width
#endif
}
- static void storeSOA(uint8_t *pDst, simdscalar const &src)
+ static void storeSOA(uint8_t* pDst, simdscalar const& src)
{
#if KNOB_SIMD_WIDTH == 8
// store 16B (2B * 8)
#endif
}
- static simdscalar unpack(simdscalar &in)
+ static simdscalar unpack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
#if KNOB_ARCH <= KNOB_ARCH_AVX
SWR_INVALID("I think this may be incorrect.");
- __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+ __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
__m128i resLo = _mm_cvtepi16_epi32(src);
- __m128i resHi = _mm_shuffle_epi8(src,
- _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
+ __m128i resHi =
+ _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
__m256i result = _mm256_castsi128_si256(resLo);
- result = _mm256_insertf128_si256(result, resHi, 1);
+ result = _mm256_insertf128_si256(result, resHi, 1);
return _mm256_castsi256_ps(result);
#else
- return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+ return _mm256_castsi256_ps(
+ _mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
#endif
#else
#error Unsupported vector width
#endif
}
- static simdscalar pack(simdscalar &in)
+ static simdscalar pack(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src = _simd_castps_si(in);
- __m256i res = _mm256_castsi128_si256(_mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
+ __m256i res = _mm256_castsi128_si256(
+ _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
return _mm256_castsi256_ps(res);
#else
#error Unsupported vector width
}
#if ENABLE_AVX512_SIMD16
- static simd16scalar loadSOA_16(const uint8_t *pSrc)
+ static simd16scalar loadSOA_16(const uint8_t* pSrc)
{
simd16scalar result = _simd16_setzero_ps();
- simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float *>(pSrc));
+ simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
result = _simd16_insert_ps(result, resultlo, 0);
return result;
}
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
{
- _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0));
+ _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
}
- static simd16scalar unpack(simd16scalar &in)
+ static simd16scalar unpack(simd16scalar& in)
{
simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
return _simd16_castsi_ps(result);
}
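    // Editorial note (not part of the diff): as in the signed-8 traits, this
    // SINT16 path widens with the unsigned _simd16_cvtepu16_epi32; a
    // sign-extending widen would be expected here. Left as in the source and
    // flagged for review.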
- static simd16scalar pack(simd16scalar &in)
+ static simd16scalar pack(simd16scalar& in)
{
const simd16scalari zero = _simd16_setzero_si();
- simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
- simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
+ simd16scalari permlo = _simd16_permute2f128_si(
+ _simd16_castps_si(in),
+ zero,
+ 0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
+ simd16scalari permhi = _simd16_permute2f128_si(
+ _simd16_castps_si(in),
+ zero,
+ 0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
- simd16scalari result = _simd16_packs_epi32(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
+ simd16scalari result = _simd16_packs_epi32(
+ permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00
+ // 00 00 00 00 00 00 00 00 00 (16b)
return _simd16_castsi_ps(result);
}
{
static const uint32_t MyNumBits = 32;
- static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_load_ps((const float*)pSrc); }
- static void storeSOA(uint8_t *pDst, simdscalar const &src) { _simd_store_ps((float*)pDst, src); }
- static simdscalar unpack(simdscalar &in) { return in; }
- static simdscalar pack(simdscalar &in) { return in; }
-#if ENABLE_AVX512_SIMD16
-
- static simd16scalar loadSOA_16(const uint8_t *pSrc)
+ static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_load_ps((const float*)pSrc); }
+ static void storeSOA(uint8_t* pDst, simdscalar const& src)
{
- return _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
+ _simd_store_ps((float*)pDst, src);
}
+ static simdscalar unpack(simdscalar& in) { return in; }
+ static simdscalar pack(simdscalar& in) { return in; }
+#if ENABLE_AVX512_SIMD16
- static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+ static simd16scalar loadSOA_16(const uint8_t* pSrc)
{
- _simd16_store_ps(reinterpret_cast<float *>(pDst), src);
+ return _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
}
- static simd16scalar unpack(simd16scalar &in)
+ static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
{
- return in;
+ _simd16_store_ps(reinterpret_cast<float*>(pDst), src);
}
- static simd16scalar pack(simd16scalar &in)
- {
- return in;
- }
+ static simd16scalar unpack(simd16scalar& in) { return in; }
+
+ static simd16scalar pack(simd16scalar& in) { return in; }
#endif
};
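// Editorial aside (added for exposition; not part of the diff): for 32-bit
// channels the storage width already matches the SIMD lane width, so
// loadSOA/storeSOA reduce to plain vector load/store and unpack/pack are
// the identity, as the bodies above show.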
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits.
//////////////////////////////////////////////////////////////////////////
-template<SWR_TYPE type, uint32_t NumBits>
+template <SWR_TYPE type, uint32_t NumBits>
struct TypeTraits : PackTraits<NumBits>
{
static const SWR_TYPE MyType = type;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
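// Editorial aside (added for exposition; not part of the diff): TypeTraits
// couples a storage-width PackTraits base with the value interpretation of
// the channel (normalization scale via toFloat()/fromFloat(), and sRGB
// decode via convertSrgb). The primary template stubs everything with
// SWR_NOT_IMPL so that only meaningful (type, width) pairs, specialized
// below, provide real conversions.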
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UINT8
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 8> : PackTraits<8>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 8> : PackTraits<8>
{
static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for SINT8
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 8> : PackTraits<8, true>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 8> : PackTraits<8, true>
{
static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UINT16
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 16> : PackTraits<16>
{
static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for SINT16
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 16> : PackTraits<16, true>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 16> : PackTraits<16, true>
{
static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UINT32
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 32> : PackTraits<32>
{
static const SWR_TYPE MyType = SWR_TYPE_UINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for SINT32
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 32> : PackTraits<32>
{
static const SWR_TYPE MyType = SWR_TYPE_SINT;
- static float toFloat() { return 0.0; }
- static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 0.0; }
+ static float fromFloat()
+ {
+ SWR_NOT_IMPL;
+ return 0.0;
+ }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UNORM5
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 5> : PackTraits<5>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 5> : PackTraits<5>
{
static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 31.0f; }
- static float fromFloat() { return 31.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 31.0f; }
+ static float fromFloat() { return 31.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UNORM6
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 6> : PackTraits<6>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 6> : PackTraits<6>
{
static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 63.0f; }
- static float fromFloat() { return 63.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 63.0f; }
+ static float fromFloat() { return 63.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UNORM8
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 8> : PackTraits<8>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 8> : PackTraits<8>
{
static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 255.0f; }
- static float fromFloat() { return 255.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 255.0f; }
+ static float fromFloat() { return 255.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for SNORM8
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SNORM, 8> : PackTraits<8, true>
+template <>
+struct TypeTraits<SWR_TYPE_SNORM, 8> : PackTraits<8, true>
{
static const SWR_TYPE MyType = SWR_TYPE_SNORM;
- static float toFloat() { return 1.0f / 127.0f; }
- static float fromFloat() { return 127.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 127.0f; }
+ static float fromFloat() { return 127.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UNORM16
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 16> : PackTraits<16>
{
static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 65535.0f; }
- static float fromFloat() { return 65535.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 65535.0f; }
+ static float fromFloat() { return 65535.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for SNORM16
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SNORM, 16> : PackTraits<16, true>
+template <>
+struct TypeTraits<SWR_TYPE_SNORM, 16> : PackTraits<16, true>
{
    static const SWR_TYPE MyType = SWR_TYPE_SNORM;
- static float toFloat() { return 1.0f / 32767.0f; }
- static float fromFloat() { return 32767.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 32767.0f; }
+ static float fromFloat() { return 32767.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for UNORM24
//////////////////////////////////////////////////////////////////////////
-template<>
-struct TypeTraits < SWR_TYPE_UNORM, 24 > : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 24> : PackTraits<32>
{
static const SWR_TYPE MyType = SWR_TYPE_UNORM;
- static float toFloat() { return 1.0f / 16777215.0f; }
- static float fromFloat() { return 16777215.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f / 16777215.0f; }
+ static float fromFloat() { return 16777215.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
};
//////////////////////////////////////////////////////////////////////////
#include "math.h"
-template< unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden >
-inline static __m128 fastpow(__m128 arg) {
+template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
+inline static __m128 fastpow(__m128 arg)
+{
__m128 ret = arg;
- static const __m128 factor = _mm_set1_ps(exp2(127.0f * expden / expnum - 127.0f)
- * powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum));
+ static const __m128 factor =
+ _mm_set1_ps(exp2(127.0f * expden / expnum - 127.0f) *
+ powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum));
// Apply a constant pre-correction factor.
ret = _mm_mul_ps(ret, factor);
// Reinterpret arg as integer to obtain logarithm.
- //asm("cvtdq2ps %1, %0" : "=x" (ret) : "x" (ret));
+ // asm("cvtdq2ps %1, %0" : "=x" (ret) : "x" (ret));
ret = _mm_cvtepi32_ps(_mm_castps_si128(ret));
// Multiply logarithm by power.
ret = _mm_mul_ps(ret, _mm_set1_ps(1.0f * expnum / expden));
// Convert back to "integer" to exponentiate.
- //asm("cvtps2dq %1, %0" : "=x" (ret) : "x" (ret));
+ // asm("cvtps2dq %1, %0" : "=x" (ret) : "x" (ret));
ret = _mm_castsi128_ps(_mm_cvtps_epi32(ret));
return ret;
}
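// For reference, a minimal scalar sketch of the bit trick used by fastpow above
// (illustration only, not part of the optimized path; the coefficient template
// arguments are omitted for clarity, and inputs are assumed to lie in (0, 1] as
// in the sRGB path). Reinterpreting an IEEE-754 float's bits as an integer gives
// approximately 2^23 * (log2(x) + 127), so scaling that "logarithm" and
// reinterpreting back approximates x^(expnum/expden); the constant factor
// pre-corrects for the exponent bias.
#include <cmath>
#include <cstdint>
#include <cstring>
static inline float fastpow_scalar(float x, float expnum, float expden)
{
    // Apply the constant pre-correction factor.
    x *= std::exp2(127.0f * expden / expnum - 127.0f);
    // Reinterpret as integer to obtain the logarithm.
    int32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    // Multiply the logarithm by the power.
    float log2approx = float(bits) * (expnum / expden);
    // Convert back to "integer" (round to nearest, as cvtps2dq does) to exponentiate.
    bits = int32_t(std::lrint(log2approx));
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}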
-inline static __m128 pow512_4(__m128 arg) {
+inline static __m128 pow512_4(__m128 arg)
+{
// 5/12 is too small, so compute the 4th root of 20/12 instead.
// 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
// weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
- __m128 xf = fastpow< 2, 3, int(0.629960524947437 * 1e9), int(1e9) >(arg);
+ __m128 xf = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
__m128 xover = _mm_mul_ps(arg, xf);
- __m128 xfm1 = _mm_rsqrt_ps(xf);
- __m128 x2 = _mm_mul_ps(arg, arg);
+ __m128 xfm1 = _mm_rsqrt_ps(xf);
+ __m128 x2 = _mm_mul_ps(arg, arg);
__m128 xunder = _mm_mul_ps(x2, xfm1);
// sqrt2 * over + 2 * sqrt2 * under
__m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
- _mm_add_ps(xover, xunder));
+ _mm_add_ps(xover, xunder));
xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
inline static __m128 powf_wrapper(__m128 Base, float Exp)
{
- float *f = (float *)(&Base);
+ float* f = (float*)(&Base);
- return _mm_set_ps(powf(f[3], Exp),
- powf(f[2], Exp),
- powf(f[1], Exp),
- powf(f[0], Exp));
+ return _mm_set_ps(powf(f[3], Exp), powf(f[2], Exp), powf(f[1], Exp), powf(f[0], Exp));
}
static inline __m128 ConvertFloatToSRGB2(__m128& Src)
{
- // create a mask with 0xFFFFFFFF in the DWORDs where the source is <= the minimal SRGB float value
+ // create a mask with 0xFFFFFFFF in the DWORDs where the source is <= the minimal SRGB float
+ // value
__m128i CmpToSRGBThresholdMask = TO_M128i(_mm_cmpnlt_ps(_mm_set1_ps(0.0031308f), Src));
// squeeze the mask down to 16 bits (4 bits per DWORD)
#else
__m128 f = powf_wrapper(fSrc_0RGB, 1.0f / 2.4f);
#endif
- f = _mm_mul_ps(f, _mm_set1_ps(1.055f));
+ f = _mm_mul_ps(f, _mm_set1_ps(1.055f));
Result = _mm_sub_ps(f, _mm_set1_ps(0.055f));
}
else
f = _mm_sub_ps(f, _mm_set1_ps(0.055f));
// Clear the alpha (is garbage after the sub)
- __m128i i = _mm_and_si128(TO_M128i(f), _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+ __m128i i = _mm_and_si128(TO_M128i(f),
+ _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
__m128i LessThanPart = _mm_and_si128(CmpToSRGBThresholdMask, TO_M128i(Src_0RGB_mul_denorm));
__m128i GreaterEqualPart = _mm_andnot_si128(CmpToSRGBThresholdMask, i);
- __m128i CombinedParts = _mm_or_si128(LessThanPart, GreaterEqualPart);
+ __m128i CombinedParts = _mm_or_si128(LessThanPart, GreaterEqualPart);
Result = TO_M128(CombinedParts);
}
}
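// Scalar reference (illustration only) for the conversion vectorized above: the
// standard linear -> sRGB transfer function with the same 0.0031308 cutoff. The
// SIMD path approximates powf(v, 1.0f / 2.4f) via pow512_4, since 1/2.4 == 5/12.
#include <cmath>
static inline float ConvertFloatToSRGB_scalar(float v)
{
    if (v <= 0.0031308f)
    {
        return v * 12.92f; // linear segment near zero
    }
    return 1.055f * std::pow(v, 1.0f / 2.4f) - 0.055f; // gamma segment
}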
#if ENABLE_AVX512_SIMD16
-template< unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden >
-inline static simd16scalar SIMDCALL fastpow(simd16scalar const &value)
+template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
+inline static simd16scalar SIMDCALL fastpow(simd16scalar const& value)
{
- static const float factor1 = exp2(127.0f * expden / expnum - 127.0f)
- * powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum);
+ static const float factor1 = exp2(127.0f * expden / expnum - 127.0f) *
+ powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum);
// Apply a constant pre-correction factor.
simd16scalar result = _simd16_mul_ps(value, _simd16_set1_ps(factor1));
// Reinterpret arg as integer to obtain logarithm.
- //asm("cvtdq2ps %1, %0" : "=x" (result) : "x" (result));
+ // asm("cvtdq2ps %1, %0" : "=x" (result) : "x" (result));
result = _simd16_cvtepi32_ps(_simd16_castps_si(result));
// Multiply logarithm by power.
result = _simd16_mul_ps(result, _simd16_set1_ps(1.0f * expnum / expden));
// Convert back to "integer" to exponentiate.
- //asm("cvtps2dq %1, %0" : "=x" (result) : "x" (result));
+ // asm("cvtps2dq %1, %0" : "=x" (result) : "x" (result));
result = _simd16_castsi_ps(_simd16_cvtps_epi32(result));
return result;
}
-inline static simd16scalar SIMDCALL pow512_4(simd16scalar const &arg)
+inline static simd16scalar SIMDCALL pow512_4(simd16scalar const& arg)
{
// 5/12 is too small, so compute the 4th root of 20/12 instead.
// 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
// weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
- simd16scalar xf = fastpow< 2, 3, int(0.629960524947437 * 1e9), int(1e9) >(arg);
+ simd16scalar xf = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
simd16scalar xover = _simd16_mul_ps(arg, xf);
- simd16scalar xfm1 = _simd16_rsqrt_ps(xf);
- simd16scalar x2 = _simd16_mul_ps(arg, arg);
+ simd16scalar xfm1 = _simd16_rsqrt_ps(xf);
+ simd16scalar x2 = _simd16_mul_ps(arg, arg);
simd16scalar xunder = _simd16_mul_ps(x2, xfm1);
// sqrt2 * over + 2 * sqrt2 * under
- simd16scalar xavg = _simd16_mul_ps(_simd16_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f), _simd16_add_ps(xover, xunder));
+ simd16scalar xavg =
+ _simd16_mul_ps(_simd16_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
+ _simd16_add_ps(xover, xunder));
xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
return xavg;
}
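// The two trailing multiply-by-rsqrt steps above implement an approximate fourth
// root: x * rsqrt(x) == sqrt(x), and applying that twice gives x^(1/4), so
// pow512_4 returns (x^(5/3))^(1/4) == x^(5/12). A scalar sketch of the same
// composition (illustration only):
#include <cmath>
static inline float fourth_root_scalar(float x)
{
    float r = x / std::sqrt(x); // x * rsqrt(x) == sqrt(x)
    return r / std::sqrt(r);    // sqrt(sqrt(x)) == x^(1/4)
}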
-inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar &base, float exp)
+inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar& base, float exp)
{
- const float *f = reinterpret_cast<const float *>(&base);
-
- return _simd16_set_ps(
- powf(f[15], exp),
- powf(f[14], exp),
- powf(f[13], exp),
- powf(f[12], exp),
- powf(f[11], exp),
- powf(f[10], exp),
- powf(f[ 9], exp),
- powf(f[ 8], exp),
- powf(f[ 7], exp),
- powf(f[ 6], exp),
- powf(f[ 5], exp),
- powf(f[ 4], exp),
- powf(f[ 3], exp),
- powf(f[ 2], exp),
- powf(f[ 1], exp),
- powf(f[ 0], exp)
- );
+ const float* f = reinterpret_cast<const float*>(&base);
+
+ return _simd16_set_ps(powf(f[15], exp),
+ powf(f[14], exp),
+ powf(f[13], exp),
+ powf(f[12], exp),
+ powf(f[11], exp),
+ powf(f[10], exp),
+ powf(f[9], exp),
+ powf(f[8], exp),
+ powf(f[7], exp),
+ powf(f[6], exp),
+ powf(f[5], exp),
+ powf(f[4], exp),
+ powf(f[3], exp),
+ powf(f[2], exp),
+ powf(f[1], exp),
+ powf(f[0], exp));
}
// float to SRGB conversion formula
// else
// value = 1.055f * pow(value, 1.0f / 2.4f) - 0.055f;
//
-static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar &value)
+static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar& value)
{
// create a mask where the source is < the minimal SRGB float value
const simd16mask mask = _simd16_cmplt_ps_mask(value, _simd16_set1_ps(0.0031308f));
// only native AVX512 can directly use the computed mask for the blend operation
result = _mm512_mask_blend_ps(mask, result2, result);
#else
- result = _simd16_blendv_ps(result2, result, _simd16_cmplt_ps(value, _simd16_set1_ps(0.0031308f)));
+ result = _simd16_blendv_ps(
+ result2, result, _simd16_cmplt_ps(value, _simd16_set1_ps(0.0031308f)));
#endif
}
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for FLOAT16
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
{
static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
- static float toFloat() { return 1.0f; }
- static float fromFloat() { return 1.0f; }
- static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+ static float toFloat() { return 1.0f; }
+ static float fromFloat() { return 1.0f; }
+ static simdscalar convertSrgb(simdscalar& in)
+ {
+ SWR_NOT_IMPL;
+ return _simd_setzero_ps();
+ }
- static simdscalar pack(const simdscalar &in)
+ static simdscalar pack(const simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
#if (KNOB_ARCH == KNOB_ARCH_AVX)
// input is 8 packed float32, output is 8 packed float16
simdscalari src = _simd_castps_si(in);
- static const uint32_t FLOAT_EXP_BITS = 8;
+ static const uint32_t FLOAT_EXP_BITS = 8;
static const uint32_t FLOAT_MANTISSA_BITS = 23;
static const uint32_t FLOAT_MANTISSA_MASK = (1U << FLOAT_MANTISSA_BITS) - 1;
static const uint32_t FLOAT_EXP_MASK = ((1U << FLOAT_EXP_BITS) - 1) << FLOAT_MANTISSA_BITS;
- static const uint32_t HALF_EXP_BITS = 5;
+ static const uint32_t HALF_EXP_BITS = 5;
static const uint32_t HALF_MANTISSA_BITS = 10;
static const uint32_t HALF_EXP_MASK = ((1U << HALF_EXP_BITS) - 1) << HALF_MANTISSA_BITS;
// minimum exponent required, exponents below this are flushed to 0.
- static const int32_t HALF_EXP_MIN = -14;
+ static const int32_t HALF_EXP_MIN = -14;
static const int32_t FLOAT_EXP_BIAS = 127;
- static const int32_t FLOAT_EXP_MIN = HALF_EXP_MIN + FLOAT_EXP_BIAS;
- static const int32_t FLOAT_EXP_MIN_FTZ = FLOAT_EXP_MIN - (HALF_MANTISSA_BITS + 1); // +1 for the lack of implicit significand
+ static const int32_t FLOAT_EXP_MIN = HALF_EXP_MIN + FLOAT_EXP_BIAS;
+ static const int32_t FLOAT_EXP_MIN_FTZ =
+ FLOAT_EXP_MIN - (HALF_MANTISSA_BITS + 1); // +1 for the lack of implicit significand
// maximum exponent required, exponents above this are set to infinity
- static const int32_t HALF_EXP_MAX = 15;
+ static const int32_t HALF_EXP_MAX = 15;
static const int32_t FLOAT_EXP_MAX = HALF_EXP_MAX + FLOAT_EXP_BIAS;
- const simdscalari vSignMask = _simd_set1_epi32(0x80000000);
- const simdscalari vExpMask = _simd_set1_epi32(FLOAT_EXP_MASK);
- const simdscalari vManMask = _simd_set1_epi32(FLOAT_MANTISSA_MASK);
- const simdscalari vExpMin = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN << FLOAT_MANTISSA_BITS));
- const simdscalari vExpMinFtz = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN_FTZ << FLOAT_MANTISSA_BITS));
- const simdscalari vExpMax = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MAX << FLOAT_MANTISSA_BITS));
+ const simdscalari vSignMask = _simd_set1_epi32(0x80000000);
+ const simdscalari vExpMask = _simd_set1_epi32(FLOAT_EXP_MASK);
+ const simdscalari vManMask = _simd_set1_epi32(FLOAT_MANTISSA_MASK);
+ const simdscalari vExpMin =
+ _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN << FLOAT_MANTISSA_BITS));
+ const simdscalari vExpMinFtz =
+ _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN_FTZ << FLOAT_MANTISSA_BITS));
+ const simdscalari vExpMax =
+ _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MAX << FLOAT_MANTISSA_BITS));
- simdscalari vSign = _simd_and_si(src, vSignMask);
- simdscalari vExp = _simd_and_si(src, vExpMask);
- simdscalari vMan = _simd_and_si(src, vManMask);
+ simdscalari vSign = _simd_and_si(src, vSignMask);
+ simdscalari vExp = _simd_and_si(src, vExpMask);
+ simdscalari vMan = _simd_and_si(src, vManMask);
simdscalari vFTZMask = _simd_cmplt_epi32(vExp, vExpMinFtz);
simdscalari vDenormMask = _simd_andnot_si(vFTZMask, _simd_cmplt_epi32(vExp, vExpMin));
simdscalari vInfMask = _simd_cmpeq_epi32(vExpMask, vExp);
simdscalari vClampMask = _simd_andnot_si(vInfMask, _simd_cmplt_epi32(vExpMax, vExp));
- simdscalari vHalfExp = _simd_add_epi32(_simd_sub_epi32(vExp, vExpMin), _simd_set1_epi32(1U << FLOAT_MANTISSA_BITS));
+ simdscalari vHalfExp = _simd_add_epi32(_simd_sub_epi32(vExp, vExpMin),
+ _simd_set1_epi32(1U << FLOAT_MANTISSA_BITS));
        // pack the 16-bit output into the lower 16 bits of each 32-bit channel
- simdscalari vDst = _simd_and_si(_simd_srli_epi32(vHalfExp, 13), _simd_set1_epi32(HALF_EXP_MASK));
- vDst = _simd_or_si(vDst, _simd_srli_epi32(vMan, FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
+ simdscalari vDst =
+ _simd_and_si(_simd_srli_epi32(vHalfExp, 13), _simd_set1_epi32(HALF_EXP_MASK));
+ vDst = _simd_or_si(vDst, _simd_srli_epi32(vMan, FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
// Flush To Zero
- vDst = _simd_andnot_si(vFTZMask, vDst);
+ vDst = _simd_andnot_si(vFTZMask, vDst);
// Apply Infinites / NaN
- vDst = _simd_or_si(vDst, _simd_and_si(vInfMask, _simd_set1_epi32(HALF_EXP_MASK)));
+ vDst = _simd_or_si(vDst, _simd_and_si(vInfMask, _simd_set1_epi32(HALF_EXP_MASK)));
// Apply clamps
vDst = _simd_andnot_si(vClampMask, vDst);
- vDst = _simd_or_si(vDst,
- _simd_and_si(vClampMask, _simd_set1_epi32(0x7BFF)));
+ vDst = _simd_or_si(vDst, _simd_and_si(vClampMask, _simd_set1_epi32(0x7BFF)));
// Compute Denormals (subnormals)
if (!_mm256_testz_si256(vDenormMask, vDenormMask))
{
- uint32_t *pDenormMask = (uint32_t*)&vDenormMask;
- uint32_t *pExp = (uint32_t*)&vExp;
- uint32_t *pMan = (uint32_t*)&vMan;
- uint32_t *pDst = (uint32_t*)&vDst;
+ uint32_t* pDenormMask = (uint32_t*)&vDenormMask;
+ uint32_t* pExp = (uint32_t*)&vExp;
+ uint32_t* pMan = (uint32_t*)&vMan;
+ uint32_t* pDst = (uint32_t*)&vDst;
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
{
if (pDenormMask[i])
{
// Need to compute subnormal value
uint32_t exponent = pExp[i] >> FLOAT_MANTISSA_BITS;
- uint32_t mantissa = pMan[i] |
- (1U << FLOAT_MANTISSA_BITS); // Denorms include no "implicit" 1s. Make it explicit
+ uint32_t mantissa =
+ pMan[i] | (1U << FLOAT_MANTISSA_BITS); // Denorms include no "implicit" 1s.
+ // Make it explicit
- pDst[i] = mantissa >> ((FLOAT_EXP_MIN - exponent) + (FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
+ pDst[i] = mantissa >> ((FLOAT_EXP_MIN - exponent) +
+ (FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
}
}
}
vDst = _simd_or_si(vDst, _simd_srli_epi32(vSign, 16));
// Pack to lower 128-bits
- vDst = _mm256_castsi128_si256(_mm_packus_epi32(_mm256_castsi256_si128(vDst), _mm256_extractf128_si256(vDst, 1)));
+ vDst = _mm256_castsi128_si256(
+ _mm_packus_epi32(_mm256_castsi256_si128(vDst), _mm256_extractf128_si256(vDst, 1)));
#if 0
#if !defined(NDEBUG)
#endif
}
- static simdscalar unpack(const simdscalar &in)
+ static simdscalar unpack(const simdscalar& in)
{
// input is 8 packed float16, output is 8 packed float32
SWR_NOT_IMPL; // @todo
}
#if ENABLE_AVX512_SIMD16
- static simd16scalar pack(const simd16scalar &in)
+ static simd16scalar pack(const simd16scalar& in)
{
- simd16scalari result = _simd16_setzero_si();
- simdscalari resultlo = _simd_setzero_si();
+ simd16scalari result = _simd16_setzero_si();
+ simdscalari resultlo = _simd_setzero_si();
#if (KNOB_ARCH == KNOB_ARCH_AVX)
simdscalar simdlo = pack(_simd16_extract_ps(in, 0));
return _simd16_castsi_ps(result);
}
- static simd16scalar unpack(const simd16scalar &in)
+ static simd16scalar unpack(const simd16scalar& in)
{
// input is 16 packed float16, output is 16 packed float32
SWR_NOT_IMPL; // @todo
//////////////////////////////////////////////////////////////////////////
/// TypeTraits - Format type traits specialization for FLOAT32
//////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
{
- static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
- static float toFloat() { return 1.0f; }
- static float fromFloat() { return 1.0f; }
- static inline simdscalar convertSrgb(simdscalar &in)
+ static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
+ static float toFloat() { return 1.0f; }
+ static float fromFloat() { return 1.0f; }
+ static inline simdscalar convertSrgb(simdscalar& in)
{
#if KNOB_SIMD_WIDTH == 8
__m128 srcLo = _mm256_extractf128_ps(in, 0);
}
#if ENABLE_AVX512_SIMD16
- static inline simd16scalar convertSrgb(simd16scalar &in)
- {
- return ConvertFloatToSRGB2(in);
- }
+ static inline simd16scalar convertSrgb(simd16scalar& in) { return ConvertFloatToSRGB2(in); }
#endif
};
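// Scalar sketch of the float32 -> float16 packing performed by the FLOAT16
// pack() above, using the same constants and the same truncating mantissa shift
// as the vector path (illustration only, not part of the traits interface).
#include <cstdint>
#include <cstring>
static inline uint16_t PackFloatToHalf_scalar(float value)
{
    uint32_t src;
    std::memcpy(&src, &value, sizeof(src));
    const uint32_t sign = (src >> 16) & 0x8000;
    const uint32_t man  = src & 0x007FFFFF;                   // FLOAT_MANTISSA_MASK
    const int32_t  exp  = int32_t((src >> 23) & 0xFF) - 127;  // unbiased exponent

    if (exp == 128) return uint16_t(sign | 0x7C00 | (man >> 13)); // infinites / NaN
    if (exp > 15)   return uint16_t(sign | 0x7BFF);               // clamp to largest finite half
    if (exp < -25)  return uint16_t(sign);                        // flush to zero
    if (exp < -14)                                                // subnormal: make the implicit 1 explicit
        return uint16_t(sign | ((man | 0x00800000) >> uint32_t(-exp - 1)));
    return uint16_t(sign | (uint32_t(exp + 15) << 10) | (man >> 13));
}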
//////////////////////////////////////////////////////////////////////////
/// Format1 - Bitfield for single component formats.
//////////////////////////////////////////////////////////////////////////
-template<uint32_t x>
+template <uint32_t x>
union Format1
{
typedef typename FormatIntType<x>::TYPE TYPE;
{
TYPE g : x;
};
- struct
+ struct
{
TYPE b : x;
};
- struct
+ struct
{
TYPE a : x;
};
//////////////////////////////////////////////////////////////////////////
/// Format2 - Bitfield for 2 component formats.
//////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y>
+template <uint32_t x, uint32_t y>
union Format2
{
typedef typename FormatIntType<x + y>::TYPE TYPE;
//////////////////////////////////////////////////////////////////////////
/// Format3 - Bitfield for 3 component formats.
//////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z>
+template <uint32_t x, uint32_t y, uint32_t z>
union Format3
{
typedef typename FormatIntType<x + y + z>::TYPE TYPE;
TYPE g : y;
TYPE b : z;
};
- TYPE a; ///@note This is here to provide full template needed in Formats.
+    TYPE a; ///@note This is here to provide the full template needed in Formats.
};
//////////////////////////////////////////////////////////////////////////
/// Format4 - Bitfield for 4 component formats.
//////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z, uint32_t w>
+template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
struct Format4
{
typedef typename FormatIntType<x + y + z + w>::TYPE TYPE;
//////////////////////////////////////////////////////////////////////////
/// ComponentTraits - Default components
//////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z, uint32_t w>
+template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
struct Defaults
{
INLINE static uint32_t GetDefault(uint32_t comp)
{
- static const uint32_t defaults[4]{ x, y, z, w };
+ static const uint32_t defaults[4]{x, y, z, w};
return defaults[comp];
}
};
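// Usage sketch (illustration only): format traits pass raw bit patterns, so a
// format that stores no alpha can use Defaults<0, 0, 0, 0x3f800000> to default
// alpha to the bit pattern of 1.0f. The alias below is hypothetical.
typedef Defaults<0, 0, 0, 0x3f800000> RGBDefaultsExample;
// RGBDefaultsExample::GetDefault(3) == 0x3f800000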
//////////////////////////////////////////////////////////////////////////
/// ComponentTraits - Component type traits.
//////////////////////////////////////////////////////////////////////////
-template<SWR_TYPE X, uint32_t NumBitsX, SWR_TYPE Y = SWR_TYPE_UNKNOWN, uint32_t NumBitsY = 0, SWR_TYPE Z = SWR_TYPE_UNKNOWN, uint32_t NumBitsZ = 0, SWR_TYPE W = SWR_TYPE_UNKNOWN, uint32_t NumBitsW = 0>
+template <SWR_TYPE X,
+ uint32_t NumBitsX,
+ SWR_TYPE Y = SWR_TYPE_UNKNOWN,
+ uint32_t NumBitsY = 0,
+ SWR_TYPE Z = SWR_TYPE_UNKNOWN,
+ uint32_t NumBitsZ = 0,
+ SWR_TYPE W = SWR_TYPE_UNKNOWN,
+ uint32_t NumBitsW = 0>
struct ComponentTraits
{
INLINE static SWR_TYPE GetType(uint32_t comp)
{
- static const SWR_TYPE CompType[4]{ X, Y, Z, W };
+ static const SWR_TYPE CompType[4]{X, Y, Z, W};
return CompType[comp];
}
INLINE static constexpr uint32_t GetConstBPC(uint32_t comp)
{
- return (comp == 3) ? NumBitsW :
- ((comp == 2) ? NumBitsZ :
- ((comp == 1) ? NumBitsY : NumBitsX) );
+ return (comp == 3) ? NumBitsW
+ : ((comp == 2) ? NumBitsZ : ((comp == 1) ? NumBitsY : NumBitsX));
}
INLINE static uint32_t GetBPC(uint32_t comp)
{
- static const uint32_t MyBpc[4]{ NumBitsX, NumBitsY, NumBitsZ, NumBitsW };
+ static const uint32_t MyBpc[4]{NumBitsX, NumBitsY, NumBitsZ, NumBitsW};
return MyBpc[comp];
}
}
SWR_INVALID("Invalid component: %d", comp);
return TypeTraits<X, NumBitsX>::toFloat();
-
}
INLINE static float fromFloat(uint32_t comp)
return TypeTraits<X, NumBitsX>::loadSOA(pSrc);
}
- INLINE static void storeSOA(uint32_t comp, uint8_t *pDst, simdscalar const &src)
+ INLINE static void storeSOA(uint32_t comp, uint8_t* pDst, simdscalar const& src)
{
switch (comp)
{
SWR_INVALID("Invalid component: %d", comp);
}
- INLINE static simdscalar unpack(uint32_t comp, simdscalar &in)
+ INLINE static simdscalar unpack(uint32_t comp, simdscalar& in)
{
simdscalar out;
switch (comp)
{
case 0:
- out = TypeTraits<X, NumBitsX>::unpack(in); break;
+ out = TypeTraits<X, NumBitsX>::unpack(in);
+ break;
case 1:
- out = TypeTraits<Y, NumBitsY>::unpack(in); break;
+ out = TypeTraits<Y, NumBitsY>::unpack(in);
+ break;
case 2:
- out = TypeTraits<Z, NumBitsZ>::unpack(in); break;
+ out = TypeTraits<Z, NumBitsZ>::unpack(in);
+ break;
case 3:
- out = TypeTraits<W, NumBitsW>::unpack(in); break;
+ out = TypeTraits<W, NumBitsW>::unpack(in);
+ break;
default:
SWR_INVALID("Invalid component: %d", comp);
out = in;
return out;
}
- INLINE static simdscalar pack(uint32_t comp, simdscalar &in)
+ INLINE static simdscalar pack(uint32_t comp, simdscalar& in)
{
simdscalar out;
switch (comp)
{
case 0:
- out = TypeTraits<X, NumBitsX>::pack(in); break;
+ out = TypeTraits<X, NumBitsX>::pack(in);
+ break;
case 1:
- out = TypeTraits<Y, NumBitsY>::pack(in); break;
+ out = TypeTraits<Y, NumBitsY>::pack(in);
+ break;
case 2:
- out = TypeTraits<Z, NumBitsZ>::pack(in); break;
+ out = TypeTraits<Z, NumBitsZ>::pack(in);
+ break;
case 3:
- out = TypeTraits<W, NumBitsW>::pack(in); break;
+ out = TypeTraits<W, NumBitsW>::pack(in);
+ break;
default:
SWR_INVALID("Invalid component: %d", comp);
out = in;
return out;
}
- INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar &in)
+ INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar& in)
{
switch (comp)
{
return TypeTraits<X, NumBitsX>::loadSOA_16(pSrc);
}
- INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t *pDst, simd16scalar const &src)
+ INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t* pDst, simd16scalar const& src)
{
switch (comp)
{
TypeTraits<X, NumBitsX>::storeSOA(pDst, src);
}
- INLINE static simd16scalar unpack(uint32_t comp, simd16scalar &in)
+ INLINE static simd16scalar unpack(uint32_t comp, simd16scalar& in)
{
switch (comp)
{
return TypeTraits<X, NumBitsX>::unpack(in);
}
- INLINE static simd16scalar pack(uint32_t comp, simd16scalar &in)
+ INLINE static simd16scalar pack(uint32_t comp, simd16scalar& in)
{
switch (comp)
{
return TypeTraits<X, NumBitsX>::pack(in);
}
- INLINE static simd16scalar convertSrgb(uint32_t comp, simd16scalar &in)
+ INLINE static simd16scalar convertSrgb(uint32_t comp, simd16scalar& in)
{
switch (comp)
{
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file utils.h
-*
-* @brief Utilities used by SWR core related to pixel formats.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file utils.h
+ *
+ * @brief Utilities used by SWR core related to pixel formats.
+ *
+ ******************************************************************************/
#pragma once
#include "core/utils.h"
#include "common/simdintrin.h"
INLINE
-void vTranspose(simd4scalar &row0, simd4scalar &row1, simd4scalar &row2, simd4scalar &row3)
+void vTranspose(simd4scalar& row0, simd4scalar& row1, simd4scalar& row2, simd4scalar& row3)
{
simd4scalari row0i = SIMD128::castps_si(row0);
simd4scalari row1i = SIMD128::castps_si(row1);
simd4scalari row3i = SIMD128::castps_si(row3);
simd4scalari vTemp = row2i;
- row2i = SIMD128::unpacklo_epi32(row2i, row3i);
- vTemp = SIMD128::unpackhi_epi32(vTemp, row3i);
+ row2i = SIMD128::unpacklo_epi32(row2i, row3i);
+ vTemp = SIMD128::unpackhi_epi32(vTemp, row3i);
row3i = row0i;
row0i = SIMD128::unpacklo_epi32(row0i, row1i);
}
INLINE
-void vTranspose(simd4scalari &row0, simd4scalari &row1, simd4scalari &row2, simd4scalari &row3)
+void vTranspose(simd4scalari& row0, simd4scalari& row1, simd4scalari& row2, simd4scalari& row3)
{
simd4scalari vTemp = row2;
- row2 = SIMD128::unpacklo_epi32(row2, row3);
- vTemp = SIMD128::unpackhi_epi32(vTemp, row3);
+ row2 = SIMD128::unpacklo_epi32(row2, row3);
+ vTemp = SIMD128::unpackhi_epi32(vTemp, row3);
row3 = row0;
row0 = SIMD128::unpacklo_epi32(row0, row1);
#if KNOB_SIMD_WIDTH == 8
INLINE
-void vTranspose3x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdscalar &vSrc1, const simdscalar &vSrc2)
+void vTranspose3x8(simd4scalar (&vDst)[8],
+ const simdscalar& vSrc0,
+ const simdscalar& vSrc1,
+ const simdscalar& vSrc2)
{
- simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); //x0z0x1z1 x4z4x5z5
- simdscalar r1rx = _simd_unpacklo_ps(vSrc1, _simd_setzero_ps()); //y0w0y1w1 y4w4y5w5
- simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); //x0y0z0w0 x4y4z4w4
- simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); //x1y1z1w1 x5y5z5w5
+ simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); // x0z0x1z1 x4z4x5z5
+ simdscalar r1rx = _simd_unpacklo_ps(vSrc1, _simd_setzero_ps()); // y0w0y1w1 y4w4y5w5
+ simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); // x0y0z0w0 x4y4z4w4
+ simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); // x1y1z1w1 x5y5z5w5
- r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); //x2z2x3z3 x6z6x7z7
- r1rx = _simd_unpackhi_ps(vSrc1, _simd_setzero_ps()); //y2w2y3w3 y6w6yw77
- simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); //x2y2z2w2 x6y6z6w6
- simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); //x3y3z3w3 x7y7z7w7
+ r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); // x2z2x3z3 x6z6x7z7
+    r1rx       = _simd_unpackhi_ps(vSrc1, _simd_setzero_ps()); // y2w2y3w3 y6w6y7w7
+ simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); // x2y2z2w2 x6y6z6w6
+ simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); // x3y3z3w3 x7y7z7w7
vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
}
INLINE
-void vTranspose4x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdscalar &vSrc1, const simdscalar &vSrc2, const simdscalar &vSrc3)
+void vTranspose4x8(simd4scalar (&vDst)[8],
+ const simdscalar& vSrc0,
+ const simdscalar& vSrc1,
+ const simdscalar& vSrc2,
+ const simdscalar& vSrc3)
{
- simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); //x0z0x1z1 x4z4x5z5
- simdscalar r1rx = _simd_unpacklo_ps(vSrc1, vSrc3); //y0w0y1w1 y4w4y5w5
- simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); //x0y0z0w0 x4y4z4w4
- simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); //x1y1z1w1 x5y5z5w5
+ simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2); // x0z0x1z1 x4z4x5z5
+ simdscalar r1rx = _simd_unpacklo_ps(vSrc1, vSrc3); // y0w0y1w1 y4w4y5w5
+ simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx); // x0y0z0w0 x4y4z4w4
+ simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx); // x1y1z1w1 x5y5z5w5
- r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); //x2z2x3z3 x6z6x7z7
- r1rx = _simd_unpackhi_ps(vSrc1, vSrc3); //y2w2y3w3 y6w6yw77
- simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); //x2y2z2w2 x6y6z6w6
- simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); //x3y3z3w3 x7y7z7w7
+ r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2); // x2z2x3z3 x6z6x7z7
+    r1rx       = _simd_unpackhi_ps(vSrc1, vSrc3);         // y2w2y3w3 y6w6y7w7
+ simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx); // x2y2z2w2 x6y6z6w6
+ simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx); // x3y3z3w3 x7y7z7w7
vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
#if ENABLE_AVX512_SIMD16
INLINE
-void vTranspose4x16(simd16scalar(&dst)[4], const simd16scalar &src0, const simd16scalar &src1, const simd16scalar &src2, const simd16scalar &src3)
+void vTranspose4x16(simd16scalar (&dst)[4],
+ const simd16scalar& src0,
+ const simd16scalar& src1,
+ const simd16scalar& src2,
+ const simd16scalar& src3)
{
- const simd16scalari perm = _simd16_set_epi32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0); // pre-permute input to setup the right order after all the unpacking
+ const simd16scalari perm =
+ _simd16_set_epi32(15,
+ 11,
+ 7,
+ 3,
+ 14,
+ 10,
+ 6,
+ 2,
+ 13,
+ 9,
+ 5,
+ 1,
+ 12,
+ 8,
+ 4,
+                           0); // pre-permute input to set up the right order after all the unpacking
simd16scalar pre0 = _simd16_permute_ps(src0, perm); // r
simd16scalar pre1 = _simd16_permute_ps(src1, perm); // g
#endif
INLINE
-void vTranspose8x8(simdscalar (&vDst)[8], const simdscalar &vMask0, const simdscalar &vMask1, const simdscalar &vMask2, const simdscalar &vMask3, const simdscalar &vMask4, const simdscalar &vMask5, const simdscalar &vMask6, const simdscalar &vMask7)
+void vTranspose8x8(simdscalar (&vDst)[8],
+ const simdscalar& vMask0,
+ const simdscalar& vMask1,
+ const simdscalar& vMask2,
+ const simdscalar& vMask3,
+ const simdscalar& vMask4,
+ const simdscalar& vMask5,
+ const simdscalar& vMask6,
+ const simdscalar& vMask7)
{
- simdscalar __t0 = _simd_unpacklo_ps(vMask0, vMask1);
- simdscalar __t1 = _simd_unpackhi_ps(vMask0, vMask1);
- simdscalar __t2 = _simd_unpacklo_ps(vMask2, vMask3);
- simdscalar __t3 = _simd_unpackhi_ps(vMask2, vMask3);
- simdscalar __t4 = _simd_unpacklo_ps(vMask4, vMask5);
- simdscalar __t5 = _simd_unpackhi_ps(vMask4, vMask5);
- simdscalar __t6 = _simd_unpacklo_ps(vMask6, vMask7);
- simdscalar __t7 = _simd_unpackhi_ps(vMask6, vMask7);
- simdscalar __tt0 = _simd_shuffle_ps(__t0,__t2,_MM_SHUFFLE(1,0,1,0));
- simdscalar __tt1 = _simd_shuffle_ps(__t0,__t2,_MM_SHUFFLE(3,2,3,2));
- simdscalar __tt2 = _simd_shuffle_ps(__t1,__t3,_MM_SHUFFLE(1,0,1,0));
- simdscalar __tt3 = _simd_shuffle_ps(__t1,__t3,_MM_SHUFFLE(3,2,3,2));
- simdscalar __tt4 = _simd_shuffle_ps(__t4,__t6,_MM_SHUFFLE(1,0,1,0));
- simdscalar __tt5 = _simd_shuffle_ps(__t4,__t6,_MM_SHUFFLE(3,2,3,2));
- simdscalar __tt6 = _simd_shuffle_ps(__t5,__t7,_MM_SHUFFLE(1,0,1,0));
- simdscalar __tt7 = _simd_shuffle_ps(__t5,__t7,_MM_SHUFFLE(3,2,3,2));
- vDst[0] = _simd_permute2f128_ps(__tt0, __tt4, 0x20);
- vDst[1] = _simd_permute2f128_ps(__tt1, __tt5, 0x20);
- vDst[2] = _simd_permute2f128_ps(__tt2, __tt6, 0x20);
- vDst[3] = _simd_permute2f128_ps(__tt3, __tt7, 0x20);
- vDst[4] = _simd_permute2f128_ps(__tt0, __tt4, 0x31);
- vDst[5] = _simd_permute2f128_ps(__tt1, __tt5, 0x31);
- vDst[6] = _simd_permute2f128_ps(__tt2, __tt6, 0x31);
- vDst[7] = _simd_permute2f128_ps(__tt3, __tt7, 0x31);
+ simdscalar __t0 = _simd_unpacklo_ps(vMask0, vMask1);
+ simdscalar __t1 = _simd_unpackhi_ps(vMask0, vMask1);
+ simdscalar __t2 = _simd_unpacklo_ps(vMask2, vMask3);
+ simdscalar __t3 = _simd_unpackhi_ps(vMask2, vMask3);
+ simdscalar __t4 = _simd_unpacklo_ps(vMask4, vMask5);
+ simdscalar __t5 = _simd_unpackhi_ps(vMask4, vMask5);
+ simdscalar __t6 = _simd_unpacklo_ps(vMask6, vMask7);
+ simdscalar __t7 = _simd_unpackhi_ps(vMask6, vMask7);
+ simdscalar __tt0 = _simd_shuffle_ps(__t0, __t2, _MM_SHUFFLE(1, 0, 1, 0));
+ simdscalar __tt1 = _simd_shuffle_ps(__t0, __t2, _MM_SHUFFLE(3, 2, 3, 2));
+ simdscalar __tt2 = _simd_shuffle_ps(__t1, __t3, _MM_SHUFFLE(1, 0, 1, 0));
+ simdscalar __tt3 = _simd_shuffle_ps(__t1, __t3, _MM_SHUFFLE(3, 2, 3, 2));
+ simdscalar __tt4 = _simd_shuffle_ps(__t4, __t6, _MM_SHUFFLE(1, 0, 1, 0));
+ simdscalar __tt5 = _simd_shuffle_ps(__t4, __t6, _MM_SHUFFLE(3, 2, 3, 2));
+ simdscalar __tt6 = _simd_shuffle_ps(__t5, __t7, _MM_SHUFFLE(1, 0, 1, 0));
+ simdscalar __tt7 = _simd_shuffle_ps(__t5, __t7, _MM_SHUFFLE(3, 2, 3, 2));
+ vDst[0] = _simd_permute2f128_ps(__tt0, __tt4, 0x20);
+ vDst[1] = _simd_permute2f128_ps(__tt1, __tt5, 0x20);
+ vDst[2] = _simd_permute2f128_ps(__tt2, __tt6, 0x20);
+ vDst[3] = _simd_permute2f128_ps(__tt3, __tt7, 0x20);
+ vDst[4] = _simd_permute2f128_ps(__tt0, __tt4, 0x31);
+ vDst[5] = _simd_permute2f128_ps(__tt1, __tt5, 0x31);
+ vDst[6] = _simd_permute2f128_ps(__tt2, __tt6, 0x31);
+ vDst[7] = _simd_permute2f128_ps(__tt3, __tt7, 0x31);
}
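// Scalar reference (illustration only) for the unpack / shuffle / permute2f128
// sequence above, which transposes an 8x8 tile of floats:
static inline void vTranspose8x8_scalar(float dst[8][8], const float src[8][8])
{
    for (int i = 0; i < 8; ++i)
    {
        for (int j = 0; j < 8; ++j)
        {
            dst[j][i] = src[i][j];
        }
    }
}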
INLINE
-void vTranspose8x8(simdscalar (&vDst)[8], const simdscalari &vMask0, const simdscalari &vMask1, const simdscalari &vMask2, const simdscalari &vMask3, const simdscalari &vMask4, const simdscalari &vMask5, const simdscalari &vMask6, const simdscalari &vMask7)
+void vTranspose8x8(simdscalar (&vDst)[8],
+ const simdscalari& vMask0,
+ const simdscalari& vMask1,
+ const simdscalari& vMask2,
+ const simdscalari& vMask3,
+ const simdscalari& vMask4,
+ const simdscalari& vMask5,
+ const simdscalari& vMask6,
+ const simdscalari& vMask7)
{
- vTranspose8x8(vDst, _simd_castsi_ps(vMask0), _simd_castsi_ps(vMask1), _simd_castsi_ps(vMask2), _simd_castsi_ps(vMask3),
- _simd_castsi_ps(vMask4), _simd_castsi_ps(vMask5), _simd_castsi_ps(vMask6), _simd_castsi_ps(vMask7));
+ vTranspose8x8(vDst,
+ _simd_castsi_ps(vMask0),
+ _simd_castsi_ps(vMask1),
+ _simd_castsi_ps(vMask2),
+ _simd_castsi_ps(vMask3),
+ _simd_castsi_ps(vMask4),
+ _simd_castsi_ps(vMask5),
+ _simd_castsi_ps(vMask6),
+ _simd_castsi_ps(vMask7));
}
#endif
//////////////////////////////////////////////////////////////////////////
/// TransposeSingleComponent
//////////////////////////////////////////////////////////////////////////
-template<uint32_t bpp>
+template <uint32_t bpp>
struct TransposeSingleComponent
{
//////////////////////////////////////////////////////////////////////////
#if KNOB_SIMD_WIDTH == 8
#if KNOB_ARCH <= KNOB_ARCH_AVX
- simd4scalari c0c1 = src.v4[0]; // rrrrrrrrgggggggg
- simd4scalari c2c3 = SIMD128::castps_si(_simd_extractf128_ps(_simd_castsi_ps(src), 1)); // bbbbbbbbaaaaaaaa
- simd4scalari c0c2 = SIMD128::unpacklo_epi64(c0c1, c2c3); // rrrrrrrrbbbbbbbb
- simd4scalari c1c3 = SIMD128::unpackhi_epi64(c0c1, c2c3); // ggggggggaaaaaaaa
- simd4scalari c01 = SIMD128::unpacklo_epi8(c0c2, c1c3); // rgrgrgrgrgrgrgrg
- simd4scalari c23 = SIMD128::unpackhi_epi8(c0c2, c1c3); // babababababababa
- simd4scalari c0123lo = SIMD128::unpacklo_epi16(c01, c23); // rgbargbargbargba
- simd4scalari c0123hi = SIMD128::unpackhi_epi16(c01, c23); // rgbargbargbargba
+ simd4scalari c0c1 = src.v4[0]; // rrrrrrrrgggggggg
+ simd4scalari c2c3 =
+ SIMD128::castps_si(_simd_extractf128_ps(_simd_castsi_ps(src), 1)); // bbbbbbbbaaaaaaaa
+ simd4scalari c0c2 = SIMD128::unpacklo_epi64(c0c1, c2c3); // rrrrrrrrbbbbbbbb
+ simd4scalari c1c3 = SIMD128::unpackhi_epi64(c0c1, c2c3); // ggggggggaaaaaaaa
+ simd4scalari c01 = SIMD128::unpacklo_epi8(c0c2, c1c3); // rgrgrgrgrgrgrgrg
+ simd4scalari c23 = SIMD128::unpackhi_epi8(c0c2, c1c3); // babababababababa
+ simd4scalari c0123lo = SIMD128::unpacklo_epi16(c01, c23); // rgbargbargbargba
+ simd4scalari c0123hi = SIMD128::unpackhi_epi16(c01, c23); // rgbargbargbargba
SIMD128::store_si((simd4scalari*)pDst, c0123lo);
SIMD128::store_si((simd4scalari*)(pDst + 16), c0123hi);
#else
simdscalari dst01 = _simd_shuffle_epi8(src,
- _simd_set_epi32(0x0f078080, 0x0e068080, 0x0d058080, 0x0c048080, 0x80800b03, 0x80800a02, 0x80800901, 0x80800800));
+ _simd_set_epi32(0x0f078080,
+ 0x0e068080,
+ 0x0d058080,
+ 0x0c048080,
+ 0x80800b03,
+ 0x80800a02,
+ 0x80800901,
+ 0x80800800));
simdscalari dst23 = _mm256_permute2x128_si256(src, src, 0x01);
- dst23 = _simd_shuffle_epi8(dst23,
- _simd_set_epi32(0x80800f07, 0x80800e06, 0x80800d05, 0x80800c04, 0x0b038080, 0x0a028080, 0x09018080, 0x08008080));
- simdscalari dst = _simd_or_si(dst01, dst23);
+ dst23 = _simd_shuffle_epi8(dst23,
+ _simd_set_epi32(0x80800f07,
+ 0x80800e06,
+ 0x80800d05,
+ 0x80800c04,
+ 0x0b038080,
+ 0x0a028080,
+ 0x09018080,
+ 0x08008080));
+ simdscalari dst = _simd_or_si(dst01, dst23);
_simd_store_si((simdscalari*)pDst, dst);
#endif
#else
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc)); // rrrrrrrrrrrrrrrr
- simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 1); // gggggggggggggggg
- simd4scalari src2 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simd4scalari src3 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 3); // aaaaaaaaaaaaaaaa
+ simd4scalari src0 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simd4scalari src1 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
+ simd4scalari src2 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+ simd4scalari src3 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
simd16scalari cvt0 = _simd16_cvtepu8_epi32(src0);
simd16scalari cvt1 = _simd16_cvtepu8_epi32(src1);
simd16scalari cvt2 = _simd16_cvtepu8_epi32(src2);
simd16scalari cvt3 = _simd16_cvtepu8_epi32(src3);
- simd16scalari shl1 = _simd16_slli_epi32(cvt1, 8);
+ simd16scalari shl1 = _simd16_slli_epi32(cvt1, 8);
simd16scalari shl2 = _simd16_slli_epi32(cvt2, 16);
simd16scalari shl3 = _simd16_slli_epi32(cvt3, 24);
simd16scalari dst = _simd16_or_si(_simd16_or_si(cvt0, shl1), _simd16_or_si(shl2, shl3));
- _simd16_store_si(reinterpret_cast<simd16scalari *>(pDst), dst); // rgbargbargbargbargbargbargbargbargbargbargbargbargbargbargbargba
+ _simd16_store_si(reinterpret_cast<simd16scalari*>(pDst),
+ dst); // rgbargbargbargbargbargbargbargbargbargbargbargbargbargbargbargba
}
#endif
};
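// The Transpose helpers above convert SoA channel planes (rrrr..., gggg..., ...)
// into AoS interleaved pixels (rgba rgba ...). Scalar equivalent for one 8-bit,
// 4-component tile, where count is the pixel count per tile (illustration only):
#include <cstdint>
static inline void SoAtoAoS_8888_scalar(const uint8_t* pSrc, uint8_t* pDst, uint32_t count)
{
    for (uint32_t i = 0; i < count; ++i)
    {
        for (uint32_t c = 0; c < 4; ++c)
        {
            pDst[i * 4 + c] = pSrc[c * count + i];
        }
    }
}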
#if KNOB_SIMD_WIDTH == 8
simdscalari src = _simd_load_si((const simdscalari*)pSrc);
- simd4scalari rg = src.v4[0]; // rrrrrrrr gggggggg
- simd4scalari g = SIMD128::unpackhi_epi64(rg, rg); // gggggggg gggggggg
- rg = SIMD128::unpacklo_epi8(rg, g);
+ simd4scalari rg = src.v4[0]; // rrrrrrrr gggggggg
+ simd4scalari g = SIMD128::unpackhi_epi64(rg, rg); // gggggggg gggggggg
+ rg = SIMD128::unpacklo_epi8(rg, g);
SIMD128::store_si((simd4scalari*)pDst, rg);
#else
#error Unsupported vector width
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc)); // rrrrrrrrrrrrrrrr
- simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 1); // gggggggggggggggg
+ simd4scalari src0 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simd4scalari src1 =
+ SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
simdscalari cvt0 = _simd_cvtepu8_epi16(src0);
simdscalari cvt1 = _simd_cvtepu8_epi16(src1);
simdscalari dst = _simd_or_si(cvt0, shl1);
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst), dst); // rgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst),
+ dst); // rgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg
}
#endif
};
simd4scalar vDst[8];
vTranspose4x8(vDst, src0, src1, src2, src3);
SIMD128::store_ps((float*)pDst, vDst[0]);
- SIMD128::store_ps((float*)pDst+4, vDst[1]);
- SIMD128::store_ps((float*)pDst+8, vDst[2]);
- SIMD128::store_ps((float*)pDst+12, vDst[3]);
- SIMD128::store_ps((float*)pDst+16, vDst[4]);
- SIMD128::store_ps((float*)pDst+20, vDst[5]);
- SIMD128::store_ps((float*)pDst+24, vDst[6]);
- SIMD128::store_ps((float*)pDst+28, vDst[7]);
+ SIMD128::store_ps((float*)pDst + 4, vDst[1]);
+ SIMD128::store_ps((float*)pDst + 8, vDst[2]);
+ SIMD128::store_ps((float*)pDst + 12, vDst[3]);
+ SIMD128::store_ps((float*)pDst + 16, vDst[4]);
+ SIMD128::store_ps((float*)pDst + 20, vDst[5]);
+ SIMD128::store_ps((float*)pDst + 24, vDst[6]);
+ SIMD128::store_ps((float*)pDst + 28, vDst[7]);
#else
#error Unsupported vector width
#endif
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16);
- simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 32);
- simd16scalar src3 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 48);
+ simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
+ simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
+ simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
+ simd16scalar src3 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 48);
simd16scalar dst[4];
vTranspose4x16(dst, src0, src1, src2, src3);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 0, dst[0]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst[1]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 32, dst[2]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 48, dst[3]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
}
#endif
};
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16);
- simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 32);
+ simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
+ simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
+ simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
simd16scalar src3 = _simd16_setzero_ps();
simd16scalar dst[4];
vTranspose4x16(dst, src0, src1, src2, src3);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 0, dst[0]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst[1]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 32, dst[2]);
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 48, dst[3]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
}
#endif
};
INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
#if KNOB_SIMD_WIDTH == 8
- const float* pfSrc = (const float*)pSrc;
- simd4scalar src_r0 = SIMD128::load_ps(pfSrc + 0);
- simd4scalar src_r1 = SIMD128::load_ps(pfSrc + 4);
- simd4scalar src_g0 = SIMD128::load_ps(pfSrc + 8);
- simd4scalar src_g1 = SIMD128::load_ps(pfSrc + 12);
+ const float* pfSrc = (const float*)pSrc;
+ simd4scalar src_r0 = SIMD128::load_ps(pfSrc + 0);
+ simd4scalar src_r1 = SIMD128::load_ps(pfSrc + 4);
+ simd4scalar src_g0 = SIMD128::load_ps(pfSrc + 8);
+ simd4scalar src_g1 = SIMD128::load_ps(pfSrc + 12);
simd4scalar dst0 = SIMD128::unpacklo_ps(src_r0, src_g0);
simd4scalar dst1 = SIMD128::unpackhi_ps(src_r0, src_g0);
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc)); // rrrrrrrrrrrrrrrr
- simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16); // gggggggggggggggg
-
- simd16scalar tmp0 = _simd16_unpacklo_ps(src0, src1); // r0 g0 r1 g1 r4 g4 r5 g5 r8 g8 r9 g9 rC gC rD gD
- simd16scalar tmp1 = _simd16_unpackhi_ps(src0, src1); // r2 g2 r3 g3 r6 g6 r7 g7 rA gA rB gB rE gE rF gF
-
- simd16scalar per0 = _simd16_permute2f128_ps(tmp0, tmp1, 0x44); // (1, 0, 1, 0) // r0 g0 r1 g1 r4 g4 r5 g5 r2 g2 r3 g3 r6 g6 r7 g7
- simd16scalar per1 = _simd16_permute2f128_ps(tmp0, tmp1, 0xEE); // (3, 2, 3, 2) // r8 g8 r9 g9 rC gC rD gD rA gA rB gB rE gE rF gF
-
- simd16scalar dst0 = _simd16_permute2f128_ps(per0, per0, 0xD8); // (3, 1, 2, 0) // r0 g0 r1 g1 r2 g2 r3 g3 r4 g4 r5 g5 r6 g6 r7 g7
- simd16scalar dst1 = _simd16_permute2f128_ps(per1, per1, 0xD8); // (3, 1, 2, 0) // r8 g8 r9 g9 rA gA rB gB rC gC rD gD rE gE rF gF
-
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
- _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst1); // rgrgrgrgrgrgrgrg
+ simd16scalar src0 =
+ _simd16_load_ps(reinterpret_cast<const float*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simd16scalar src1 =
+ _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16); // gggggggggggggggg
+
+ simd16scalar tmp0 =
+ _simd16_unpacklo_ps(src0, src1); // r0 g0 r1 g1 r4 g4 r5 g5 r8 g8 r9 g9 rC gC rD gD
+ simd16scalar tmp1 =
+ _simd16_unpackhi_ps(src0, src1); // r2 g2 r3 g3 r6 g6 r7 g7 rA gA rB gB rE gE rF gF
+
+ simd16scalar per0 = _simd16_permute2f128_ps(
+ tmp0,
+ tmp1,
+ 0x44); // (1, 0, 1, 0) // r0 g0 r1 g1 r4 g4 r5 g5 r2 g2 r3 g3 r6 g6 r7 g7
+ simd16scalar per1 = _simd16_permute2f128_ps(
+ tmp0,
+ tmp1,
+ 0xEE); // (3, 2, 3, 2) // r8 g8 r9 g9 rC gC rD gD rA gA rB gB rE gE rF gF
+
+ simd16scalar dst0 = _simd16_permute2f128_ps(
+ per0,
+ per0,
+ 0xD8); // (3, 1, 2, 0) // r0 g0 r1 g1 r2 g2 r3 g3 r4 g4 r5 g5 r6 g6 r7 g7
+ simd16scalar dst1 = _simd16_permute2f128_ps(
+ per1,
+ per1,
+ 0xD8); // (3, 1, 2, 0) // r8 g8 r9 g9 rA gA rB gB rC gC rD gD rE gE rF gF
+
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
+ _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst1); // rgrgrgrgrgrgrgrg
}
#endif
};
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1); // gggggggggggggggg
- simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simdscalari src3 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 3); // aaaaaaaaaaaaaaaa
-
- simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
- simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
-
- simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rbga0 rbga1 rbga8 rbga9
- simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rbga2 rbga3 rbgaA rbgaB
- simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rbga4 rbga5 rgbaC rbgaD
- simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rbga6 rbga7 rbgaE rbgaF
-
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rbga0 rbga1 rbga2 rbga3
- simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0) // rbga4 rbga5 rbga6 rbga7
- simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rbga8 rbga9 rbgaA rbgaB
- simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1) // rbgaC rbgaD rbgaE rbgaF
-
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 2, dst2); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 3, dst3); // rgbargbargbargba
+ simdscalari src0 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simdscalari src1 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
+ simdscalari src2 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+ simdscalari src3 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
+
+ simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+ simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+    simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba2 ba3 ba8 ba9 baA baB
+    simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
+
+    simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rgba0 rgba1 rgba8 rgba9
+    simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rgba2 rgba3 rgbaA rgbaB
+    simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rgba4 rgba5 rgbaC rgbaD
+    simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rgba6 rgba7 rgbaE rgbaF
+
+    simdscalari dst0 = _simd_permute2f128_si(
+        tmp0, tmp1, 0x20); // (2, 0) // rgba0 rgba1 rgba2 rgba3
+    simdscalari dst1 = _simd_permute2f128_si(
+        tmp2, tmp3, 0x20); // (2, 0) // rgba4 rgba5 rgba6 rgba7
+    simdscalari dst2 = _simd_permute2f128_si(
+        tmp0, tmp1, 0x31); // (3, 1) // rgba8 rgba9 rgbaA rgbaB
+    simdscalari dst3 = _simd_permute2f128_si(
+        tmp2, tmp3, 0x31); // (3, 1) // rgbaC rgbaD rgbaE rgbaF
+
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
}
#endif
};
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1); // gggggggggggggggg
- simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 2); // bbbbbbbbbbbbbbbb
- simdscalari src3 = _simd_setzero_si(); // aaaaaaaaaaaaaaaa
-
- simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
- simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
-
- simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rbga0 rbga1 rbga8 rbga9
- simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rbga2 rbga3 rbgaA rbgaB
- simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rbga4 rbga5 rgbaC rbgaD
- simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rbga6 rbga7 rbgaE rbgaF
-
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rbga0 rbga1 rbga2 rbga3
- simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0) // rbga4 rbga5 rbga6 rbga7
- simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rbga8 rbga9 rbgaA rbgaB
- simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1) // rbgaC rbgaD rbgaE rbgaF
-
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 2, dst2); // rgbargbargbargba
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 3, dst3); // rgbargbargbargba
+ simdscalari src0 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simdscalari src1 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
+ simdscalari src2 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+ simdscalari src3 = _simd_setzero_si(); // aaaaaaaaaaaaaaaa
+
+ simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+ simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+    simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba2 ba3 ba8 ba9 baA baB
+ simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
+
+    simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rgba0 rgba1 rgba8 rgba9
+    simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rgba2 rgba3 rgbaA rgbaB
+    simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rgba4 rgba5 rgbaC rgbaD
+    simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rgba6 rgba7 rgbaE rgbaF
+
+    simdscalari dst0 = _simd_permute2f128_si(
+        tmp0, tmp1, 0x20); // (2, 0) // rgba0 rgba1 rgba2 rgba3
+    simdscalari dst1 = _simd_permute2f128_si(
+        tmp2, tmp3, 0x20); // (2, 0) // rgba4 rgba5 rgba6 rgba7
+    simdscalari dst2 = _simd_permute2f128_si(
+        tmp0, tmp1, 0x31); // (3, 1) // rgba8 rgba9 rgbaA rgbaB
+    simdscalari dst3 = _simd_permute2f128_si(
+        tmp2, tmp3, 0x31); // (3, 1) // rgbaC rgbaD rgbaE rgbaF
+
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
}
#endif
};
INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
{
- simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc)); // rrrrrrrrrrrrrrrr
- simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1); // gggggggggggggggg
+ simdscalari src0 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+ simdscalari src1 =
+ _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
- simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
- simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+ simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+ simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
- simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0) // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
- simdscalari dst1 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1) // rg8 rg9 rgA rgB rgC rgD rgE rgF
+ simdscalari dst0 = _simd_permute2f128_si(
+ tmp0, tmp1, 0x20); // (2, 0) // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
+ simdscalari dst1 = _simd_permute2f128_si(
+ tmp0, tmp1, 0x31); // (3, 1) // rg8 rg9 rgA rgB rgC rgD rgE rgF
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
- _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1); // rgrgrgrgrgrgrgrg
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
+ _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgrgrgrgrgrgrgrg
}
#endif
};
static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
#endif
};
-
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file frontend.cpp
-*
-* @brief Implementation for Frontend which handles vertex processing,
-* primitive assembly, clipping, binning, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file frontend.cpp
+ *
+ * @brief Implementation for Frontend which handles vertex processing,
+ * primitive assembly, clipping, binning, etc.
+ *
+ ******************************************************************************/
#include "api.h"
#include "frontend.h"
/// @brief Helper macro to generate a bitmask
static INLINE uint32_t GenMask(uint32_t numBits)
{
- SWR_ASSERT(numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__);
+ SWR_ASSERT(
+ numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__);
return ((1U << numBits) - 1);
}
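// Example: GenMask(3) == 0x7 (bits 0-2 set). Note that shifting 1U by a
// full 32 bits is undefined in C++, so numBits is assumed to stay below 32
// even though the assert above permits exactly 32.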
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to sync callback.
/// @todo This should go away when we switch this to use compute threading.
-void ProcessSync(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
BE_WORK work;
- work.type = SYNC;
+ work.type = SYNC;
work.pfnWork = ProcessSyncBE;
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
pTileMgr->enqueue(0, 0, &work);
}
/// @param pDC - pointer to draw context.
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to sync callback.
-void ProcessShutdown(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
BE_WORK work;
- work.type = SHUTDOWN;
+ work.type = SHUTDOWN;
work.pfnWork = ProcessShutdownBE;
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
// Enqueue at least 1 work item for each worker thread
    // account for the number of NUMA nodes
uint32_t numNumaNodes = pContext->threadPool.numaMask + 1;
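    // threadPool.numaMask is used to wrap NUMA node indices, so the node
    // count is numaMask + 1.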
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to clear callback.
/// @todo This should go away when we switch this to use compute threading.
-void ProcessClear(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
- CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData;
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ CLEAR_DESC* pDesc = (CLEAR_DESC*)pUserData;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
// queue a clear to each macro tile
// compute macro tile bounds for the specified rect
uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
BE_WORK work;
- work.type = CLEAR;
- work.pfnWork = ProcessClearBE;
+ work.type = CLEAR;
+ work.pfnWork = ProcessClearBE;
work.desc.clear = *pDesc;
for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to callback.
/// @todo This should go away when we switch this to use compute threading.
-void ProcessStoreTiles(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId);
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
- STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
+ STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
// queue a store to each macro tile
// compute macro tile bounds for the specified rect
// store tiles
BE_WORK work;
- work.type = STORETILES;
- work.pfnWork = ProcessStoreTilesBE;
+ work.type = STORETILES;
+ work.pfnWork = ProcessStoreTilesBE;
work.desc.storeTiles = *pDesc;
for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pUserData - Pointer to user data passed back to callback.
/// @todo This should go away when we switch this to use compute threading.
-void ProcessDiscardInvalidateTiles(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ void* pUserData)
{
RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
- DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
- MacroTileMgr *pTileMgr = pDC->pTileMgr;
+ DISCARD_INVALIDATE_TILES_DESC* pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
+ MacroTileMgr* pTileMgr = pDC->pTileMgr;
// compute macro tile bounds for the specified rect
uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
// load tiles
BE_WORK work;
- work.type = DISCARDINVALIDATETILES;
- work.pfnWork = ProcessDiscardInvalidateTilesBE;
+ work.type = DISCARDINVALIDATETILES;
+ work.pfnWork = ProcessDiscardInvalidateTilesBE;
work.desc.discardInvalidateTiles = *pDesc;
for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
/// @param mode - primitive topology for draw operation.
/// @param numPrims - number of vertices or indices for draw.
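/// e.g. GetNumPrims(TOP_TRIANGLE_STRIP, 5) == 3, while
/// GetNumPrims(TOP_LINE_LOOP, n) == n since the loop's closing segment
/// reuses vertex 0.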
/// @todo Frontend needs to be refactored. This will go in appropriate place then.
-uint32_t GetNumPrims(
- PRIMITIVE_TOPOLOGY mode,
- uint32_t numPrims)
+uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
{
switch (mode)
{
- case TOP_POINT_LIST: return numPrims;
- case TOP_TRIANGLE_LIST: return numPrims / 3;
- case TOP_TRIANGLE_STRIP: return numPrims < 3 ? 0 : numPrims - 2;
- case TOP_TRIANGLE_FAN: return numPrims < 3 ? 0 : numPrims - 2;
- case TOP_TRIANGLE_DISC: return numPrims < 2 ? 0 : numPrims - 1;
- case TOP_QUAD_LIST: return numPrims / 4;
- case TOP_QUAD_STRIP: return numPrims < 4 ? 0 : (numPrims - 2) / 2;
- case TOP_LINE_STRIP: return numPrims < 2 ? 0 : numPrims - 1;
- case TOP_LINE_LIST: return numPrims / 2;
- case TOP_LINE_LOOP: return numPrims;
- case TOP_RECT_LIST: return numPrims / 3;
- case TOP_LINE_LIST_ADJ: return numPrims / 4;
- case TOP_LISTSTRIP_ADJ: return numPrims < 3 ? 0 : numPrims - 3;
- case TOP_TRI_LIST_ADJ: return numPrims / 6;
- case TOP_TRI_STRIP_ADJ: return numPrims < 4 ? 0 : (numPrims / 2) - 2;
+ case TOP_POINT_LIST:
+ return numPrims;
+ case TOP_TRIANGLE_LIST:
+ return numPrims / 3;
+ case TOP_TRIANGLE_STRIP:
+ return numPrims < 3 ? 0 : numPrims - 2;
+ case TOP_TRIANGLE_FAN:
+ return numPrims < 3 ? 0 : numPrims - 2;
+ case TOP_TRIANGLE_DISC:
+ return numPrims < 2 ? 0 : numPrims - 1;
+ case TOP_QUAD_LIST:
+ return numPrims / 4;
+ case TOP_QUAD_STRIP:
+ return numPrims < 4 ? 0 : (numPrims - 2) / 2;
+ case TOP_LINE_STRIP:
+ return numPrims < 2 ? 0 : numPrims - 1;
+ case TOP_LINE_LIST:
+ return numPrims / 2;
+ case TOP_LINE_LOOP:
+ return numPrims;
+ case TOP_RECT_LIST:
+ return numPrims / 3;
+ case TOP_LINE_LIST_ADJ:
+ return numPrims / 4;
+ case TOP_LISTSTRIP_ADJ:
+ return numPrims < 3 ? 0 : numPrims - 3;
+ case TOP_TRI_LIST_ADJ:
+ return numPrims / 6;
+ case TOP_TRI_STRIP_ADJ:
+ return numPrims < 4 ? 0 : (numPrims / 2) - 2;
case TOP_PATCHLIST_1:
case TOP_PATCHLIST_2:
/// @brief Computes the number of verts given the number of primitives.
/// @param mode - primitive topology for draw operation.
/// @param numPrims - number of primitives for draw.
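/// e.g. GetNumVerts(TOP_TRIANGLE_STRIP, 3) == 5; for list, strip, and fan
/// topologies this is effectively the inverse of GetNumPrims above.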
-uint32_t GetNumVerts(
- PRIMITIVE_TOPOLOGY mode,
- uint32_t numPrims)
+uint32_t GetNumVerts(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
{
switch (mode)
{
- case TOP_POINT_LIST: return numPrims;
- case TOP_TRIANGLE_LIST: return numPrims * 3;
- case TOP_TRIANGLE_STRIP: return numPrims ? numPrims + 2 : 0;
- case TOP_TRIANGLE_FAN: return numPrims ? numPrims + 2 : 0;
- case TOP_TRIANGLE_DISC: return numPrims ? numPrims + 1 : 0;
- case TOP_QUAD_LIST: return numPrims * 4;
- case TOP_QUAD_STRIP: return numPrims ? numPrims * 2 + 2 : 0;
- case TOP_LINE_STRIP: return numPrims ? numPrims + 1 : 0;
- case TOP_LINE_LIST: return numPrims * 2;
- case TOP_LINE_LOOP: return numPrims;
- case TOP_RECT_LIST: return numPrims * 3;
- case TOP_LINE_LIST_ADJ: return numPrims * 4;
- case TOP_LISTSTRIP_ADJ: return numPrims ? numPrims + 3 : 0;
- case TOP_TRI_LIST_ADJ: return numPrims * 6;
- case TOP_TRI_STRIP_ADJ: return numPrims ? (numPrims + 2) * 2 : 0;
+ case TOP_POINT_LIST:
+ return numPrims;
+ case TOP_TRIANGLE_LIST:
+ return numPrims * 3;
+ case TOP_TRIANGLE_STRIP:
+ return numPrims ? numPrims + 2 : 0;
+ case TOP_TRIANGLE_FAN:
+ return numPrims ? numPrims + 2 : 0;
+ case TOP_TRIANGLE_DISC:
+ return numPrims ? numPrims + 1 : 0;
+ case TOP_QUAD_LIST:
+ return numPrims * 4;
+ case TOP_QUAD_STRIP:
+ return numPrims ? numPrims * 2 + 2 : 0;
+ case TOP_LINE_STRIP:
+ return numPrims ? numPrims + 1 : 0;
+ case TOP_LINE_LIST:
+ return numPrims * 2;
+ case TOP_LINE_LOOP:
+ return numPrims;
+ case TOP_RECT_LIST:
+ return numPrims * 3;
+ case TOP_LINE_LIST_ADJ:
+ return numPrims * 4;
+ case TOP_LISTSTRIP_ADJ:
+ return numPrims ? numPrims + 3 : 0;
+ case TOP_TRI_LIST_ADJ:
+ return numPrims * 6;
+ case TOP_TRI_STRIP_ADJ:
+ return numPrims ? (numPrims + 2) * 2 : 0;
case TOP_PATCHLIST_1:
case TOP_PATCHLIST_2:
switch (topology)
{
case TOP_LISTSTRIP_ADJ:
- case TOP_LINE_LIST_ADJ: numVerts = 4; break;
+ case TOP_LINE_LIST_ADJ:
+ numVerts = 4;
+ break;
case TOP_TRI_STRIP_ADJ:
- case TOP_TRI_LIST_ADJ: numVerts = 6; break;
- default: break;
+ case TOP_TRI_LIST_ADJ:
+ numVerts = 6;
+ break;
+ default:
+ break;
}
}
/// @param numItemsRemaining - Number of work items remaining to be processed by a SIMD.
static INLINE simdscalari GenerateMask(uint32_t numItemsRemaining)
{
- uint32_t numActive = (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining;
+ uint32_t numActive =
+ (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining;
uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
return _simd_castps_si(_simd_vmask_ps(mask));
}
static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
{
- uint32_t numActive = (numItemsRemaining >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : numItemsRemaining;
+ uint32_t numActive =
+ (numItemsRemaining >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : numItemsRemaining;
uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
return _simd16_castps_si(_simd16_vmask_ps(mask));
}
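// Example: GenerateMask(3) enables lanes 0-2; counts at or beyond the SIMD
// width saturate to a full mask (GenerateMask16 likewise for 16 lanes).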
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param numPrims - Number of prims to streamout (e.g. points, lines, tris)
static void StreamOut(
- DRAW_CONTEXT* pDC,
- PA_STATE& pa,
- uint32_t workerId,
- uint32_t* pPrimData,
- uint32_t streamIndex)
+ DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex)
{
RDTSC_BEGIN(FEStreamout, pDC->drawId);
- const API_STATE& state = GetApiState(pDC);
- const SWR_STREAMOUT_STATE &soState = state.soState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_STREAMOUT_STATE& soState = state.soState;
uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
- // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each vertex.
+ // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each
+ // vertex.
uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t);
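    // i.e. one 4-component float attribute per slot, giving a stride of
    // SWR_VTX_NUM_SLOTS * 4 dwords per vertex.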
- SWR_STREAMOUT_CONTEXT soContext = { 0 };
+ SWR_STREAMOUT_CONTEXT soContext = {0};
// Setup buffer state pointers.
for (uint32_t i = 0; i < 4; ++i)
for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex)
{
- DWORD slot = 0;
+ DWORD slot = 0;
uint64_t soMask = soState.streamMasks[streamIndex];
// Write all entries into primitive data buffer for SOS.
while (_BitScanForward64(&slot, soMask))
{
- simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
- uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
+ simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
+ uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
pa.AssembleSingle(paSlot, primIndex, attrib);
// Attribute offset is relative offset from start of vertex.
// Store each vertex's attrib at appropriate locations in pPrimData buffer.
for (uint32_t v = 0; v < soVertsPerPrim; ++v)
{
- uint32_t* pPrimDataAttrib = pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride);
+ uint32_t* pPrimDataAttrib =
+ pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride);
_mm_store_ps((float*)pPrimDataAttrib, attrib[v]);
}
soMask &= ~(uint64_t(1) << slot);
}
- // Update pPrimData pointer
+ // Update pPrimData pointer
soContext.pPrimData = pPrimData;
// Call SOS
- SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr, "Trying to execute uninitialized streamout jit function.");
+ SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr,
+ "Trying to execute uninitialized streamout jit function.");
state.pfnSoFunc[streamIndex](soContext);
}
///
/// note: the stride between vertices is determined by SWR_VTX_NUM_SLOTS
///
-void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const simdvertex *vertex, uint32_t vertexCount, uint32_t attribCount)
+void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex* vertex_simd16,
+ const simdvertex* vertex,
+ uint32_t vertexCount,
+ uint32_t attribCount)
{
SWR_ASSERT(vertex);
SWR_ASSERT(vertex_simd16);
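    // Vertices are packed two at a time: vertex i fills the low 8 lanes of
    // each simd16 attribute and vertex i + 1 (when present) the high 8 lanes.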
{
for (uint32_t k = 0; k < 4; k += 1)
{
- temp.attrib[j][k] = _simd16_insert_ps(_simd16_setzero_ps(), vertex[i].attrib[j][k], 0);
+ temp.attrib[j][k] =
+ _simd16_insert_ps(_simd16_setzero_ps(), vertex[i].attrib[j][k], 0);
if ((i + 1) < vertexCount)
{
- temp.attrib[j][k] = _simd16_insert_ps(temp.attrib[j][k], vertex[i + 1].attrib[j][k], 1);
+ temp.attrib[j][k] =
+ _simd16_insert_ps(temp.attrib[j][k], vertex[i + 1].attrib[j][k], 1);
}
}
}
/// then return the remaining amount of work.
/// @param curIndex - The start index for the SIMD.
/// @param maxIndex - The last index for all work items.
-static INLINE uint32_t GetNumInvocations(
- uint32_t curIndex,
- uint32_t maxIndex)
+static INLINE uint32_t GetNumInvocations(uint32_t curIndex, uint32_t maxIndex)
{
uint32_t remainder = (maxIndex - curIndex);
#if USE_SIMD16_FRONTEND
/// @param pStreamIdBase - pointer to the stream ID buffer
/// @param numEmittedVerts - Number of total verts emitted by the GS
/// @param pCutBuffer - output buffer to write cuts to
-void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t numEmittedVerts, uint8_t *pCutBuffer)
+void ProcessStreamIdBuffer(uint32_t stream,
+ uint8_t* pStreamIdBase,
+ uint32_t numEmittedVerts,
+ uint8_t* pCutBuffer)
{
SWR_ASSERT(stream < MAX_SO_STREAMS);
- uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8;
+ uint32_t numInputBytes = (numEmittedVerts * 2 + 7) / 8;
uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U);
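    // Stream IDs are 2 bits per emitted vertex while cut flags are 1 bit per
    // vertex, so each pair of input bytes compresses into one output byte.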
for (uint32_t b = 0; b < numOutputBytes; ++b)
{
- uint8_t curInputByte = pStreamIdBase[2*b];
- uint8_t outByte = 0;
+ uint8_t curInputByte = pStreamIdBase[2 * b];
+ uint8_t outByte = 0;
for (uint32_t i = 0; i < 4; ++i)
{
if ((curInputByte & 0x3) != stream)
uint8_t* pGsIn;
uint8_t* pGsOut[KNOB_SIMD_WIDTH];
uint8_t* pGsTransposed;
- void* pStreamCutBuffer;
+ void* pStreamCutBuffer;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Transposes GS output from SOA to AOS to feed the primitive assembler
-/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive assembler
+/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive
+/// assembler
/// @param pSrc - Buffer of vertices in SOA form written by the geometry shader
/// @param numVerts - Number of vertices emitted by the GS
/// @param numAttribs - Number of attributes per vertex
-template<typename SIMD_T, uint32_t SimdWidth>
+template <typename SIMD_T, uint32_t SimdWidth>
void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t numAttribs)
{
uint32_t srcVertexStride = numAttribs * sizeof(float) * 4;
}
auto vGatherOffsets = SIMD_T::load_si((Integer<SIMD_T>*)&gatherOffsets[0]);
- uint32_t numSimd = AlignUp(numVerts, SimdWidth) / SimdWidth;
+ uint32_t numSimd = AlignUp(numVerts, SimdWidth) / SimdWidth;
uint32_t remainingVerts = numVerts;
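    // Each lane of the masked gathers below reads one source vertex, so a
    // single gather per component transposes up to SimdWidth vertices at once.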
for (uint32_t s = 0; s < numSimd; ++s)
// Compute mask to prevent src overflow
uint32_t mask = std::min(remainingVerts, SimdWidth);
- mask = GenMask(mask);
- auto vMask = SIMD_T::vmask_ps(mask);
- auto viMask = SIMD_T::castps_si(vMask);
+ mask = GenMask(mask);
+ auto vMask = SIMD_T::vmask_ps(mask);
+ auto viMask = SIMD_T::castps_si(vMask);
for (uint32_t a = 0; a < numAttribs; ++a)
{
- auto attribGatherX = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
- auto attribGatherY = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float)), vGatherOffsets, vMask);
- auto attribGatherZ = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 2), vGatherOffsets, vMask);
- auto attribGatherW = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 3), vGatherOffsets, vMask);
+ auto attribGatherX = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
+ auto attribGatherY = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD_T::setzero_ps(),
+ (const float*)(pSrcBase + sizeof(float)),
+ vGatherOffsets,
+ vMask);
+ auto attribGatherZ = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD_T::setzero_ps(),
+ (const float*)(pSrcBase + sizeof(float) * 2),
+ vGatherOffsets,
+ vMask);
+ auto attribGatherW = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+ SIMD_T::setzero_ps(),
+ (const float*)(pSrcBase + sizeof(float) * 3),
+ vGatherOffsets,
+ vMask);
SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX);
SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>)), viMask, attribGatherY);
- SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>) * 2), viMask, attribGatherZ);
- SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>) * 3), viMask, attribGatherW);
+ SIMD_T::maskstore_ps(
+ (float*)(pDstBase + sizeof(Float<SIMD_T>) * 2), viMask, attribGatherZ);
+ SIMD_T::maskstore_ps(
+ (float*)(pDstBase + sizeof(Float<SIMD_T>) * 3), viMask, attribGatherW);
pSrcBase += sizeof(float) * 4;
pDstBase += sizeof(Float<SIMD_T>) * 4;
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pa - The primitive assembly object.
/// @param pGsBuffers - GS output buffer allocations
-template <
- typename HasStreamOutT,
- typename HasRastT>
-static void GeometryShaderStage(
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- PA_STATE& pa,
- GsBuffers* pGsBuffers,
- uint32_t* pSoPrimData,
+template <typename HasStreamOutT, typename HasRastT>
+static void GeometryShaderStage(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ PA_STATE& pa,
+ GsBuffers* pGsBuffers,
+ uint32_t* pSoPrimData,
#if USE_SIMD16_FRONTEND
- uint32_t numPrims_simd8,
+ uint32_t numPrims_simd8,
#endif
- simdscalari const &primID)
+ simdscalari const& primID)
{
RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- const API_STATE& state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
const SWR_GS_STATE* pState = &state.gsState;
- SWR_GS_CONTEXT gsContext;
+ SWR_GS_CONTEXT gsContext;
- static uint8_t sNullBuffer[128] = { 0 };
+ static uint8_t sNullBuffer[128] = {0};
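+    // sNullBuffer stands in for the cut buffer when the GS emits no control
+    // data (see the pCutBase selection below), so readers simply see zeros.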
for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
{
gsContext.pStreams[i] = pGsBuffers->pGsOut[i];
}
- gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn;
+ gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn;
gsContext.PrimitiveID = primID;
- uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
+ uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
simdvector attrib[MAX_NUM_VERTS_PER_PRIM];
// assemble all attributes for the input primitive
for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
{
uint32_t srcAttribSlot = pState->srcVertexAttribOffset + slot;
- uint32_t attribSlot = pState->vertexAttribOffset + slot;
+ uint32_t attribSlot = pState->vertexAttribOffset + slot;
pa.Assemble(srcAttribSlot, attrib);
for (uint32_t i = 0; i < numVertsPerPrim; ++i)
#if USE_SIMD16_FRONTEND
uint32_t numInputPrims = numPrims_simd8;
#else
- uint32_t numInputPrims = pa.NumPrims();
+ uint32_t numInputPrims = pa.NumPrims();
#endif
for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
{
gsContext.InstanceID = instance;
- gsContext.mask = GenerateMask(numInputPrims);
+ gsContext.mask = GenerateMask(numInputPrims);
// execute the geometry shader
state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
{
switch (pState->outputTopology)
{
- case TOP_RECT_LIST: pfnClipFunc = ClipRectangles_simd16; break;
- case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles_simd16; break;
- case TOP_LINE_STRIP: pfnClipFunc = ClipLines_simd16; break;
- case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
- default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
+ case TOP_RECT_LIST:
+ pfnClipFunc = ClipRectangles_simd16;
+ break;
+ case TOP_TRIANGLE_STRIP:
+ pfnClipFunc = ClipTriangles_simd16;
+ break;
+ case TOP_LINE_STRIP:
+ pfnClipFunc = ClipLines_simd16;
+ break;
+ case TOP_POINT_LIST:
+ pfnClipFunc = ClipPoints_simd16;
+ break;
+ default:
+ SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
}
}
#else
- PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
+ PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
if (HasRastT::value)
{
switch (pState->outputTopology)
{
- case TOP_RECT_LIST: pfnClipFunc = ClipRectangles; break;
- case TOP_TRIANGLE_STRIP: pfnClipFunc = ClipTriangles; break;
- case TOP_LINE_STRIP: pfnClipFunc = ClipLines; break;
- case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break;
- default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
+ case TOP_RECT_LIST:
+ pfnClipFunc = ClipRectangles;
+ break;
+ case TOP_TRIANGLE_STRIP:
+ pfnClipFunc = ClipTriangles;
+ break;
+ case TOP_LINE_STRIP:
+ pfnClipFunc = ClipLines;
+ break;
+ case TOP_POINT_LIST:
+ pfnClipFunc = ClipPoints;
+ break;
+ default:
+ SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
}
}
}
uint8_t* pBase = pInstanceBase + instance * pState->allocationSize;
- uint8_t* pCutBase = pState->controlDataSize == 0 ? &sNullBuffer[0] : pBase + pState->controlDataOffset;
+ uint8_t* pCutBase =
+ pState->controlDataSize == 0 ? &sNullBuffer[0] : pBase + pState->controlDataOffset;
uint8_t* pVertexBaseAOS = pBase + pState->outputVertexOffset;
#if USE_SIMD16_FRONTEND
- TransposeSOAtoAOS<SIMD512, KNOB_SIMD16_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed, pVertexBaseAOS, vertexCount, pState->outputVertexSize);
+ TransposeSOAtoAOS<SIMD512, KNOB_SIMD16_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
+ pVertexBaseAOS,
+ vertexCount,
+ pState->outputVertexSize);
#else
- TransposeSOAtoAOS<SIMD256, KNOB_SIMD_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed, pVertexBaseAOS, vertexCount, pState->outputVertexSize);
+ TransposeSOAtoAOS<SIMD256, KNOB_SIMD_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
+ pVertexBaseAOS,
+ vertexCount,
+ pState->outputVertexSize);
#endif
uint32_t numAttribs = state.feNumAttributes;
for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
{
- bool processCutVerts = false;
- uint8_t* pCutBuffer = pCutBase;
+ bool processCutVerts = false;
+ uint8_t* pCutBuffer = pCutBase;
// assign default stream ID, only relevant when GS is outputting a single stream
uint32_t streamID = 0;
if (pState->isSingleStream)
{
processCutVerts = true;
- streamID = pState->singleStreamID;
- if (streamID != stream) continue;
+ streamID = pState->singleStreamID;
+ if (streamID != stream)
+ continue;
}
else
{
}
// multi-stream output, need to translate StreamID buffer to a cut buffer
- ProcessStreamIdBuffer(stream, pCutBase, numEmittedVerts, (uint8_t*)pGsBuffers->pStreamCutBuffer);
- pCutBuffer = (uint8_t*)pGsBuffers->pStreamCutBuffer;
+ ProcessStreamIdBuffer(
+ stream, pCutBase, numEmittedVerts, (uint8_t*)pGsBuffers->pStreamCutBuffer);
+ pCutBuffer = (uint8_t*)pGsBuffers->pStreamCutBuffer;
processCutVerts = false;
}
#if USE_SIMD16_FRONTEND
- PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim);
+ PA_STATE_CUT gsPa(pDC,
+ (uint8_t*)pGsBuffers->pGsTransposed,
+ numEmittedVerts,
+ pState->outputVertexSize,
+ reinterpret_cast<simd16mask*>(pCutBuffer),
+ numEmittedVerts,
+ numAttribs,
+ pState->outputTopology,
+ processCutVerts,
+ pa.numVertsPerPrim);
#else
- PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim);
+ PA_STATE_CUT gsPa(pDC,
+ (uint8_t*)pGsBuffers->pGsTransposed,
+ numEmittedVerts,
+ pState->outputVertexSize,
+ pCutBuffer,
+ numEmittedVerts,
+ numAttribs,
+ pState->outputTopology,
+ processCutVerts,
+ pa.numVertsPerPrim);
#endif
while (gsPa.GetNextStreamOutput())
            // Gather data from the SGV if provided.
simd16scalari vViewportIdx = SIMD16::setzero_si();
- simd16scalari vRtIdx = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4];
+ simd16scalari vRtIdx = SIMD16::setzero_si();
+ SIMD16::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
-
if (state.backendState.readViewportArrayIndex)
{
- vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx =
+ SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
gsPa.viewportArrayActive = true;
}
if (state.backendState.readRenderTargetArrayIndex)
{
// OOB VPAI indices => forced to zero.
- vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
- simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
+ vViewportIdx =
+ SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
+ simd16scalari vNumViewports =
+ SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask =
+ SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
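            // Lanes whose index is >= KNOB_NUM_VIEWPORTS_SCISSORS fail the
            // cmplt above, so the and_si zeroes them; negative indices were
            // already clamped away by max_epi32.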
gsPa.useAlternateOffset = false;
- pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
+ pfnClipFunc(pDC,
+ gsPa,
+ workerId,
+ attrib_simd16,
+ GenMask(gsPa.NumPrims()),
+ vPrimId,
+ vViewportIdx,
+ vRtIdx);
}
#else
simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
        // Gather data from the SGV if provided.
simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4];
+ simdscalari vRtIdx = SIMD::setzero_si();
+ SIMD::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
-
if (state.backendState.readViewportArrayIndex)
{
- vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx =
+ SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB VPAI indices => forced to zero.
- vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+ vViewportIdx =
+ SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
+ simdscalari vNumViewports =
+ SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask =
+ SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
gsPa.viewportArrayActive = true;
}
gsPa.rtArrayActive = true;
}
- pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
+ pfnClipFunc(pDC,
+ gsPa,
+ workerId,
+ attrib,
+ GenMask(gsPa.NumPrims()),
+ vPrimId,
+ vViewportIdx,
+ vRtIdx);
#endif
}
}
// update GS pipeline stats
UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
- AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims));
+ AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims));
RDTSC_END(FEGeometryShader, 1);
}
/// @param state - API state
/// @param vertsPerPrim - number of vertices per input primitive
/// @param pGsBuffers - GS buffer allocations to initialize
-template<typename SIMD_T, uint32_t SIMD_WIDTH>
-static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state, uint32_t vertsPerPrim, GsBuffers* pGsBuffers)
+template <typename SIMD_T, uint32_t SIMD_WIDTH>
+static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC,
+ const API_STATE& state,
+ uint32_t vertsPerPrim,
+ GsBuffers* pGsBuffers)
{
auto pArena = pDC->pArena;
SWR_ASSERT(pArena != nullptr);
// Allocate storage for vertex inputs
uint32_t vertexInBufferSize = gsState.inputVertStride * sizeof(simdvector) * vertsPerPrim;
- pGsBuffers->pGsIn = (uint8_t*)pArena->AllocAligned(vertexInBufferSize, 32);
+ pGsBuffers->pGsIn = (uint8_t*)pArena->AllocAligned(vertexInBufferSize, 32);
// Allocate arena space to hold GS output verts
const uint32_t vertexBufferSize = gsState.instanceCount * gsState.allocationSize;
// Allocate storage for transposed GS output
uint32_t numSimdBatches = AlignUp(gsState.maxNumVerts, SIMD_WIDTH) / SIMD_WIDTH;
- uint32_t transposedBufferSize = numSimdBatches * gsState.outputVertexSize * sizeof(Vec4<SIMD_T>);
+ uint32_t transposedBufferSize =
+ numSimdBatches * gsState.outputVertexSize * sizeof(Vec4<SIMD_T>);
pGsBuffers->pGsTransposed = (uint8_t*)pArena->AllocAligned(transposedBufferSize, 32);
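    // Sized as one Vec4<SIMD_T> per output vertex attribute slot for each
    // SIMD-width batch of emitted vertices.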
// Allocate storage to hold temporary stream->cut buffer, if necessary
}
else
{
- pGsBuffers->pStreamCutBuffer = (uint8_t*)pArena->AllocAligned(AlignUp(gsState.maxNumVerts * 2, 32), 32);
+ pGsBuffers->pStreamCutBuffer =
+ (uint8_t*)pArena->AllocAligned(AlignUp(gsState.maxNumVerts * 2, 32), 32);
}
}
struct TessellationThreadLocalData
{
SWR_HS_CONTEXT hsContext;
- ScalarPatch patchData[KNOB_SIMD_WIDTH];
- void* pTxCtx;
- size_t tsCtxSize;
+ ScalarPatch patchData[KNOB_SIMD_WIDTH];
+ void* pTxCtx;
+ size_t tsCtxSize;
simdscalar* pDSOutput;
- size_t dsOutputAllocSize;
+ size_t dsOutputAllocSize;
};
THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr;
/// @TODO - Don't use thread local storage. Use Worker local storage instead.
if (gt_pTessellationThreadData == nullptr)
{
- gt_pTessellationThreadData = (TessellationThreadLocalData*)
- AlignedMalloc(sizeof(TessellationThreadLocalData), 64);
+ gt_pTessellationThreadData =
+ (TessellationThreadLocalData*)AlignedMalloc(sizeof(TessellationThreadLocalData), 64);
memset(gt_pTessellationThreadData, 0, sizeof(*gt_pTessellationThreadData));
}
}
/// @param workerId - thread's worker id. Every thread has a unique id.
/// @param pa - The primitive assembly object.
/// @param pGsBuffers - GS output buffer allocations
-template <
- typename HasGeometryShaderT,
- typename HasStreamOutT,
- typename HasRastT>
-static void TessellationStages(
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- PA_STATE& pa,
- GsBuffers* pGsBuffers,
- uint32_t* pSoPrimData,
+template <typename HasGeometryShaderT, typename HasStreamOutT, typename HasRastT>
+static void TessellationStages(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ PA_STATE& pa,
+ GsBuffers* pGsBuffers,
+ uint32_t* pSoPrimData,
#if USE_SIMD16_FRONTEND
- uint32_t numPrims_simd8,
+ uint32_t numPrims_simd8,
#endif
- simdscalari const &primID)
+ simdscalari const& primID)
{
- const API_STATE& state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
const SWR_TS_STATE& tsState = state.tsState;
void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
SWR_ASSERT(gt_pTessellationThreadData);
- HANDLE tsCtx = TSInitCtx(
- tsState.domain,
- tsState.partitioning,
- tsState.tsOutputTopology,
- gt_pTessellationThreadData->pTxCtx,
- gt_pTessellationThreadData->tsCtxSize);
+ HANDLE tsCtx = TSInitCtx(tsState.domain,
+ tsState.partitioning,
+ tsState.tsOutputTopology,
+ gt_pTessellationThreadData->pTxCtx,
+ gt_pTessellationThreadData->tsCtxSize);
if (tsCtx == nullptr)
{
- gt_pTessellationThreadData->pTxCtx = AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64);
- tsCtx = TSInitCtx(
- tsState.domain,
- tsState.partitioning,
- tsState.tsOutputTopology,
- gt_pTessellationThreadData->pTxCtx,
- gt_pTessellationThreadData->tsCtxSize);
+ gt_pTessellationThreadData->pTxCtx =
+ AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64);
+ tsCtx = TSInitCtx(tsState.domain,
+ tsState.partitioning,
+ tsState.tsOutputTopology,
+ gt_pTessellationThreadData->pTxCtx,
+ gt_pTessellationThreadData->tsCtxSize);
}
SWR_ASSERT(tsCtx);
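    // TSInitCtx returns nullptr when the supplied scratch buffer is too small
    // and updates tsCtxSize with the required size, hence the allocate-and-retry
    // above.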
{
switch (tsState.postDSTopology)
{
- case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles_simd16; break;
- case TOP_LINE_LIST: pfnClipFunc = ClipLines_simd16; break;
- case TOP_POINT_LIST: pfnClipFunc = ClipPoints_simd16; break;
- default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
+ case TOP_TRIANGLE_LIST:
+ pfnClipFunc = ClipTriangles_simd16;
+ break;
+ case TOP_LINE_LIST:
+ pfnClipFunc = ClipLines_simd16;
+ break;
+ case TOP_POINT_LIST:
+ pfnClipFunc = ClipPoints_simd16;
+ break;
+ default:
+ SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
}
}
{
switch (tsState.postDSTopology)
{
- case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles; break;
- case TOP_LINE_LIST: pfnClipFunc = ClipLines; break;
- case TOP_POINT_LIST: pfnClipFunc = ClipPoints; break;
- default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
+ case TOP_TRIANGLE_LIST:
+ pfnClipFunc = ClipTriangles;
+ break;
+ case TOP_LINE_LIST:
+ pfnClipFunc = ClipLines;
+ break;
+ case TOP_POINT_LIST:
+ pfnClipFunc = ClipPoints;
+ break;
+ default:
+ SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
}
}
#endif
SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
- hsContext.pCPout = gt_pTessellationThreadData->patchData;
- hsContext.PrimitiveID = primID;
+ hsContext.pCPout = gt_pTessellationThreadData->patchData;
+ hsContext.PrimitiveID = primID;
uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
// Max storage for one attribute for an entire simdprimitive
for (uint32_t p = 0; p < numPrims; ++p)
{
// Run Tessellator
- SWR_TS_TESSELLATED_DATA tsData = { 0 };
+ SWR_TS_TESSELLATED_DATA tsData = {0};
RDTSC_BEGIN(FETessellation, pDC->drawId);
TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
AR_EVENT(TessPrimCount(1));
SWR_ASSERT(tsData.NumDomainPoints);
// Allocate DS Output memory
- uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
+ uint32_t requiredDSVectorInvocations =
+ AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
#if USE_SIMD16_FRONTEND
- size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.dsAllocationSize; // simd8 -> simd16, padding
+ size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) *
+ tsState.dsAllocationSize; // simd8 -> simd16, padding
#else
size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.dsAllocationSize;
- size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors;
+ size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors;
#endif
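    // e.g. 20 domain points with KNOB_SIMD_WIDTH == 8 need 3 simd8 vectors,
    // padded to 4 under USE_SIMD16_FRONTEND so pairs of simd8 outputs can be
    // re-read as whole simd16 vectors.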
if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize)
{
AlignedFree(gt_pTessellationThreadData->pDSOutput);
- gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
+ gt_pTessellationThreadData->pDSOutput =
+ (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize;
}
SWR_ASSERT(gt_pTessellationThreadData->pDSOutput);
// Run Domain Shader
SWR_DS_CONTEXT dsContext;
- dsContext.PrimitiveID = pPrimId[p];
- dsContext.pCpIn = &hsContext.pCPout[p];
- dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
- dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
- dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;
+ dsContext.PrimitiveID = pPrimId[p];
+ dsContext.pCpIn = &hsContext.pCPout[p];
+ dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
+ dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
+ dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;
dsContext.outVertexAttribOffset = tsState.dsOutVtxAttribOffset;
#if USE_SIMD16_FRONTEND
- dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations); // simd8 -> simd16
+ dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations); // simd8 -> simd16
#else
- dsContext.vectorStride = requiredDSVectorInvocations;
+ dsContext.vectorStride = requiredDSVectorInvocations;
#endif
uint32_t dsInvocations = 0;
- for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations; ++dsContext.vectorOffset)
+ for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations;
+ ++dsContext.vectorOffset)
{
dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
#if USE_SIMD16_FRONTEND
- SWR_ASSERT(IsEven(dsContext.vectorStride)); // simd8 -> simd16
+ SWR_ASSERT(IsEven(dsContext.vectorStride)); // simd8 -> simd16
#endif
PA_TESS tessPa(
pDC,
#if USE_SIMD16_FRONTEND
- reinterpret_cast<const simd16scalar *>(dsContext.pOutputData), // simd8 -> simd16
- dsContext.vectorStride / 2, // simd8 -> simd16
+ reinterpret_cast<const simd16scalar*>(dsContext.pOutputData), // simd8 -> simd16
+ dsContext.vectorStride / 2, // simd8 -> simd16
#else
dsContext.pOutputData,
dsContext.vectorStride,
while (tessPa.HasWork())
{
#if USE_SIMD16_FRONTEND
- const uint32_t numPrims = tessPa.NumPrims();
+ const uint32_t numPrims = tessPa.NumPrims();
const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
- const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
+ const uint32_t numPrims_hi =
+ std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
- const simd16scalari primID = _simd16_set1_epi32(dsContext.PrimitiveID);
- const simdscalari primID_lo = _simd16_extract_si(primID, 0);
- const simdscalari primID_hi = _simd16_extract_si(primID, 1);
+ const simd16scalari primID = _simd16_set1_epi32(dsContext.PrimitiveID);
+ const simdscalari primID_lo = _simd16_extract_si(primID, 0);
+ const simdscalari primID_hi = _simd16_extract_si(primID, 1);
#endif
if (HasGeometryShaderT::value)
{
#if USE_SIMD16_FRONTEND
tessPa.useAlternateOffset = false;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+ GeometryShaderStage<HasStreamOutT, HasRastT>(
+ pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_lo, primID_lo);
if (numPrims_hi)
{
tessPa.useAlternateOffset = true;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+ GeometryShaderStage<HasStreamOutT, HasRastT>(
+ pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_hi, primID_hi);
}
#else
GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC, workerId, tessPa, pGsBuffers, pSoPrimData, _simd_set1_epi32(dsContext.PrimitiveID));
+ pDC,
+ workerId,
+ tessPa,
+ pGsBuffers,
+ pSoPrimData,
+ _simd_set1_epi32(dsContext.PrimitiveID));
#endif
}
else
if (HasRastT::value)
{
#if USE_SIMD16_FRONTEND
- simd16vector prim_simd16[3]; // Only deal with triangles, lines, or points
+ simd16vector prim_simd16[3]; // Only deal with triangles, lines, or points
#else
- simdvector prim[3]; // Only deal with triangles, lines, or points
+ simdvector prim[3]; // Only deal with triangles, lines, or points
#endif
RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
bool assemble =
#if USE_SIMD16_FRONTEND
            // Gather data from the SGV if provided.
simd16scalari vViewportIdx = SIMD16::setzero_si();
- simd16scalari vRtIdx = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4];
+ simd16scalari vRtIdx = SIMD16::setzero_si();
+ SIMD16::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
-
if (state.backendState.readViewportArrayIndex)
{
vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
{
// OOB VPAI indices => forced to zero.
vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
- simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vNumViewports =
+ SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
+ vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
tessPa.useAlternateOffset = false;
- pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vViewportIdx, vRtIdx);
+ pfnClipFunc(pDC,
+ tessPa,
+ workerId,
+ prim_simd16,
+ GenMask(numPrims),
+ primID,
+ vViewportIdx,
+ vRtIdx);
}
#else
// Gather data from the SGV if provided.
simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4];
+ simdscalari vRtIdx = SIMD::setzero_si();
+ SIMD::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
// OOB VPAI indices => forced to zero.
vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
- vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
+ simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+ vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
tessPa.viewportArrayActive = true;
}
if (state.backendState.readRenderTargetArrayIndex)
{
- vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
tessPa.rtArrayActive = true;
}
- pfnClipFunc(pDC, tessPa, workerId, prim,
- GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx, vRtIdx);
+ pfnClipFunc(pDC,
+ tessPa,
+ workerId,
+ prim,
+ GenMask(tessPa.NumPrims()),
+ _simd_set1_epi32(dsContext.PrimitiveID),
+ vViewportIdx,
+ vRtIdx);
#endif
}
}
tessPa.NextPrim();
} // while (tessPa.HasWork())
- } // for (uint32_t p = 0; p < numPrims; ++p)
+ } // for (uint32_t p = 0; p < numPrims; ++p)
#if USE_SIMD16_FRONTEND
if (gt_pTessellationThreadData->pDSOutput != nullptr)
TSDestroyCtx(tsCtx);
}
-THREAD PA_STATE::SIMDVERTEX *gpVertexStore = nullptr;
-THREAD uint32_t gVertexStoreSize = 0;
+THREAD PA_STATE::SIMDVERTEX* gpVertexStore = nullptr;
+THREAD uint32_t gVertexStoreSize = 0;
//////////////////////////////////////////////////////////////////////////
/// @brief FE handler for SwrDraw.
/// @param pDC - pointer to draw context.
/// @param workerId - thread's worker id.
/// @param pUserData - Pointer to DRAW_WORK
-template <
- typename IsIndexedT,
- typename IsCutIndexEnabledT,
- typename HasTessellationT,
- typename HasGeometryShaderT,
- typename HasStreamOutT,
- typename HasRastT>
-void ProcessDraw(
- SWR_CONTEXT *pContext,
- DRAW_CONTEXT *pDC,
- uint32_t workerId,
- void *pUserData)
+template <typename IsIndexedT,
+ typename IsCutIndexEnabledT,
+ typename HasTessellationT,
+ typename HasGeometryShaderT,
+ typename HasStreamOutT,
+ typename HasRastT>
+void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
{
-
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_QUEUE_FE)
{
void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
- DRAW_WORK& work = *(DRAW_WORK*)pUserData;
- const API_STATE& state = GetApiState(pDC);
+ DRAW_WORK& work = *(DRAW_WORK*)pUserData;
+ const API_STATE& state = GetApiState(pDC);
uint32_t indexSize = 0;
uint32_t endVertex = work.numVerts;
if (HasGeometryShaderT::value)
{
#if USE_SIMD16_FRONTEND
- AllocateGsBuffers<SIMD512, KNOB_SIMD16_WIDTH>(pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
+ AllocateGsBuffers<SIMD512, KNOB_SIMD16_WIDTH>(
+ pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
#else
- AllocateGsBuffers<SIMD256, KNOB_SIMD_WIDTH>(pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
+ AllocateGsBuffers<SIMD256, KNOB_SIMD_WIDTH>(
+ pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
#endif
}
#if USE_SIMD16_FRONTEND
uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simd16vector);
#else
- uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simdvector);
+ uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simdvector);
#endif
SWR_ASSERT(vertexCount <= MAX_NUM_VERTS_PER_PRIM);
// Compute storage requirements for vertex store
// TODO: allocation needs to be rethought for better cut support
- uint32_t numVerts = vertexCount + 2; // Need extra space for PA state machine
+ uint32_t numVerts = vertexCount + 2; // Need extra space for PA state machine
uint32_t vertexStoreSize = numVerts * simdVertexSizeBytes;
// grow the vertex store for the PA as necessary
SWR_ASSERT(gpVertexStore == nullptr);
- gpVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX *>(AlignedMalloc(vertexStoreSize, 64));
+ gpVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX*>(AlignedMalloc(vertexStoreSize, 64));
gVertexStoreSize = vertexStoreSize;
SWR_ASSERT(gpVertexStore != nullptr);
}
// choose primitive assembler
-
- PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts, gpVertexStore, numVerts, state.frontendState.vsVertexSize, GetNumVerts(state.topology, 1));
- PA_STATE& pa = paFactory.GetPA();
+
+ PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC,
+ state.topology,
+ work.numVerts,
+ gpVertexStore,
+ numVerts,
+ state.frontendState.vsVertexSize,
+ GetNumVerts(state.topology, 1));
+ PA_STATE& pa = paFactory.GetPA();
#if USE_SIMD16_FRONTEND
#if USE_SIMD16_SHADERS
- simd16vertex vin;
+ simd16vertex vin;
#else
- simdvertex vin_lo;
- simdvertex vin_hi;
+ simdvertex vin_lo;
+ simdvertex vin_hi;
#endif
- SWR_VS_CONTEXT vsContext_lo;
- SWR_VS_CONTEXT vsContext_hi;
+ SWR_VS_CONTEXT vsContext_lo;
+ SWR_VS_CONTEXT vsContext_hi;
#if USE_SIMD16_SHADERS
- vsContext_lo.pVin = reinterpret_cast<simdvertex *>(&vin);
- vsContext_hi.pVin = reinterpret_cast<simdvertex *>(&vin);
+ vsContext_lo.pVin = reinterpret_cast<simdvertex*>(&vin);
+ vsContext_hi.pVin = reinterpret_cast<simdvertex*>(&vin);
#else
vsContext_lo.pVin = &vin_lo;
vsContext_hi.pVin = &vin_hi;
vsContext_lo.AlternateOffset = 0;
vsContext_hi.AlternateOffset = 1;
- SWR_FETCH_CONTEXT fetchInfo_lo = { 0 };
+ SWR_FETCH_CONTEXT fetchInfo_lo = {0};
- fetchInfo_lo.pStreams = &state.vertexBuffers[0];
+ fetchInfo_lo.pStreams = &state.vertexBuffers[0];
fetchInfo_lo.StartInstance = work.startInstance;
- fetchInfo_lo.StartVertex = 0;
+ fetchInfo_lo.StartVertex = 0;
if (IsIndexedT::value)
{
fetchInfo_lo.StartVertex = work.startVertex;
}
- SWR_FETCH_CONTEXT fetchInfo_hi = fetchInfo_lo;
+ SWR_FETCH_CONTEXT fetchInfo_hi = fetchInfo_lo;
- const simd16scalari vScale = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+ const simd16scalari vScale =
+ _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
{
- uint32_t i = 0;
+ uint32_t i = 0;
simd16scalari vIndex;
if (IsIndexedT::value)
{
fetchInfo_lo.xpIndices = work.xpIB;
- fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
+ fetchInfo_hi.xpIndices =
+ fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
}
else
{
vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex;
- fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
+ fetchInfo_hi.xpIndices =
+ (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
}
fetchInfo_lo.CurInstance = instanceNum;
while (pa.HasWork())
{
- // GetNextVsOutput currently has the side effect of updating some PA state machine state.
- // So we need to keep this outside of (i < endVertex) check.
+            // GetNextVsOutput currently has the side effect of updating some PA state machine
+            // state, so this call must stay outside of the (i < endVertex) check.
- simdmask *pvCutIndices_lo = nullptr;
- simdmask *pvCutIndices_hi = nullptr;
+ simdmask* pvCutIndices_lo = nullptr;
+ simdmask* pvCutIndices_hi = nullptr;
if (IsIndexedT::value)
{
// simd16mask <=> simdmask[2]
- pvCutIndices_lo = &reinterpret_cast<simdmask *>(&pa.GetNextVsIndices())[0];
- pvCutIndices_hi = &reinterpret_cast<simdmask *>(&pa.GetNextVsIndices())[1];
+ pvCutIndices_lo = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[0];
+ pvCutIndices_hi = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[1];
}
- simd16vertex &vout = pa.GetNextVsOutput();
+ simd16vertex& vout = pa.GetNextVsOutput();
- vsContext_lo.pVout = reinterpret_cast<simdvertex *>(&vout);
- vsContext_hi.pVout = reinterpret_cast<simdvertex *>(&vout);
+ vsContext_lo.pVout = reinterpret_cast<simdvertex*>(&vout);
+ vsContext_hi.pVout = reinterpret_cast<simdvertex*>(&vout);
if (i < endVertex)
{
{
fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
uint32_t offset;
- offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH);
+ offset = std::min(endVertex - i, (uint32_t)KNOB_SIMD16_WIDTH);
offset *= 4; // convert from index to address
#if USE_SIMD16_SHADERS
fetchInfo_lo.xpLastIndex += offset;
#else
- fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH);
- uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
+ fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t)KNOB_SIMD_WIDTH);
+ uint32_t offset2 =
+ std::min(offset, (uint32_t)KNOB_SIMD16_WIDTH) - KNOB_SIMD_WIDTH;
assert(offset >= 0);
fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
fetchInfo_hi.xpLastIndex += offset2;
#else
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
- if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
+ if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
}
// forward fetch generated vertex IDs to the vertex shader
#if USE_SIMD16_SHADERS
#if USE_SIMD16_VS
- vsContext_lo.VertexID16 = _simd16_insert_si(
- vsContext_lo.VertexID16, fetchInfo_lo.VertexID, 0);
- vsContext_lo.VertexID16 = _simd16_insert_si(
- vsContext_lo.VertexID16, fetchInfo_lo.VertexID2, 1);
+ vsContext_lo.VertexID16 =
+ _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID, 0);
+ vsContext_lo.VertexID16 =
+ _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID2, 1);
#else
vsContext_lo.VertexID = fetchInfo_lo.VertexID;
vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
#if USE_SIMD16_VS
vsContext_lo.mask16 = GenerateMask16(endVertex - i);
#else
- vsContext_lo.mask = GenerateMask(endVertex - i);
- vsContext_hi.mask = GenerateMask(endVertex - (i + KNOB_SIMD_WIDTH));
+ vsContext_lo.mask = GenerateMask(endVertex - i);
+ vsContext_hi.mask = GenerateMask(endVertex - (i + KNOB_SIMD_WIDTH));
#endif
// forward cut mask to the PA
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
- if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
+ if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
{
state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
const uint32_t numPrims = pa.NumPrims();
- const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
- const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
+ const uint32_t numPrims_lo =
+ std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
+ const uint32_t numPrims_hi =
+ std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
- const simd16scalari primID = pa.GetPrimID(work.startPrimID);
- const simdscalari primID_lo = _simd16_extract_si(primID, 0);
- const simdscalari primID_hi = _simd16_extract_si(primID, 1);
+ const simd16scalari primID = pa.GetPrimID(work.startPrimID);
+ const simdscalari primID_lo = _simd16_extract_si(primID, 0);
+ const simdscalari primID_hi = _simd16_extract_si(primID, 1);
if (HasTessellationT::value)
{
pa.useAlternateOffset = false;
- TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+ TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
+ pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ numPrims_lo,
+ primID_lo);
if (numPrims_hi)
{
pa.useAlternateOffset = true;
- TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+ TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
+ pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ numPrims_hi,
+ primID_hi);
}
}
else if (HasGeometryShaderT::value)
{
pa.useAlternateOffset = false;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+ GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ numPrims_lo,
+ primID_lo);
if (numPrims_hi)
{
pa.useAlternateOffset = true;
- GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+ GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ numPrims_hi,
+ primID_hi);
}
}
else
                        // Gather data from the SGV if provided.
simd16scalari vpai = SIMD16::setzero_si();
simd16scalari rtai = SIMD16::setzero_si();
- SIMD16::Vec4 svgAttrib[4];
+ SIMD16::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
-
if (state.backendState.readViewportArrayIndex)
{
vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
}
if (state.backendState.readRenderTargetArrayIndex)
{
- rtai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ rtai =
+ SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
pa.rtArrayActive = true;
}
{
// OOB VPAI indices => forced to zero.
vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
- simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
+ simd16scalari vNumViewports =
+ SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simd16scalari vClearMask =
+ SIMD16::cmplt_epi32(vpai, vNumViewports);
vpai = SIMD16::and_si(vClearMask, vpai);
pa.useAlternateOffset = false;
- pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai, rtai);
+ pDC->pState->pfnProcessPrims_simd16(pDC,
+ pa,
+ workerId,
+ prim_simd16,
+ GenMask(numPrims),
+ primID,
+ vpai,
+ rtai);
}
}
}
}
#else
- SWR_VS_CONTEXT vsContext;
- SWR_FETCH_CONTEXT fetchInfo = { 0 };
+ SWR_VS_CONTEXT vsContext;
+ SWR_FETCH_CONTEXT fetchInfo = {0};
- fetchInfo.pStreams = &state.vertexBuffers[0];
+ fetchInfo.pStreams = &state.vertexBuffers[0];
fetchInfo.StartInstance = work.startInstance;
- fetchInfo.StartVertex = 0;
+ fetchInfo.StartVertex = 0;
if (IsIndexedT::value)
{
        // if the entire index buffer isn't being consumed, set the last index
        // so that fetches of less than a full SIMD width will be masked off
- fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
+ fetchInfo.pLastIndex =
+ (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
if (xpLastRequestedIndex < fetchInfo.pLastIndex)
{
fetchInfo.pLastIndex = xpLastRequestedIndex;
fetchInfo.StartVertex = work.startVertex;
}
- const simdscalari vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+ const simdscalari vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
/// @todo: temporarily move instance loop in the FE to ensure SO ordering
for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
{
simdscalari vIndex;
- uint32_t i = 0;
+ uint32_t i = 0;
if (IsIndexedT::value)
{
}
else
{
- vIndex = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale);
+ vIndex = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale);
fetchInfo.pIndices = (const int32_t*)&vIndex;
}
fetchInfo.CurInstance = instanceNum;
- vsContext.InstanceID = instanceNum;
+ vsContext.InstanceID = instanceNum;
while (pa.HasWork())
{
- // GetNextVsOutput currently has the side effect of updating some PA state machine state.
- // So we need to keep this outside of (i < endVertex) check.
+            // GetNextVsOutput currently has the side effect of updating some PA state machine
+            // state, so this call must stay outside of the (i < endVertex) check.
simdmask* pvCutIndices = nullptr;
if (IsIndexedT::value)
{
}
simdvertex& vout = pa.GetNextVsOutput();
- vsContext.pVin = &vout;
- vsContext.pVout = &vout;
+ vsContext.pVin = &vout;
+ vsContext.pVout = &vout;
if (i < endVertex)
{
-
// 1. Execute FS/VS for a single SIMD.
RDTSC_BEGIN(FEFetchShader, pDC->drawId);
state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
if (HasTessellationT::value)
{
TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
- pDC, workerId, pa, &gsBuffers, pSoPrimData, pa.GetPrimID(work.startPrimID));
+ pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ pa.GetPrimID(work.startPrimID));
}
else if (HasGeometryShaderT::value)
{
GeometryShaderStage<HasStreamOutT, HasRastT>(
- pDC, workerId, pa, &gsBuffers, pSoPrimData, pa.GetPrimID(work.startPrimID));
+ pDC,
+ workerId,
+ pa,
+ &gsBuffers,
+ pSoPrimData,
+ pa.GetPrimID(work.startPrimID));
}
else
{
                    // Gather data from the SGV if provided.
simdscalari vViewportIdx = SIMD::setzero_si();
- simdscalari vRtIdx = SIMD::setzero_si();
- SIMD::Vec4 svgAttrib[4];
+ simdscalari vRtIdx = SIMD::setzero_si();
+ SIMD::Vec4 svgAttrib[4];
- if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+ if (state.backendState.readViewportArrayIndex ||
+ state.backendState.readRenderTargetArrayIndex)
{
pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
}
if (state.backendState.readViewportArrayIndex)
{
- vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+ vViewportIdx =
+ SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
// OOB VPAI indices => forced to zero.
- vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
- simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
- simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+ vViewportIdx =
+ SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
+ simdscalari vNumViewports =
+ SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+ simdscalari vClearMask =
+ SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
pa.viewportArrayActive = true;
}
if (state.backendState.readRenderTargetArrayIndex)
{
- vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+ vRtIdx =
+ SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
pa.rtArrayActive = true;
}
- pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
- GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx, vRtIdx);
+ pDC->pState->pfnProcessPrims(pDC,
+ pa,
+ workerId,
+ prim,
+ GenMask(pa.NumPrims()),
+ pa.GetPrimID(work.startPrimID),
+ vViewportIdx,
+ vRtIdx);
}
}
}
if (IsIndexedT::value)
{
- fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
+ fetchInfo.pIndices =
+ (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
}
else
{
}
};
-
// Selector for correct templated Draw front-end function
-PFN_FE_WORK_FUNC GetProcessDrawFunc(
- bool IsIndexed,
- bool IsCutIndexEnabled,
- bool HasTessellation,
- bool HasGeometryShader,
- bool HasStreamOut,
- bool HasRasterization)
+PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
+ bool IsCutIndexEnabled,
+ bool HasTessellation,
+ bool HasGeometryShader,
+ bool HasStreamOut,
+ bool HasRasterization)
{
- return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
+ return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed,
+ IsCutIndexEnabled,
+ HasTessellation,
+ HasGeometryShader,
+ HasStreamOut,
+ HasRasterization);
}
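// TemplateArgUnroller converts each runtime bool into a compile-time
// integral constant so that one fully specialized ProcessDraw instantiation
// is selected up front, with no per-draw branching inside the hot loop.
// A minimal sketch of the technique for a single argument (illustrative
// signature only; the real unroller peels all six bools recursively):
//
//   static PFN_FE_WORK_FUNC ChooseIndexed(bool isIndexed)
//   {
//       return isIndexed ? FEDrawChooser<std::true_type>::GetFunc()
//                        : FEDrawChooser<std::false_type>::GetFunc();
//   }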
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file frontend.h
-*
-* @brief Definitions for Frontend which handles vertex processing,
-* primitive assembly, clipping, binning, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file frontend.h
+ *
+ * @brief Definitions for Frontend which handles vertex processing,
+ * primitive assembly, clipping, binning, etc.
+ *
+ ******************************************************************************/
#pragma once
#include "context.h"
#include "common/simdintrin.h"
#include <type_traits>
// Calculates the A and B coefficients for the 3 edges of the triangle
-//
+//
// maths for edge equations:
// standard form of a line in 2d
// Ax + By + C = 0
// B = x1 - x0
// C = x0y1 - x1y0
INLINE
-void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
+void triangleSetupAB(const __m128 vX, const __m128 vY, __m128& vA, __m128& vB)
{
// vYsub = y1 y2 y0 dc
__m128 vYsub = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
// vY = y0 y1 y2 dc
vA = _mm_sub_ps(vY, vYsub);
- // Result:
+ // Result:
// A[0] = y0 - y1
// A[1] = y1 - y2
// A[2] = y2 - y0
// vX = x0 x1 x2 dc
vB = _mm_sub_ps(vXsub, vX);
- // Result:
+ // Result:
// B[0] = x1 - x0
// B[1] = x2 - x1
// B[2] = x0 - x2
}
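// Worked example (coordinates chosen for illustration): for screen-space
// vertices v0 = (0, 0), v1 = (4, 0), v2 = (0, 4):
//   A[0] = y0 - y1 =  0,   B[0] = x1 - x0 =  4   (edge v0 -> v1)
//   A[1] = y1 - y2 = -4,   B[1] = x2 - x1 = -4   (edge v1 -> v2)
//   A[2] = y2 - y0 =  4,   B[2] = x0 - x2 =  0   (edge v2 -> v0)
// Together with C from triangleSetupC below, each edge tests a point (x, y)
// via the sign of Ax + By + C during rasterization.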
INLINE
-void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i & vA, __m128i & vB)
+void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i& vA, __m128i& vB)
{
// generate edge equations
// A = y0 - y1
// B = x1 - x0
// C = x0y1 - x1y0
__m128i vYsub = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
- vA = _mm_sub_epi32(vY, vYsub);
+ vA = _mm_sub_epi32(vY, vYsub);
__m128i vXsub = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
- vB = _mm_sub_epi32(vXsub, vX);
+ vB = _mm_sub_epi32(vXsub, vX);
}
INLINE
-void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3], simdscalari (&vA)[3], simdscalari (&vB)[3])
+void triangleSetupABIntVertical(const simdscalari vX[3],
+ const simdscalari vY[3],
+ simdscalari (&vA)[3],
+ simdscalari (&vB)[3])
{
// A = y0 - y1
// B = x1 - x0
#if ENABLE_AVX512_SIMD16
INLINE
-void triangleSetupABIntVertical(const simd16scalari vX[3], const simd16scalari vY[3], simd16scalari(&vA)[3], simd16scalari(&vB)[3])
+void triangleSetupABIntVertical(const simd16scalari vX[3],
+ const simd16scalari vY[3],
+ simd16scalari (&vA)[3],
+ simd16scalari (&vB)[3])
{
// A = y0 - y1
// B = x1 - x0
// Px = x0-x2, Py = y0-y2
// Qx = x1-x2, Qy = y1-y2
// |Px Qx|
-// det = | | = PxQy - PyQx
+// det = | | = PxQy - PyQx
// |Py Qy|
// simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
// try to reuse the A & B coefficients already calculated; factor out a -1 from Py and Qx
// vBShuf = [B2, B0, B1, B0]
__m128i vBShuf = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2));
// vMul = [A1*B2, B1*A2]
- __m128i vMul = _mm_mul_epi32(vAShuf, vBShuf);
+ __m128i vMul = _mm_mul_epi32(vAShuf, vBShuf);
// shuffle upper to lower
// vMul2 = [B1*A2, B1*A2]
__m128i vMul2 = _mm_shuffle_epi32(vMul, _MM_SHUFFLE(3, 2, 3, 2));
- //vMul = [A1*B2 - B1*A2]
+ // vMul = [A1*B2 - B1*A2]
vMul = _mm_sub_epi64(vMul, vMul2);
int64_t result;
_mm_store_sd((double*)&result, _mm_castsi128_pd(vMul));
double dResult = (double)result;
- dResult = dResult * (1.0 / FIXED_POINT16_SCALE);
+ dResult = dResult * (1.0 / FIXED_POINT16_SCALE);
return (float)dResult;
}
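// Worked example (coordinates chosen for illustration): for v0 = (0, 0),
// v1 = (4, 0), v2 = (0, 4):
//   Px = x0 - x2 = 0,   Py = y0 - y2 = -4
//   Qx = x1 - x2 = 4,   Qy = y1 - y2 = -4
//   det = Px*Qy - Py*Qx = 0*(-4) - (-4)*4 = 16
// which is twice the signed triangle area (8); the sign encodes winding and
// feeds backface culling.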
INLINE
-void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3], simdscalari *pvDet)
+void calcDeterminantIntVertical(const simdscalari vA[3],
+ const simdscalari vB[3],
+ simdscalari* pvDet)
{
// refer to calcDeterminantInt comment for calculation explanation
// A1*B2
- simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5
- simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7
+ simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5
+ simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7
simdscalari vB2Lo = _simd_unpacklo_epi32(vB[2], vB[2]);
simdscalari vB2Hi = _simd_unpackhi_epi32(vB[2], vB[2]);
- simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5
- simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7
+ simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5
+ simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7
// B1*A2
simdscalari vA2Lo = _simd_unpacklo_epi32(vA[2], vA[2]);
#if ENABLE_AVX512_SIMD16
INLINE
-void calcDeterminantIntVertical(const simd16scalari vA[3], const simd16scalari vB[3], simd16scalari *pvDet)
+void calcDeterminantIntVertical(const simd16scalari vA[3],
+ const simd16scalari vB[3],
+ simd16scalari* pvDet)
{
// refer to calcDeterminantInt comment for calculation explanation
// A1*B2
- simd16scalari vA1_lo = _simd16_unpacklo_epi32(vA[1], vA[1]); // X 0 X 1 X 4 X 5 X 8 X 9 X C X D (32b)
- simd16scalari vA1_hi = _simd16_unpackhi_epi32(vA[1], vA[1]); // X 2 X 3 X 6 X 7 X A X B X E X F
+ simd16scalari vA1_lo =
+ _simd16_unpacklo_epi32(vA[1], vA[1]); // X 0 X 1 X 4 X 5 X 8 X 9 X C X D (32b)
+ simd16scalari vA1_hi = _simd16_unpackhi_epi32(vA[1], vA[1]); // X 2 X 3 X 6 X 7 X A X B X E X F
simd16scalari vB2_lo = _simd16_unpacklo_epi32(vB[2], vB[2]);
simd16scalari vB2_hi = _simd16_unpackhi_epi32(vB[2], vB[2]);
- simd16scalari vA1B2_lo = _simd16_mul_epi32(vA1_lo, vB2_lo); // 0 1 4 5 8 9 C D (64b)
- simd16scalari vA1B2_hi = _simd16_mul_epi32(vA1_hi, vB2_hi); // 2 3 6 7 A B E F
+ simd16scalari vA1B2_lo = _simd16_mul_epi32(vA1_lo, vB2_lo); // 0 1 4 5 8 9 C D (64b)
+ simd16scalari vA1B2_hi = _simd16_mul_epi32(vA1_hi, vB2_hi); // 2 3 6 7 A B E F
// B1*A2
simd16scalari vA2_lo = _simd16_unpacklo_epi32(vA[2], vA[2]);
simd16scalari vA2B1_hi = _simd16_mul_epi32(vA2_hi, vB1_hi);
// A1*B2 - A2*B1
- simd16scalari difflo = _simd16_sub_epi64(vA1B2_lo, vA2B1_lo); // 0 1 4 5 8 9 C D (64b)
- simd16scalari diffhi = _simd16_sub_epi64(vA1B2_hi, vA2B1_hi); // 2 3 6 7 A B E F
+ simd16scalari difflo = _simd16_sub_epi64(vA1B2_lo, vA2B1_lo); // 0 1 4 5 8 9 C D (64b)
+ simd16scalari diffhi = _simd16_sub_epi64(vA1B2_hi, vA2B1_hi); // 2 3 6 7 A B E F
// (1, 0, 1, 0) = 01 00 01 00 = 0x44, (3, 2, 3, 2) = 11 10 11 10 = 0xEE
- simd16scalari templo = _simd16_permute2f128_si(difflo, diffhi, 0x44); // 0 1 4 5 2 3 6 7 (64b)
- simd16scalari temphi = _simd16_permute2f128_si(difflo, diffhi, 0xEE); // 8 9 C D A B E F
+ simd16scalari templo = _simd16_permute2f128_si(difflo, diffhi, 0x44); // 0 1 4 5 2 3 6 7 (64b)
+ simd16scalari temphi = _simd16_permute2f128_si(difflo, diffhi, 0xEE); // 8 9 C D A B E F
// (3, 1, 2, 0) = 11 01 10 00 = 0xD8
- pvDet[0] = _simd16_permute2f128_si(templo, templo, 0xD8); // 0 1 2 3 4 5 6 7 (64b)
- pvDet[1] = _simd16_permute2f128_si(temphi, temphi, 0xD8); // 8 9 A B C D E F
+ pvDet[0] = _simd16_permute2f128_si(templo, templo, 0xD8); // 0 1 2 3 4 5 6 7 (64b)
+ pvDet[1] = _simd16_permute2f128_si(temphi, temphi, 0xD8); // 8 9 A B C D E F
}
#endif
INLINE
-void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
+void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128& vB, __m128& vC)
{
// C = -Ax - By
- vC = _mm_mul_ps(vA, vX);
- __m128 vCy = _mm_mul_ps(vB, vY);
- vC = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
- vC = _mm_sub_ps(vC, vCy);
+ vC = _mm_mul_ps(vA, vX);
+ __m128 vCy = _mm_mul_ps(vB, vY);
+ vC = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
+ vC = _mm_sub_ps(vC, vCy);
}
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simdvector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
{
simdscalar m00 = _simd_load1_ps(&vpMatrices.m00[0]);
simdscalar m30 = _simd_load1_ps(&vpMatrices.m30[0]);
}
#if USE_SIMD16_FRONTEND
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simd16vector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
{
const simd16scalar m00 = _simd16_broadcast_ss(&vpMatrices.m00[0]);
const simd16scalar m30 = _simd16_broadcast_ss(&vpMatrices.m30[0]);
}
#endif
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari const &vViewportIdx)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simdvector* v,
+ const SWR_VIEWPORT_MATRICES& vpMatrices,
+ simdscalari const& vViewportIdx)
{
// perform a gather of each matrix element based on the viewport array indexes
simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
}
#if USE_SIMD16_FRONTEND
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simd16scalari const &vViewportIdx)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simd16vector* v,
+ const SWR_VIEWPORT_MATRICES& vpMatrices,
+ simd16scalari const& vViewportIdx)
{
// perform a gather of each matrix element based on the viewport array indexes
const simd16scalar m00 = _simd16_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
#endif
INLINE
-void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
+void calcBoundingBoxInt(const __m128i& vX, const __m128i& vY, SWR_RECT& bbox)
{
    // Need a horizontal integer min here
__m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
__m128i vY1 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
__m128i vY2 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));
-
__m128i vMinX = _mm_min_epi32(vX, vX1);
- vMinX = _mm_min_epi32(vMinX, vX2);
+ vMinX = _mm_min_epi32(vMinX, vX2);
__m128i vMaxX = _mm_max_epi32(vX, vX1);
- vMaxX = _mm_max_epi32(vMaxX, vX2);
+ vMaxX = _mm_max_epi32(vMaxX, vX2);
__m128i vMinY = _mm_min_epi32(vY, vY1);
- vMinY = _mm_min_epi32(vMinY, vY2);
+ vMinY = _mm_min_epi32(vMinY, vY2);
__m128i vMaxY = _mm_max_epi32(vY, vY1);
- vMaxY = _mm_max_epi32(vMaxY, vY2);
+ vMaxY = _mm_max_epi32(vMaxY, vY2);
bbox.xmin = _mm_extract_epi32(vMinX, 0);
bbox.xmax = _mm_extract_epi32(vMaxX, 0);
}
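// Illustrative lane trace of the horizontal reduction above (values assumed,
// and vX2 assumed to mirror the vY2 shuffle): with vX = { 5, 1, 9, dc },
//   vX1 = { 1, 5, 9, dc }            // lanes 0 and 1 swapped
//   vX2 = { 9, 1, 5, dc }            // lane 2 rotated into lane 0
//   min(vX, vX1)    = { 1, 1, 9, dc }
//   min(above, vX2) = { 1, 1, 5, dc }
// so lane 0 holds the minimum of the three vertex coordinates, which is
// what the _mm_extract_epi32(..., 0) calls read out.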
INLINE
-bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
+bool CanUseSimplePoints(DRAW_CONTEXT* pDC)
{
const API_STATE& state = GetApiState(pDC);
return (state.rastState.sampleCount == SWR_MULTISAMPLE_1X &&
- state.rastState.pointSize == 1.0f &&
- !state.rastState.pointParam &&
- !state.rastState.pointSpriteEnable &&
- !state.backendState.clipDistanceMask);
+ state.rastState.pointSize == 1.0f && !state.rastState.pointParam &&
+ !state.rastState.pointSpriteEnable && !state.backendState.clipDistanceMask);
}
INLINE
bool vHasNaN(const __m128& vec)
{
- const __m128 result = _mm_cmpunord_ps(vec, vec);
- const int32_t mask = _mm_movemask_ps(result);
+ const __m128 result = _mm_cmpunord_ps(vec, vec);
+ const int32_t mask = _mm_movemask_ps(result);
return (mask != 0);
}
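// This relies on the IEEE-754 rule that NaN compares unordered even against
// itself: _mm_cmpunord_ps sets a lane to all-ones exactly where vec holds a
// NaN, and the movemask collapses that to a nonzero integer. Sketch of the
// intended use (hypothetical variable, assumed context):
//
//   __m128 vPos = /* x, y, z, w of one vertex */;
//   if (vHasNaN(vPos))
//   {
//       return; // cull primitives with non-finite positions
//   }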
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
-
// ProcessDraw front-end function. All combinations of parameter values are available
-PFN_FE_WORK_FUNC GetProcessDrawFunc(
- bool IsIndexed,
- bool IsCutIndexEnabled,
- bool HasTessellation,
- bool HasGeometryShader,
- bool HasStreamOut,
- bool HasRasterization);
-
-void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
+ bool IsCutIndexEnabled,
+ bool HasTessellation,
+ bool HasGeometryShader,
+ bool HasStreamOut,
+ bool HasRasterization);
+
+void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
+void ProcessStoreTiles(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ void* pUserData);
+void ProcessDiscardInvalidateTiles(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ void* pUserData);
+void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
+void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
#if USE_SIMD16_FRONTEND
PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
#endif
-struct PA_STATE_BASE; // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+struct PA_STATE_BASE; // forward decl
+void BinPoints(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[3],
+ uint32_t primMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
+void BinLines(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simdvector prims[3],
+ uint32_t primMask,
+ simdscalari const& primID,
+ simdscalari const& viewportIdx,
+ simdscalari const& rtIdx);
#if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[3],
+ uint32_t primMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT* pDC,
+ PA_STATE& pa,
+ uint32_t workerId,
+ simd16vector prims[3],
+ uint32_t primMask,
+ simd16scalari const& primID,
+ simd16scalari const& viewportIdx,
+ simd16scalari const& rtIdx);
#endif
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file knobs.h
-*
-* @brief Static (Compile-Time) Knobs for Core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file knobs.h
+ *
+ * @brief Static (Compile-Time) Knobs for Core.
+ *
+ ******************************************************************************/
#pragma once
#include <stdint.h>
#include <gen_knobs.h>
-#define KNOB_ARCH_AVX 0
-#define KNOB_ARCH_AVX2 1
+#define KNOB_ARCH_AVX 0
+#define KNOB_ARCH_AVX2 1
#define KNOB_ARCH_AVX512 2
///////////////////////////////////////////////////////////////////////////////
// AVX512 Support
///////////////////////////////////////////////////////////////////////////////
-#define ENABLE_AVX512_SIMD16 1
-#define USE_8x2_TILE_BACKEND 1
-#define USE_SIMD16_FRONTEND 1
-#define USE_SIMD16_SHADERS 1 // requires USE_SIMD16_FRONTEND
-#define USE_SIMD16_VS 1 // requires USE_SIMD16_SHADERS
+#define ENABLE_AVX512_SIMD16 1
+#define USE_8x2_TILE_BACKEND 1
+#define USE_SIMD16_FRONTEND 1
+#define USE_SIMD16_SHADERS 1 // requires USE_SIMD16_FRONTEND
+#define USE_SIMD16_VS 1 // requires USE_SIMD16_SHADERS
///////////////////////////////////////////////////////////////////////////////
// Architecture validation
// Configuration knobs
///////////////////////////////////////////////////////////////////////////////
// Maximum supported number of active vertex buffer streams
-#define KNOB_NUM_STREAMS 32
+#define KNOB_NUM_STREAMS 32
// Maximum supported active viewports and scissors
-#define KNOB_NUM_VIEWPORTS_SCISSORS 16
+#define KNOB_NUM_VIEWPORTS_SCISSORS 16
// Guardband range used by the clipper
-#define KNOB_GUARDBAND_WIDTH 32768.0f
-#define KNOB_GUARDBAND_HEIGHT 32768.0f
+#define KNOB_GUARDBAND_WIDTH 32768.0f
+#define KNOB_GUARDBAND_HEIGHT 32768.0f
///////////////////////////////
// Macro tile configuration
///////////////////////////////
// raster tile dimensions
-#define KNOB_TILE_X_DIM 8
-#define KNOB_TILE_X_DIM_SHIFT 3
-#define KNOB_TILE_Y_DIM 8
-#define KNOB_TILE_Y_DIM_SHIFT 3
+#define KNOB_TILE_X_DIM 8
+#define KNOB_TILE_X_DIM_SHIFT 3
+#define KNOB_TILE_Y_DIM 8
+#define KNOB_TILE_Y_DIM_SHIFT 3
-// fixed macrotile pixel dimension for now, eventually will be
+// fixed macrotile pixel dimension for now, eventually will be
// dynamically set based on tile format and pixel size
-#define KNOB_MACROTILE_X_DIM 32
-#define KNOB_MACROTILE_Y_DIM 32
-#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT 13
-#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT 13
-#define KNOB_MACROTILE_X_DIM_FIXED (KNOB_MACROTILE_X_DIM << 8)
-#define KNOB_MACROTILE_Y_DIM_FIXED (KNOB_MACROTILE_Y_DIM << 8)
-#define KNOB_MACROTILE_X_DIM_IN_TILES (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
-#define KNOB_MACROTILE_Y_DIM_IN_TILES (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
+#define KNOB_MACROTILE_X_DIM 32
+#define KNOB_MACROTILE_Y_DIM 32
+#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT 13
+#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT 13
+#define KNOB_MACROTILE_X_DIM_FIXED (KNOB_MACROTILE_X_DIM << 8)
+#define KNOB_MACROTILE_Y_DIM_FIXED (KNOB_MACROTILE_Y_DIM << 8)
+#define KNOB_MACROTILE_X_DIM_IN_TILES (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
+#define KNOB_MACROTILE_Y_DIM_IN_TILES (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
// total # of hot tiles available. This should be enough to
// fully render a 16kx16k 128bpp render target
-#define KNOB_NUM_HOT_TILES_X 256
-#define KNOB_NUM_HOT_TILES_Y 256
-#define KNOB_COLOR_HOT_TILE_FORMAT R32G32B32A32_FLOAT
-#define KNOB_DEPTH_HOT_TILE_FORMAT R32_FLOAT
-#define KNOB_STENCIL_HOT_TILE_FORMAT R8_UINT
+#define KNOB_NUM_HOT_TILES_X 256
+#define KNOB_NUM_HOT_TILES_Y 256
+#define KNOB_COLOR_HOT_TILE_FORMAT R32G32B32A32_FLOAT
+#define KNOB_DEPTH_HOT_TILE_FORMAT R32_FLOAT
+#define KNOB_STENCIL_HOT_TILE_FORMAT R8_UINT
// Max scissor rectangle
-#define KNOB_MAX_SCISSOR_X KNOB_NUM_HOT_TILES_X * KNOB_MACROTILE_X_DIM
-#define KNOB_MAX_SCISSOR_Y KNOB_NUM_HOT_TILES_Y * KNOB_MACROTILE_Y_DIM
+#define KNOB_MAX_SCISSOR_X (KNOB_NUM_HOT_TILES_X * KNOB_MACROTILE_X_DIM)
+#define KNOB_MAX_SCISSOR_Y (KNOB_NUM_HOT_TILES_Y * KNOB_MACROTILE_Y_DIM)
-#if KNOB_SIMD_WIDTH==8 && KNOB_TILE_X_DIM < 4
+#if KNOB_SIMD_WIDTH == 8 && KNOB_TILE_X_DIM < 4
#error "incompatible width/tile dimensions"
#endif
///////////////////////////////////////////////////////////////////////////////
// Optimization knobs
///////////////////////////////////////////////////////////////////////////////
-#define KNOB_USE_FAST_SRGB TRUE
+#define KNOB_USE_FAST_SRGB TRUE
// enables cut-aware primitive assembler
-#define KNOB_ENABLE_CUT_AWARE_PA TRUE
+#define KNOB_ENABLE_CUT_AWARE_PA TRUE
// enables early rasterization (useful for small triangles)
#if !defined(KNOB_ENABLE_EARLY_RAST)
-#define KNOB_ENABLE_EARLY_RAST 1
+#define KNOB_ENABLE_EARLY_RAST 1
#endif
#if KNOB_ENABLE_EARLY_RAST
// Set to 1 to use the dynamic KNOB_TOSS_XXXX knobs.
#if !defined(KNOB_ENABLE_TOSS_POINTS)
-#define KNOB_ENABLE_TOSS_POINTS 0
+#define KNOB_ENABLE_TOSS_POINTS 0
#endif
-
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file knobs_init.h
-*
-* @brief Dynamic Knobs Initialization for Core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file knobs_init.h
+ *
+ * @brief Dynamic Knobs Initialization for Core.
+ *
+ ******************************************************************************/
#pragma once
#include <core/knobs.h>
template <typename T>
static inline void ConvertEnvToKnob(const char* pOverride, T& knobValue)
{
- uint32_t value = 0;
- char* pStopped = nullptr;
- value = strtoul(pOverride, &pStopped, 0);
+ uint32_t value = 0;
+ char* pStopped = nullptr;
+ value = strtoul(pOverride, &pStopped, 0);
if (pStopped != pOverride)
{
knobValue = static_cast<T>(value);
}
// Try converting to a number and casting to bool
- uint32_t value = 0;
- char* pStopped = nullptr;
- value = strtoul(pOverride, &pStopped, 0);
+ uint32_t value = 0;
+ char* pStopped = nullptr;
+ value = strtoul(pOverride, &pStopped, 0);
if (pStopped != pOverride)
{
knobValue = value != 0;
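// Usage sketch (hypothetical environment-variable and knob names): an
// override such as KNOB_TOSS_QUEUE_FE=1 would be applied roughly as
//
//   const char* pOverride = getenv("KNOB_TOSS_QUEUE_FE");
//   if (pOverride)
//   {
//       ConvertEnvToKnob(pOverride, knobs.TOSS_QUEUE_FE);
//   }
//
// strtoul with base 0 accepts decimal, octal (leading 0), and hex (leading
// 0x) spellings; a value that parses no digits leaves the knob unchanged.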
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file multisample.h
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file multisample.h
+ *
+ ******************************************************************************/
#pragma once
INLINE
SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
{
- switch(numSamples)
+ switch (numSamples)
{
- case 1: return SWR_MULTISAMPLE_1X;
- case 2: return SWR_MULTISAMPLE_2X;
- case 4: return SWR_MULTISAMPLE_4X;
- case 8: return SWR_MULTISAMPLE_8X;
- case 16: return SWR_MULTISAMPLE_16X;
- default: assert(0); return SWR_MULTISAMPLE_1X;
+ case 1:
+ return SWR_MULTISAMPLE_1X;
+ case 2:
+ return SWR_MULTISAMPLE_2X;
+ case 4:
+ return SWR_MULTISAMPLE_4X;
+ case 8:
+ return SWR_MULTISAMPLE_8X;
+ case 16:
+ return SWR_MULTISAMPLE_16X;
+ default:
+ assert(0);
+ return SWR_MULTISAMPLE_1X;
}
}
// hardcoded offsets based on Direct3d standard multisample positions
// 16 x 16 sub-pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
// coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
+template <SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
struct MultisampleTraits
{
- INLINE static float X(uint32_t sampleNum) = delete;
- INLINE static float Y(uint32_t sampleNum) = delete;
- INLINE static simdscalari FullSampleMask() = delete;
+ INLINE static float X(uint32_t sampleNum) = delete;
+ INLINE static float Y(uint32_t sampleNum) = delete;
+ INLINE static simdscalari FullSampleMask() = delete;
static const uint32_t numSamples = 0;
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
{
- INLINE static float X(uint32_t sampleNum) {return samplePosX[sampleNum];};
- INLINE static float Y(uint32_t sampleNum) {return samplePosY[sampleNum];};
- INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
+ INLINE static float X(uint32_t sampleNum) { return samplePosX[sampleNum]; };
+ INLINE static float Y(uint32_t sampleNum) { return samplePosY[sampleNum]; };
+ INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
- static const uint32_t numSamples = 1;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
- static constexpr uint32_t samplePosXi[1] = { 0x80 };
- static constexpr uint32_t samplePosYi[1] = { 0x80 };
- static constexpr float samplePosX[1] = { 0.5f };
- static constexpr float samplePosY[1] = { 0.5f };
+ static const uint32_t numSamples = 1;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
+ static constexpr uint32_t samplePosXi[1] = {0x80};
+ static constexpr uint32_t samplePosYi[1] = {0x80};
+ static constexpr float samplePosX[1] = {0.5f};
+ static constexpr float samplePosY[1] = {0.5f};
};
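// The 0.8 fixed-point encoding above maps a sample offset p in [0, 1) to
// p * 256: the 1x center (0.5f, 0.5f) encodes as (0x80, 0x80) since
// 128 / 256 = 0.5, and the 2x positions 0.75f / 0.25f below encode as
// 0xC0 / 0x40.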
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
{
- INLINE static float X(uint32_t sampleNum) {return 0.5f;};
- INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
- INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
-
- static const uint32_t numSamples = 1;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
- static constexpr uint32_t samplePosXi[1] = { 0x80 };
- static constexpr uint32_t samplePosYi[1] = { 0x80 };
- static constexpr float samplePosX[1] = { 0.5f };
- static constexpr float samplePosY[1] = { 0.5f };
+ INLINE static float X(uint32_t sampleNum) { return 0.5f; };
+ INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
+ INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
+
+ static const uint32_t numSamples = 1;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
+ static constexpr uint32_t samplePosXi[1] = {0x80};
+ static constexpr uint32_t samplePosYi[1] = {0x80};
+ static constexpr float samplePosX[1] = {0.5f};
+ static constexpr float samplePosY[1] = {0.5f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
{
- INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
- INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+ INLINE static float X(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosX[sampleNum];
+ };
+ INLINE static float Y(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosY[sampleNum];
+ };
INLINE static simdscalari FullSampleMask()
{
- static const simdscalari mask =_simd_set1_epi32(0x3);
- return mask;
+ static const simdscalari mask = _simd_set1_epi32(0x3);
+ return mask;
}
- static const uint32_t numSamples = 2;
- static const uint32_t numCoverageSamples = 2;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
- static constexpr uint32_t samplePosXi[2] = { 0xC0, 0x40 };
- static constexpr uint32_t samplePosYi[2] = { 0xC0, 0x40 };
- static constexpr float samplePosX[2] = {0.75f, 0.25f};
- static constexpr float samplePosY[2] = {0.75f, 0.25f};
+ static const uint32_t numSamples = 2;
+ static const uint32_t numCoverageSamples = 2;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
+ static constexpr uint32_t samplePosXi[2] = {0xC0, 0x40};
+ static constexpr uint32_t samplePosYi[2] = {0xC0, 0x40};
+ static constexpr float samplePosX[2] = {0.75f, 0.25f};
+ static constexpr float samplePosY[2] = {0.75f, 0.25f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
{
- INLINE static float X(uint32_t sampleNum) {return 0.5f;};
- INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float X(uint32_t sampleNum) { return 0.5f; };
+ INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
INLINE static simdscalari FullSampleMask()
{
- static const simdscalari mask =_simd_set1_epi32(0x3);
- return mask;
+ static const simdscalari mask = _simd_set1_epi32(0x3);
+ return mask;
}
- static const uint32_t numSamples = 2;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
- static constexpr uint32_t samplePosXi[2] = { 0x80 , 0x80 };
- static constexpr uint32_t samplePosYi[2] = { 0x80 , 0x80 };
- static constexpr float samplePosX[2] = { 0.5f, 0.5f };
- static constexpr float samplePosY[2] = { 0.5f, 0.5f };
+ static const uint32_t numSamples = 2;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
+ static constexpr uint32_t samplePosXi[2] = {0x80, 0x80};
+ static constexpr uint32_t samplePosYi[2] = {0x80, 0x80};
+ static constexpr float samplePosX[2] = {0.5f, 0.5f};
+ static constexpr float samplePosY[2] = {0.5f, 0.5f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
{
- INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
- INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+ INLINE static float X(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosX[sampleNum];
+ };
+ INLINE static float Y(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosY[sampleNum];
+ };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
return mask;
}
- static const uint32_t numSamples = 4;
- static const uint32_t numCoverageSamples = 4;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
- static constexpr uint32_t samplePosXi[4] = { 0x60, 0xE0, 0x20, 0xA0 };
- static constexpr uint32_t samplePosYi[4] = { 0x20, 0x60, 0xA0, 0xE0 };
- static constexpr float samplePosX[4] = { 0.375f, 0.875f, 0.125f, 0.625f };
- static constexpr float samplePosY[4] = { 0.125f, 0.375f, 0.625f, 0.875f };
+ static const uint32_t numSamples = 4;
+ static const uint32_t numCoverageSamples = 4;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
+ static constexpr uint32_t samplePosXi[4] = {0x60, 0xE0, 0x20, 0xA0};
+ static constexpr uint32_t samplePosYi[4] = {0x20, 0x60, 0xA0, 0xE0};
+ static constexpr float samplePosX[4] = {0.375f, 0.875f, 0.125f, 0.625f};
+ static constexpr float samplePosY[4] = {0.125f, 0.375f, 0.625f, 0.875f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
{
- INLINE static float X(uint32_t sampleNum) {return 0.5f;};
- INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float X(uint32_t sampleNum) { return 0.5f; };
+ INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xF);
return mask;
}
- static const uint32_t numSamples = 4;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
- static constexpr uint32_t samplePosXi[4] = { 0x80, 0x80, 0x80, 0x80 };
- static constexpr uint32_t samplePosYi[4] = { 0x80, 0x80, 0x80, 0x80 };
- static constexpr float samplePosX[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
- static constexpr float samplePosY[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
+ static const uint32_t numSamples = 4;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
+ static constexpr uint32_t samplePosXi[4] = {0x80, 0x80, 0x80, 0x80};
+ static constexpr uint32_t samplePosYi[4] = {0x80, 0x80, 0x80, 0x80};
+ static constexpr float samplePosX[4] = {0.5f, 0.5f, 0.5f, 0.5f};
+ static constexpr float samplePosY[4] = {0.5f, 0.5f, 0.5f, 0.5f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
{
- INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
- INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+ INLINE static float X(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosX[sampleNum];
+ };
+ INLINE static float Y(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosY[sampleNum];
+ };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
return mask;
}
- static const uint32_t numSamples = 8;
- static const uint32_t numCoverageSamples = 8;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
- static constexpr uint32_t samplePosXi[8] = { 0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0 };
- static constexpr uint32_t samplePosYi[8] = { 0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10 };
- static constexpr float samplePosX[8] = { 0.5625f, 0.4375f, 0.8125f, 0.3125f, 0.1875f, 0.0625f, 0.6875f, 0.9375f };
- static constexpr float samplePosY[8] = { 0.3125f, 0.6875f, 0.5625f, 0.1875f, 0.8125f, 0.4375f, 0.9375f, 0.0625f };
+ static const uint32_t numSamples = 8;
+ static const uint32_t numCoverageSamples = 8;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
+ static constexpr uint32_t samplePosXi[8] = {0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0};
+ static constexpr uint32_t samplePosYi[8] = {0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10};
+ static constexpr float samplePosX[8] = {
+ 0.5625f, 0.4375f, 0.8125f, 0.3125f, 0.1875f, 0.0625f, 0.6875f, 0.9375f};
+ static constexpr float samplePosY[8] = {
+ 0.3125f, 0.6875f, 0.5625f, 0.1875f, 0.8125f, 0.4375f, 0.9375f, 0.0625f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
{
- INLINE static float X(uint32_t sampleNum) {return 0.5f;};
- INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float X(uint32_t sampleNum) { return 0.5f; };
+ INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFF);
return mask;
}
- static const uint32_t numSamples = 8;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
- static constexpr uint32_t samplePosXi[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
- static constexpr uint32_t samplePosYi[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
- static constexpr float samplePosX[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
- static constexpr float samplePosY[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
+ static const uint32_t numSamples = 8;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
+ static constexpr uint32_t samplePosXi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+ static constexpr uint32_t samplePosYi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+ static constexpr float samplePosX[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
+ static constexpr float samplePosY[8] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
{
- INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
- INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+ INLINE static float X(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosX[sampleNum];
+ };
+ INLINE static float Y(uint32_t sampleNum)
+ {
+ SWR_ASSERT(sampleNum < numSamples);
+ return samplePosY[sampleNum];
+ };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
return mask;
}
- static const uint32_t numSamples = 16;
- static const uint32_t numCoverageSamples = 16;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
- static constexpr uint32_t samplePosXi[16] = { 0x90, 0x70, 0x50, 0xC0, 0x30, 0xA0, 0xD0, 0xB0, 0x60, 0x80, 0x40, 0x20, 0x00, 0xF0, 0xE0, 0x10 };
- static constexpr uint32_t samplePosYi[16] = { 0x90, 0x50, 0xA0, 0x70, 0x60, 0xD0, 0xB0, 0x30, 0xE0, 0x10, 0x20, 0xC0, 0x80, 0x40, 0xF0, 0x00 };
- static constexpr float samplePosX[16] = { 0.5625f, 0.4375f, 0.3125f, 0.7500f, 0.1875f, 0.6250f, 0.8125f, 0.6875f, 0.3750f, 0.5000f, 0.2500f, 0.1250f, 0.0000f, 0.9375f, 0.8750f, 0.0625f };
- static constexpr float samplePosY[16] = { 0.5625f, 0.3125f, 0.6250f, 0.4375f, 0.3750f, 0.8125f, 0.6875f, 0.1875f, 0.8750f, 0.0625f, 0.1250f, 0.7500f, 0.5000f, 0.2500f, 0.9375f, 0.0000f };
+ static const uint32_t numSamples = 16;
+ static const uint32_t numCoverageSamples = 16;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
+ static constexpr uint32_t samplePosXi[16] = {0x90,
+ 0x70,
+ 0x50,
+ 0xC0,
+ 0x30,
+ 0xA0,
+ 0xD0,
+ 0xB0,
+ 0x60,
+ 0x80,
+ 0x40,
+ 0x20,
+ 0x00,
+ 0xF0,
+ 0xE0,
+ 0x10};
+ static constexpr uint32_t samplePosYi[16] = {0x90,
+ 0x50,
+ 0xA0,
+ 0x70,
+ 0x60,
+ 0xD0,
+ 0xB0,
+ 0x30,
+ 0xE0,
+ 0x10,
+ 0x20,
+ 0xC0,
+ 0x80,
+ 0x40,
+ 0xF0,
+ 0x00};
+ static constexpr float samplePosX[16] = {0.5625f,
+ 0.4375f,
+ 0.3125f,
+ 0.7500f,
+ 0.1875f,
+ 0.6250f,
+ 0.8125f,
+ 0.6875f,
+ 0.3750f,
+ 0.5000f,
+ 0.2500f,
+ 0.1250f,
+ 0.0000f,
+ 0.9375f,
+ 0.8750f,
+ 0.0625f};
+ static constexpr float samplePosY[16] = {0.5625f,
+ 0.3125f,
+ 0.6250f,
+ 0.4375f,
+ 0.3750f,
+ 0.8125f,
+ 0.6875f,
+ 0.1875f,
+ 0.8750f,
+ 0.0625f,
+ 0.1250f,
+ 0.7500f,
+ 0.5000f,
+ 0.2500f,
+ 0.9375f,
+ 0.0000f};
};
-template<>
+template <>
struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
{
- INLINE static float X(uint32_t sampleNum) {return 0.5f;};
- INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+ INLINE static float X(uint32_t sampleNum) { return 0.5f; };
+ INLINE static float Y(uint32_t sampleNum) { return 0.5f; };
INLINE static simdscalari FullSampleMask()
{
static const simdscalari mask = _simd_set1_epi32(0xFFFF);
return mask;
}
- static const uint32_t numSamples = 16;
- static const uint32_t numCoverageSamples = 1;
- static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
- static constexpr uint32_t samplePosXi[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
- static constexpr uint32_t samplePosYi[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
- static constexpr float samplePosX[16] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
- static constexpr float samplePosY[16] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
+ static const uint32_t numSamples = 16;
+ static const uint32_t numCoverageSamples = 1;
+ static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
+ static constexpr uint32_t samplePosXi[16] = {0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80};
+ static constexpr uint32_t samplePosYi[16] = {0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80,
+ 0x80};
+ static constexpr float samplePosX[16] = {0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f};
+ static constexpr float samplePosY[16] = {0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f,
+ 0.5f};
};
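// The integer sample positions above are the float positions in 0.8 fixed point:
// samplePosXi[i] == samplePosX[i] * 256 (e.g. 0x90 == 144 == 0.5625 * 256). A minimal
// compile-time sketch of that invariant follows; the helper is hypothetical (not part of
// the surrounding code) and assumes C++14 relaxed constexpr.
template <SWR_MULTISAMPLE_COUNT count, bool center>
constexpr bool SamplePositionsConsistent()
{
    using Traits = MultisampleTraits<count, center>;
    for (uint32_t i = 0; i < Traits::numSamples; ++i)
    {
        // compare the fixed-point tables against the scaled float tables
        if (Traits::samplePosXi[i] != static_cast<uint32_t>(Traits::samplePosX[i] * 256.0f) ||
            Traits::samplePosYi[i] != static_cast<uint32_t>(Traits::samplePosY[i] * 256.0f))
        {
            return false;
        }
    }
    return true;
}
static_assert(SamplePositionsConsistent<SWR_MULTISAMPLE_8X, false>(),
              "8x sample position tables disagree");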
INLINE
-bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
+bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount,
+ const SWR_MULTISAMPLE_POS& samplePos)
{
// detect if we're using standard or center sample patterns
const uint32_t *standardPosX, *standardPosY;
- switch(sampleCount)
+ switch (sampleCount)
{
case SWR_MULTISAMPLE_1X:
standardPosX = MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
}
// scan sample pattern for standard or center
- uint32_t numSamples = GetNumSamples(sampleCount);
- bool bIsStandard = true;
- if(numSamples > 1)
+ uint32_t numSamples = GetNumSamples(sampleCount);
+ bool bIsStandard = true;
+ if (numSamples > 1)
{
- for(uint32_t i = 0; i < numSamples; i++)
+ for (uint32_t i = 0; i < numSamples; i++)
{
- bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
- (standardPosY[i] == samplePos.Yi(i));
- if(!bIsStandard)
+ bIsStandard =
+ (standardPosX[i] == samplePos.Xi(i)) || (standardPosY[i] == samplePos.Yi(i));
+ if (!bIsStandard)
break;
}
}
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file pa.h
-*
-* @brief Definitions for primitive assembly.
-* N primitives are assembled at a time, where N is the SIMD width.
-* A state machine, that is specific for a given topology, drives the
-* assembly of vertices into triangles.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file pa.h
+ *
+ * @brief Definitions for primitive assembly.
+ * N primitives are assembled at a time, where N is the SIMD width.
+ * A state machine, specific to a given topology, drives the
+ * assembly of vertices into triangles.
+ *
+ ******************************************************************************/
#pragma once
#include "frontend.h"
SIMD_WIDTH_LOG2 = 4
};
- typedef simd16mask SIMDMASK;
+ typedef simd16mask SIMDMASK;
- typedef simd16scalar SIMDSCALAR;
- typedef simd16vector SIMDVECTOR;
- typedef simd16vertex SIMDVERTEX;
+ typedef simd16scalar SIMDSCALAR;
+ typedef simd16vector SIMDVECTOR;
+ typedef simd16vertex SIMDVERTEX;
- typedef simd16scalari SIMDSCALARI;
+ typedef simd16scalari SIMDSCALARI;
#else
enum
SIMD_WIDTH_LOG2 = 3
};
- typedef simdmask SIMDMASK;
+ typedef simdmask SIMDMASK;
- typedef simdscalar SIMDSCALAR;
- typedef simdvector SIMDVECTOR;
- typedef simdvertex SIMDVERTEX;
+ typedef simdscalar SIMDSCALAR;
+ typedef simdvector SIMDVECTOR;
+ typedef simdvertex SIMDVERTEX;
- typedef simdscalari SIMDSCALARI;
+ typedef simdscalari SIMDSCALARI;
#endif
- DRAW_CONTEXT *pDC{ nullptr }; // draw context
- uint8_t* pStreamBase{ nullptr }; // vertex stream
- uint32_t streamSizeInVerts{ 0 }; // total size of the input stream in verts
- uint32_t vertexStride{ 0 }; // stride of a vertex in simdvector units
+ DRAW_CONTEXT* pDC{nullptr}; // draw context
+ uint8_t* pStreamBase{nullptr}; // vertex stream
+ uint32_t streamSizeInVerts{0}; // total size of the input stream in verts
+ uint32_t vertexStride{0}; // stride of a vertex in simdvector units
- // The topology the binner will use. In some cases the FE changes the topology from the api state.
- PRIMITIVE_TOPOLOGY binTopology{ TOP_UNKNOWN };
+ // The topology the binner will use. In some cases the FE changes the topology from the api
+ // state.
+ PRIMITIVE_TOPOLOGY binTopology{TOP_UNKNOWN};
#if ENABLE_AVX512_SIMD16
- bool useAlternateOffset{ false };
+ bool useAlternateOffset{false};
#endif
- bool viewportArrayActive{ false };
- bool rtArrayActive { false };
- uint32_t numVertsPerPrim{ 0 };
+ bool viewportArrayActive{false};
+ bool rtArrayActive{false};
+ uint32_t numVertsPerPrim{0};
- PA_STATE(){}
- PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) :
- pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {}
+ PA_STATE() {}
+ PA_STATE(DRAW_CONTEXT* in_pDC,
+ uint8_t* in_pStreamBase,
+ uint32_t in_streamSizeInVerts,
+ uint32_t in_vertexStride,
+ uint32_t in_numVertsPerPrim) :
+ pDC(in_pDC),
+ pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts),
+ vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim)
+ {
+ }
- virtual bool HasWork() = 0;
+ virtual bool HasWork() = 0;
virtual simdvector& GetSimdVector(uint32_t index, uint32_t slot) = 0;
#if ENABLE_AVX512_SIMD16
virtual simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot) = 0;
#if ENABLE_AVX512_SIMD16
virtual bool Assemble(uint32_t slot, simd16vector verts[]) = 0;
#endif
- virtual void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[]) = 0;
- virtual bool NextPrim() = 0;
- virtual SIMDVERTEX& GetNextVsOutput() = 0;
- virtual bool GetNextStreamOutput() = 0;
- virtual SIMDMASK& GetNextVsIndices() = 0;
- virtual uint32_t NumPrims() = 0;
- virtual void Reset() = 0;
- virtual SIMDSCALARI GetPrimID(uint32_t startID) = 0;
+ virtual void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[]) = 0;
+ virtual bool NextPrim() = 0;
+ virtual SIMDVERTEX& GetNextVsOutput() = 0;
+ virtual bool GetNextStreamOutput() = 0;
+ virtual SIMDMASK& GetNextVsIndices() = 0;
+ virtual uint32_t NumPrims() = 0;
+ virtual void Reset() = 0;
+ virtual SIMDSCALARI GetPrimID(uint32_t startID) = 0;
};
// The Optimized PA is a state machine that assembles triangles from vertex shader simd
// 1. We call this the current and previous simd vertex.
// 2. The SSE simd is 4-wide which is not a multiple of 3 needed for triangles. In
// order to assemble the second triangle, for a triangle list, we'll need the
-// last vertex from the previous simd and the first 2 vertices from the current simd.
+// last vertex from the previous simd and the first 2 vertices from the current
+// simd.
// 3. At times the PA can assemble multiple triangles from the 2 simd vertices.
//
// This optimized PA is not cut aware, so it should only be used for non-indexed draws or draws
// without cuts
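// As a concrete sketch of point 2 above (the helper name is hypothetical, not part of the
// surrounding code): for a triangle list on a 4-wide simd, triangle t uses linear vertices
// {3t, 3t+1, 3t+2}, and vertex v lives in simd vertex batch v / 4 at lane v % 4. Triangle 1
// (verts 3, 4, 5) therefore needs lane 3 of the previous batch and lanes 0-1 of the current.
static inline void TriListVertexLocation(uint32_t tri,
                                         uint32_t corner,
                                         uint32_t& simdBatch,
                                         uint32_t& lane)
{
    uint32_t v = tri * 3 + corner; // linear vertex index within the draw
    simdBatch  = v / 4;            // which simd vertex batch ("prev" vs "cur")
    lane       = v % 4;            // lane within that batch
}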
struct PA_STATE_OPT : public PA_STATE
{
- uint32_t numPrims{ 0 }; // Total number of primitives for draw.
- uint32_t numPrimsComplete{ 0 }; // Total number of complete primitives.
+ uint32_t numPrims{0}; // Total number of primitives for draw.
+ uint32_t numPrimsComplete{0}; // Total number of complete primitives.
- uint32_t numSimdPrims{ 0 }; // Number of prims in current simd.
+ uint32_t numSimdPrims{0}; // Number of prims in current simd.
- uint32_t cur{ 0 }; // index to current VS output.
- uint32_t prev{ 0 }; // index to prev VS output. Not really needed in the state.
- const uint32_t first{ 0 }; // index to first VS output. Used for tri fan and line loop.
+ uint32_t cur{0}; // index to current VS output.
+ uint32_t prev{0}; // index to prev VS output. Not really needed in the state.
+ const uint32_t first{0}; // index to first VS output. Used for tri fan and line loop.
- uint32_t counter{ 0 }; // state counter
- bool reset{ false }; // reset state
+ uint32_t counter{0}; // state counter
+ bool reset{false}; // reset state
- uint32_t primIDIncr{ 0 }; // how much to increment for each vector (typically vector / {1, 2})
+ uint32_t primIDIncr{0}; // how much to increment for each vector (typically vector / {1, 2})
SIMDSCALARI primID;
- typedef bool(*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
+ typedef bool (*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
#if ENABLE_AVX512_SIMD16
- typedef bool(*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
+ typedef bool (*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
#endif
- typedef void(*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
+ typedef void (*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa,
+ uint32_t slot,
+ uint32_t primIndex,
+ simd4scalar verts[]);
- PFN_PA_FUNC pfnPaFunc{ nullptr }; // PA state machine function for assembling 4 triangles.
+ PFN_PA_FUNC pfnPaFunc{nullptr}; // PA state machine function for assembling 4 triangles.
#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaFunc_simd16{ nullptr };
+ PFN_PA_FUNC_SIMD16 pfnPaFunc_simd16{nullptr};
#endif
- PFN_PA_SINGLE_FUNC pfnPaSingleFunc{ nullptr }; // PA state machine function for assembling single triangle.
- PFN_PA_FUNC pfnPaFuncReset{ nullptr }; // initial state to set on reset
+ PFN_PA_SINGLE_FUNC pfnPaSingleFunc{
+ nullptr}; // PA state machine function for assembling single triangle.
+ PFN_PA_FUNC pfnPaFuncReset{nullptr}; // initial state to set on reset
#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaFuncReset_simd16{ nullptr };
+ PFN_PA_FUNC_SIMD16 pfnPaFuncReset_simd16{nullptr};
#endif
// state used to advance the PA when Next is called
- PFN_PA_FUNC pfnPaNextFunc{ nullptr };
+ PFN_PA_FUNC pfnPaNextFunc{nullptr};
#if ENABLE_AVX512_SIMD16
- PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16{ nullptr };
+ PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16{nullptr};
#endif
- uint32_t nextNumSimdPrims{ 0 };
- uint32_t nextNumPrimsIncrement{ 0 };
- bool nextReset{ false };
- bool isStreaming{ false };
+ uint32_t nextNumSimdPrims{0};
+ uint32_t nextNumPrimsIncrement{0};
+ bool nextReset{false};
+ bool isStreaming{false};
- SIMDMASK junkIndices { 0 }; // temporary index store for unused virtual function
+ SIMDMASK junkIndices{0}; // temporary index store for unused virtual function
PA_STATE_OPT() {}
- PA_STATE_OPT(DRAW_CONTEXT* pDC, uint32_t numPrims, uint8_t* pStream, uint32_t streamSizeInVerts,
- uint32_t vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
+ PA_STATE_OPT(DRAW_CONTEXT* pDC,
+ uint32_t numPrims,
+ uint8_t* pStream,
+ uint32_t streamSizeInVerts,
+ uint32_t vertexStride,
+ bool in_isStreaming,
+ uint32_t numVertsPerPrim,
+ PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
- bool HasWork()
- {
- return (this->numPrimsComplete < this->numPrims) ? true : false;
- }
+ bool HasWork() { return this->numPrimsComplete < this->numPrims; }
simdvector& GetSimdVector(uint32_t index, uint32_t slot)
{
SWR_ASSERT(slot < vertexStride);
- uint32_t offset = index * vertexStride + slot;
+ uint32_t offset = index * vertexStride + slot;
simdvector& vertexSlot = ((simdvector*)pStreamBase)[offset];
return vertexSlot;
}
simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot)
{
SWR_ASSERT(slot < vertexStride);
- uint32_t offset = index * vertexStride + slot;
+ uint32_t offset = index * vertexStride + slot;
simd16vector& vertexSlot = ((simd16vector*)pStreamBase)[offset];
return vertexSlot;
}
#endif
// Assembles 4 triangles. Each simdvector is a single vertex from 4
// triangles (xxxx yyyy zzzz wwww) and there are 3 verts per triangle.
- bool Assemble(uint32_t slot, simdvector verts[])
- {
- return this->pfnPaFunc(*this, slot, verts);
- }
+ bool Assemble(uint32_t slot, simdvector verts[]) { return this->pfnPaFunc(*this, slot, verts); }
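    // Laid out concretely for a 4-wide simd: verts[v] holds vertex v (0..2) of all four
    // triangles, so verts[0].x packs the x components of vertex 0 of triangles 0..3,
    // verts[0].y the matching y components, and so on through verts[2].w.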
#if ENABLE_AVX512_SIMD16
bool Assemble(uint32_t slot, simd16vector verts[])
else
{
this->counter = (this->reset) ? 0 : (this->counter + 1);
- this->reset = false;
+ this->reset = false;
}
if (!HasWork())
{
- morePrims = false; // no more to do
+ morePrims = false; // no more to do
}
return morePrims;
{
// prev undefined for first state
prev = cur;
- cur = counter;
+ cur = counter;
}
else
{
- // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in the buffer
+ // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in
+ // the buffer
uint32_t temp = prev;
prev = cur;
- cur = temp;
+ cur = temp;
}
SWR_ASSERT(cur < numSimdVerts);
bool GetNextStreamOutput()
{
this->prev = this->cur;
- this->cur = this->counter;
+ this->cur = this->counter;
return HasWork();
}
uint32_t NumPrims()
{
- return (this->numPrimsComplete + this->nextNumPrimsIncrement > this->numPrims) ?
- (SIMD_WIDTH - (this->numPrimsComplete + this->nextNumPrimsIncrement - this->numPrims)) : SIMD_WIDTH;
+ return (this->numPrimsComplete + this->nextNumPrimsIncrement > this->numPrims)
+ ? (SIMD_WIDTH -
+ (this->numPrimsComplete + this->nextNumPrimsIncrement - this->numPrims))
+ : SIMD_WIDTH;
}
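    // Worked example of the clamp above: with SIMD_WIDTH == 8, numPrims == 10,
    // numPrimsComplete == 8 and nextNumPrimsIncrement == 8, the final batch yields
    // 8 - (8 + 8 - 10) = 2 primitives.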
- void SetNextState(PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
+ void SetNextState(PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
+ PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+ uint32_t numSimdPrims = 0,
+ uint32_t numPrimsIncrement = 0,
+ bool reset = false)
{
- this->pfnPaNextFunc = pfnPaNextFunc;
- this->nextNumSimdPrims = numSimdPrims;
+ this->pfnPaNextFunc = pfnPaNextFunc;
+ this->nextNumSimdPrims = numSimdPrims;
this->nextNumPrimsIncrement = numPrimsIncrement;
- this->nextReset = reset;
+ this->nextReset = reset;
this->pfnPaSingleFunc = pfnPaNextSingleFunc;
}
#if ENABLE_AVX512_SIMD16
void SetNextState_simd16(PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
- PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
- {
- this->pfnPaNextFunc_simd16 = pfnPaNextFunc_simd16;
- this->pfnPaNextFunc = pfnPaNextFunc;
- this->nextNumSimdPrims = numSimdPrims;
+ PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
+ PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+ uint32_t numSimdPrims = 0,
+ uint32_t numPrimsIncrement = 0,
+ bool reset = false)
+ {
+ this->pfnPaNextFunc_simd16 = pfnPaNextFunc_simd16;
+ this->pfnPaNextFunc = pfnPaNextFunc;
+ this->nextNumSimdPrims = numSimdPrims;
this->nextNumPrimsIncrement = numPrimsIncrement;
- this->nextReset = reset;
+ this->nextReset = reset;
this->pfnPaSingleFunc = pfnPaNextSingleFunc;
}
this->pfnPaFunc_simd16 = this->pfnPaFuncReset_simd16;
#endif
this->numPrimsComplete = 0;
- this->numSimdPrims = 0;
- this->cur = 0;
- this->prev = 0;
- this->counter = 0;
- this->reset = false;
+ this->numSimdPrims = 0;
+ this->cur = 0;
+ this->prev = 0;
+ this->counter = 0;
+ this->reset = false;
}
SIMDSCALARI GetPrimID(uint32_t startID)
{
#if USE_SIMD16_FRONTEND
- return _simd16_add_epi32(this->primID,
+ return _simd16_add_epi32(
+ this->primID,
_simd16_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
#else
- return _simd_add_epi32(this->primID,
+ return _simd_add_epi32(
+ this->primID,
_simd_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
#endif
}
};
// helper C wrappers to avoid having to rewrite all the PA topology state functions
-INLINE void SetNextPaState(PA_STATE_OPT& pa, PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
+INLINE void SetNextPaState(PA_STATE_OPT& pa,
+ PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
+ PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+ uint32_t numSimdPrims = 0,
+ uint32_t numPrimsIncrement = 0,
+ bool reset = false)
{
- return pa.SetNextState(pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
+ return pa.SetNextState(
+ pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
}
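// A hypothetical topology function sketching how the wrapper is used (PaTriListExample and
// its single-prim variant are illustrative names only; the real Pa* state functions carry
// more bookkeeping):
static void PaTriListExampleSingle(PA_STATE_OPT& pa,
                                   uint32_t slot,
                                   uint32_t primIndex,
                                   simd4scalar verts[])
{
    // the single-primitive variant would gather one triangle here
}
static bool PaTriListExample(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
    // remain in this state for the next vector and report that no prims are ready yet
    SetNextPaState(pa, &PaTriListExample, &PaTriListExampleSingle);
    return false;
}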
#if ENABLE_AVX512_SIMD16
-INLINE void SetNextPaState_simd16(PA_STATE_OPT& pa, PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
- PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
- PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
- uint32_t numSimdPrims = 0,
- uint32_t numPrimsIncrement = 0,
- bool reset = false)
+INLINE void SetNextPaState_simd16(PA_STATE_OPT& pa,
+ PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
+ PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
+ PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+ uint32_t numSimdPrims = 0,
+ uint32_t numPrimsIncrement = 0,
+ bool reset = false)
{
- return pa.SetNextState_simd16(pfnPaNextFunc_simd16, pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
+ return pa.SetNextState_simd16(pfnPaNextFunc_simd16,
+ pfnPaNextFunc,
+ pfnPaNextSingleFunc,
+ numSimdPrims,
+ numPrimsIncrement,
+ reset);
}
#endif
// Cut-aware primitive assembler.
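// Illustration with hypothetical data (not from the surrounding code): for a line strip
// v0 v1 v2 v3 with the cut bit set on v2, the assembler emits line (v0, v1), hits the cut,
// calls RestartTopology() -- which resets curIndex, reverseWinding, and adjExtraVert but
// leaves the vertex store intact -- and then begins a new strip at v3.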
struct PA_STATE_CUT : public PA_STATE
{
- SIMDMASK* pCutIndices{ nullptr }; // cut indices buffer, 1 bit per vertex
- uint32_t numVerts{ 0 }; // number of vertices available in buffer store
- uint32_t numAttribs{ 0 }; // number of attributes
- int32_t numRemainingVerts{ 0 }; // number of verts remaining to be assembled
- uint32_t numVertsToAssemble{ 0 }; // total number of verts to assemble for the draw
+ SIMDMASK* pCutIndices{nullptr}; // cut indices buffer, 1 bit per vertex
+ uint32_t numVerts{0}; // number of vertices available in buffer store
+ uint32_t numAttribs{0}; // number of attributes
+ int32_t numRemainingVerts{0}; // number of verts remaining to be assembled
+ uint32_t numVertsToAssemble{0}; // total number of verts to assemble for the draw
#if ENABLE_AVX512_SIMD16
- OSALIGNSIMD16(uint32_t) indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
+ OSALIGNSIMD16(uint32_t)
+ indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
#else
- OSALIGNSIMD(uint32_t) indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
+ OSALIGNSIMD(uint32_t)
+ indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
#endif
- SIMDSCALARI vOffsets[MAX_NUM_VERTS_PER_PRIM]; // byte offsets for currently assembling simd
- uint32_t numPrimsAssembled{ 0 }; // number of primitives that are fully assembled
- uint32_t headVertex{ 0 }; // current unused vertex slot in vertex buffer store
- uint32_t tailVertex{ 0 }; // beginning vertex currently assembling
- uint32_t curVertex{ 0 }; // current unprocessed vertex
- uint32_t startPrimId{ 0 }; // starting prim id
- SIMDSCALARI vPrimId; // vector of prim ID
- bool needOffsets{ false }; // need to compute gather offsets for current SIMD
- uint32_t vertsPerPrim{ 0 };
- bool processCutVerts{ false }; // vertex indices with cuts should be processed as normal, otherwise they
- // are ignored. Fetch shader sends invalid verts on cuts that should be ignored
- // while the GS sends valid verts for every index
-
- simdvector junkVector; // junk simdvector for unimplemented API
+ SIMDSCALARI vOffsets[MAX_NUM_VERTS_PER_PRIM]; // byte offsets for currently assembling simd
+ uint32_t numPrimsAssembled{0}; // number of primitives that are fully assembled
+ uint32_t headVertex{0}; // current unused vertex slot in vertex buffer store
+ uint32_t tailVertex{0}; // beginning vertex currently assembling
+ uint32_t curVertex{0}; // current unprocessed vertex
+ uint32_t startPrimId{0}; // starting prim id
+ SIMDSCALARI vPrimId; // vector of prim ID
+ bool needOffsets{false}; // need to compute gather offsets for current SIMD
+ uint32_t vertsPerPrim{0};
+ bool processCutVerts{
+ false}; // vertex indices with cuts should be processed as normal, otherwise they
+ // are ignored. Fetch shader sends invalid verts on cuts that should be ignored
+ // while the GS sends valid verts for every index
+
+ simdvector junkVector; // junk simdvector for unimplemented API
#if ENABLE_AVX512_SIMD16
- simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
+ simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
#endif
// Topology state tracking
uint32_t vert[MAX_NUM_VERTS_PER_PRIM];
- uint32_t curIndex{ 0 };
- bool reverseWinding{ false }; // indicates reverse winding for strips
- int32_t adjExtraVert{ 0 }; // extra vert uses for tristrip w/ adj
+ uint32_t curIndex{0};
+ bool reverseWinding{false}; // indicates reverse winding for strips
+ int32_t adjExtraVert{0}; // extra vert used for tristrip w/ adj
- typedef void(PA_STATE_CUT::* PFN_PA_FUNC)(uint32_t vert, bool finish);
- PFN_PA_FUNC pfnPa{ nullptr }; // per-topology function that processes a single vert
+ typedef void (PA_STATE_CUT::*PFN_PA_FUNC)(uint32_t vert, bool finish);
+ PFN_PA_FUNC pfnPa{nullptr}; // per-topology function that processes a single vert
PA_STATE_CUT() {}
- PA_STATE_CUT(DRAW_CONTEXT* pDC, uint8_t* in_pStream, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, SIMDMASK* in_pIndices, uint32_t in_numVerts,
- uint32_t in_numAttribs, PRIMITIVE_TOPOLOGY topo, bool in_processCutVerts, uint32_t in_numVertsPerPrim)
- : PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
- {
- numVerts = in_streamSizeInVerts;
- numAttribs = in_numAttribs;
- binTopology = topo;
- needOffsets = false;
+ PA_STATE_CUT(DRAW_CONTEXT* pDC,
+ uint8_t* in_pStream,
+ uint32_t in_streamSizeInVerts,
+ uint32_t in_vertexStride,
+ SIMDMASK* in_pIndices,
+ uint32_t in_numVerts,
+ uint32_t in_numAttribs,
+ PRIMITIVE_TOPOLOGY topo,
+ bool in_processCutVerts,
+ uint32_t in_numVertsPerPrim) :
+ PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
+ {
+ numVerts = in_streamSizeInVerts;
+ numAttribs = in_numAttribs;
+ binTopology = topo;
+ needOffsets = false;
processCutVerts = in_processCutVerts;
numVertsToAssemble = numRemainingVerts = in_numVerts;
- numPrimsAssembled = 0;
+ numPrimsAssembled = 0;
headVertex = tailVertex = curVertex = 0;
- curIndex = 0;
+ curIndex = 0;
pCutIndices = in_pIndices;
memset(indices, 0, sizeof(indices));
#if USE_SIMD16_FRONTEND
vPrimId = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
#endif
reverseWinding = false;
- adjExtraVert = -1;
+ adjExtraVert = -1;
bool gsEnabled = pDC->pState->state.gsState.gsEnable;
- vertsPerPrim = NumVertsPerPrim(topo, gsEnabled);
+ vertsPerPrim = NumVertsPerPrim(topo, gsEnabled);
switch (topo)
{
- case TOP_TRIANGLE_LIST: pfnPa = &PA_STATE_CUT::ProcessVertTriList; break;
- case TOP_TRI_LIST_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertTriListAdj : &PA_STATE_CUT::ProcessVertTriListAdjNoGs; break;
- case TOP_TRIANGLE_STRIP: pfnPa = &PA_STATE_CUT::ProcessVertTriStrip; break;
- case TOP_TRI_STRIP_ADJ: if (gsEnabled)
- {
- pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj < true > ;
- }
- else
- {
- pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj < false > ;
- }
- break;
-
- case TOP_POINT_LIST: pfnPa = &PA_STATE_CUT::ProcessVertPointList; break;
- case TOP_LINE_LIST: pfnPa = &PA_STATE_CUT::ProcessVertLineList; break;
- case TOP_LINE_LIST_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break;
- case TOP_LINE_STRIP: pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break;
- case TOP_LISTSTRIP_ADJ: pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break;
- case TOP_RECT_LIST: pfnPa = &PA_STATE_CUT::ProcessVertRectList; break;
- default: assert(0 && "Unimplemented topology");
+ case TOP_TRIANGLE_LIST:
+ pfnPa = &PA_STATE_CUT::ProcessVertTriList;
+ break;
+ case TOP_TRI_LIST_ADJ:
+ pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertTriListAdj
+ : &PA_STATE_CUT::ProcessVertTriListAdjNoGs;
+ break;
+ case TOP_TRIANGLE_STRIP:
+ pfnPa = &PA_STATE_CUT::ProcessVertTriStrip;
+ break;
+ case TOP_TRI_STRIP_ADJ:
+ if (gsEnabled)
+ {
+ pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<true>;
+ }
+ else
+ {
+ pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<false>;
+ }
+ break;
+
+ case TOP_POINT_LIST:
+ pfnPa = &PA_STATE_CUT::ProcessVertPointList;
+ break;
+ case TOP_LINE_LIST:
+ pfnPa = &PA_STATE_CUT::ProcessVertLineList;
+ break;
+ case TOP_LINE_LIST_ADJ:
+ pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj
+ : &PA_STATE_CUT::ProcessVertLineListAdjNoGs;
+ break;
+ case TOP_LINE_STRIP:
+ pfnPa = &PA_STATE_CUT::ProcessVertLineStrip;
+ break;
+ case TOP_LISTSTRIP_ADJ:
+ pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj
+ : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs;
+ break;
+ case TOP_RECT_LIST:
+ pfnPa = &PA_STATE_CUT::ProcessVertRectList;
+ break;
+ default:
+ assert(0 && "Unimplemented topology");
}
}
SIMDVERTEX& GetNextVsOutput()
{
uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
- this->headVertex = (this->headVertex + SIMD_WIDTH) % this->numVerts;
- this->needOffsets = true;
- SIMDVECTOR* pVertex = &((SIMDVECTOR*)pStreamBase)[vertexIndex * vertexStride];
+ this->headVertex = (this->headVertex + SIMD_WIDTH) % this->numVerts;
+ this->needOffsets = true;
+ SIMDVECTOR* pVertex = &((SIMDVECTOR*)pStreamBase)[vertexIndex * vertexStride];
return *(SIMDVERTEX*)pVertex;
}
SIMDMASK& GetNextVsIndices()
{
- uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
+ uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
SIMDMASK* pCurCutIndex = this->pCutIndices + vertexIndex;
return *pCurCutIndex;
}
#endif
this->numRemainingVerts = this->numVertsToAssemble;
this->numPrimsAssembled = 0;
- this->curIndex = 0;
- this->curVertex = 0;
- this->tailVertex = 0;
- this->headVertex = 0;
- this->reverseWinding = false;
- this->adjExtraVert = -1;
+ this->curIndex = 0;
+ this->curVertex = 0;
+ this->tailVertex = 0;
+ this->headVertex = 0;
+ this->reverseWinding = false;
+ this->adjExtraVert = -1;
#if USE_SIMD16_FRONTEND
this->vPrimId = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
#else
#endif
}
- bool HasWork()
- {
- return this->numRemainingVerts > 0 || this->adjExtraVert != -1;
- }
+ bool HasWork() { return this->numRemainingVerts > 0 || this->adjExtraVert != -1; }
bool IsVertexStoreFull()
{
void RestartTopology()
{
- this->curIndex = 0;
+ this->curIndex = 0;
this->reverseWinding = false;
- this->adjExtraVert = -1;
+ this->adjExtraVert = -1;
}
bool IsCutIndex(uint32_t vertex)
{
- uint32_t vertexIndex = vertex / SIMD_WIDTH;
+ uint32_t vertexIndex = vertex / SIMD_WIDTH;
uint32_t vertexOffset = vertex & (SIMD_WIDTH - 1);
return CheckBit(this->pCutIndices[vertexIndex], vertexOffset);
}
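    // e.g. with SIMD_WIDTH == 8, vertex 13 maps to pCutIndices[1], bit 5
    // (13 / 8 == 1, 13 & 7 == 5).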
// have assembled SIMD prims
void ProcessVerts()
{
- while (this->numPrimsAssembled != SIMD_WIDTH &&
- this->numRemainingVerts > 0 &&
- this->curVertex != this->headVertex)
+ while (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts > 0 &&
+ this->curVertex != this->headVertex)
{
// if cut index, restart topology
if (IsCutIndex(this->curVertex))
}
this->curVertex++;
- if (this->curVertex >= this->numVerts) {
- this->curVertex = 0;
+ if (this->curVertex >= this->numVerts)
+ {
+ this->curVertex = 0;
}
this->numRemainingVerts--;
}
// special case last primitive for tri strip w/ adj
- if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts == 0 && this->adjExtraVert != -1)
+ if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts == 0 &&
+ this->adjExtraVert != -1)
{
(this->*pfnPa)(this->curVertex, true);
}
{
// done with current batch
// advance tail to the current unsubmitted vertex
- this->tailVertex = this->curVertex;
+ this->tailVertex = this->curVertex;
this->numPrimsAssembled = 0;
#if USE_SIMD16_FRONTEND
this->vPrimId = _simd16_add_epi32(vPrimId, _simd16_set1_epi32(SIMD_WIDTH));
{
for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
{
- uint32_t vertexStrideBytes = vertexStride * sizeof(SIMDVECTOR);
- SIMDSCALARI vIndices = *(SIMDSCALARI*)&this->indices[v][0];
+ uint32_t vertexStrideBytes = vertexStride * sizeof(SIMDVECTOR);
+ SIMDSCALARI vIndices = *(SIMDSCALARI*)&this->indices[v][0];
// step to simdvertex batch
const uint32_t simdShift = SIMD_WIDTH_LOG2;
#if USE_SIMD16_FRONTEND
SIMDSCALARI vVertexBatch = _simd16_srai_epi32(vIndices, simdShift);
- this->vOffsets[v] = _simd16_mullo_epi32(vVertexBatch, _simd16_set1_epi32(vertexStrideBytes));
+ this->vOffsets[v] =
+ _simd16_mullo_epi32(vVertexBatch, _simd16_set1_epi32(vertexStrideBytes));
#else
SIMDSCALARI vVertexBatch = _simd_srai_epi32(vIndices, simdShift);
- this->vOffsets[v] = _simd_mullo_epi32(vVertexBatch, _simd_set1_epi32(vertexStrideBytes));
+ this->vOffsets[v] =
+ _simd_mullo_epi32(vVertexBatch, _simd_set1_epi32(vertexStrideBytes));
#endif
// step to index
const uint32_t simdMask = SIMD_WIDTH - 1;
#if USE_SIMD16_FRONTEND
SIMDSCALARI vVertexIndex = _simd16_and_si(vIndices, _simd16_set1_epi32(simdMask));
- this->vOffsets[v] = _simd16_add_epi32(this->vOffsets[v], _simd16_mullo_epi32(vVertexIndex, _simd16_set1_epi32(sizeof(float))));
+ this->vOffsets[v] = _simd16_add_epi32(
+ this->vOffsets[v],
+ _simd16_mullo_epi32(vVertexIndex, _simd16_set1_epi32(sizeof(float))));
#else
SIMDSCALARI vVertexIndex = _simd_and_si(vIndices, _simd_set1_epi32(simdMask));
- this->vOffsets[v] = _simd_add_epi32(this->vOffsets[v], _simd_mullo_epi32(vVertexIndex, _simd_set1_epi32(sizeof(float))));
+ this->vOffsets[v] =
+ _simd_add_epi32(this->vOffsets[v],
+ _simd_mullo_epi32(vVertexIndex, _simd_set1_epi32(sizeof(float))));
#endif
}
}
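    // Net effect per gathered index i (scalar sketch of the simd math above):
    //   offsetBytes(i) = (i / SIMD_WIDTH) * vertexStride * sizeof(SIMDVECTOR) // vertex batch
    //                  + (i % SIMD_WIDTH) * sizeof(float);                    // lane in batch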
- bool Assemble(uint32_t slot, simdvector *verts)
+ bool Assemble(uint32_t slot, simdvector* verts)
{
// process any outstanding verts
ProcessVerts();
return false;
}
- // cache off gather offsets given the current SIMD set of indices the first time we get an assemble
+ // cache off gather offsets given the current SIMD set of indices the first time we get an
+ // assemble
if (this->needOffsets)
{
ComputeOffsets();
simd16scalar temp = _simd16_i32gather_ps(pBase, offsets, 1);
// Assigning to a temporary first to avoid an MSVC 2017 compiler bug
- simdscalar t = useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
+ simdscalar t =
+ useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
verts[v].v[c] = t;
#else
verts[v].v[c] = _simd_i32gather_ps(pBase, offsets, 1);
// v1, v3 = v1 + v2 - v0, v2
// v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
- temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
verts[1].v[c] = _simd16_extract_ps(temp, 0);
}
#if ENABLE_AVX512_SIMD16
bool Assemble(uint32_t slot, simd16vector verts[])
{
- // process any outstanding verts
+ // process any outstanding verts
ProcessVerts();
// return false if we don't have enough prims assembled
return false;
}
- // cache off gather offsets given the current SIMD set of indices the first time we get an assemble
+ // cache off gather offsets given the current SIMD set of indices the first time we get an
+ // assemble
if (this->needOffsets)
{
ComputeOffsets();
#if USE_SIMD16_FRONTEND
verts[v].v[c] = _simd16_i32gather_ps(pBase, offsets, 1);
#else
- verts[v].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), _simd_i32gather_ps(pBase, offsets, 1), 0);
+ verts[v].v[c] = _simd16_insert_ps(
+ _simd16_setzero_ps(), _simd_i32gather_ps(pBase, offsets, 1), 0);
#endif
// move base to next component
// v1, v3 = v1 + v2 - v0, v2
// v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
- temp = _simd16_sub_ps(temp, verts[1].v[c]);
- verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+ temp = _simd16_sub_ps(temp, verts[1].v[c]);
+ verts[1].v[c] =
+ _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
}
}
#endif
void AssembleSingle(uint32_t slot, uint32_t triIndex, simd4scalar tri[3])
{
- // move to slot
+ // move to slot
for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
{
uint32_t* pOffset = (uint32_t*)&this->vOffsets[v];
#if USE_SIMD16_FRONTEND
- uint32_t offset = useAlternateOffset ? pOffset[triIndex + SIMD_WIDTH_DIV2] : pOffset[triIndex];
+ uint32_t offset =
+ useAlternateOffset ? pOffset[triIndex + SIMD_WIDTH_DIV2] : pOffset[triIndex];
#else
uint32_t offset = pOffset[triIndex];
#endif
for (uint32_t c = 0; c < 4; ++c)
{
float* pComponent = (float*)(this->pStreamBase + offset);
- pVert[c] = *pComponent;
+ pVert[c] = *pComponent;
offset += SIMD_WIDTH * sizeof(float);
}
}
}
}
- uint32_t NumPrims()
- {
- return this->numPrimsAssembled;
- }
+ uint32_t NumPrims() { return this->numPrimsAssembled; }
// Per-topology functions
void ProcessVertTriStrip(uint32_t index, bool finish)
this->numPrimsAssembled++;
// set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
+ this->vert[0] = this->vert[1];
+ this->vert[1] = this->vert[2];
this->curIndex = 2;
this->reverseWinding ^= 1;
}
}
- template<bool gsEnabled>
+ template <bool gsEnabled>
void AssembleTriStripAdj()
{
if (!gsEnabled)
this->numPrimsAssembled++;
}
-
- template<bool gsEnabled>
+ template <bool gsEnabled>
void ProcessVertTriStripAdj(uint32_t index, bool finish)
{
// handle last primitive of tristrip
}
}
-
void ProcessVertLineList(uint32_t index, bool finish)
{
this->vert[this->curIndex] = index;
this->numPrimsAssembled++;
// set up next prim state
- this->vert[0] = this->vert[1];
+ this->vert[0] = this->vert[1];
this->curIndex = 1;
}
}
this->numPrimsAssembled++;
// set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
- this->vert[2] = this->vert[3];
+ this->vert[0] = this->vert[1];
+ this->vert[1] = this->vert[2];
+ this->vert[2] = this->vert[3];
this->curIndex = 3;
}
}
this->numPrimsAssembled++;
// set up next prim state
- this->vert[0] = this->vert[1];
- this->vert[1] = this->vert[2];
- this->vert[2] = this->vert[3];
+ this->vert[0] = this->vert[1];
+ this->vert[1] = this->vert[2];
+ this->vert[2] = this->vert[3];
this->curIndex = 3;
}
}
// second triangle in the rectangle
// v1, v3 = v1 + v2 - v0, v2
- this->indices[0][this->numPrimsAssembled+1] = this->vert[1];
- this->indices[1][this->numPrimsAssembled+1] = this->vert[0];
- this->indices[2][this->numPrimsAssembled+1] = this->vert[2];
+ this->indices[0][this->numPrimsAssembled + 1] = this->vert[1];
+ this->indices[1][this->numPrimsAssembled + 1] = this->vert[0];
+ this->indices[2][this->numPrimsAssembled + 1] = this->vert[2];
// increment numPrimsAssembled
this->numPrimsAssembled += 2;
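    // The second triangle's new corner follows from the parallelogram rule: given rect
    // corners v0, v1, v2, the fourth corner is v3 = v1 + v2 - v0, which is what the
    // "v1, v3 = v1 + v2 - v0, v2" expansion above encodes.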
// Primitive Assembly for data output from the DomainShader.
struct PA_TESS : PA_STATE
{
- PA_TESS(
- DRAW_CONTEXT *in_pDC,
- const SIMDSCALAR* in_pVertData,
- uint32_t in_attributeStrideInVectors,
- uint32_t in_vertexStride,
- uint32_t in_numAttributes,
- uint32_t* (&in_ppIndices)[3],
- uint32_t in_numPrims,
- PRIMITIVE_TOPOLOGY in_binTopology,
- uint32_t numVertsPerPrim) :
+ PA_TESS(DRAW_CONTEXT* in_pDC,
+ const SIMDSCALAR* in_pVertData,
+ uint32_t in_attributeStrideInVectors,
+ uint32_t in_vertexStride,
+ uint32_t in_numAttributes,
+ uint32_t* (&in_ppIndices)[3],
+ uint32_t in_numPrims,
+ PRIMITIVE_TOPOLOGY in_binTopology,
+ uint32_t numVertsPerPrim) :
PA_STATE(in_pDC, nullptr, 0, in_vertexStride, numVertsPerPrim),
- m_pVertexData(in_pVertData),
- m_attributeStrideInVectors(in_attributeStrideInVectors),
- m_numAttributes(in_numAttributes),
- m_numPrims(in_numPrims)
+ m_pVertexData(in_pVertData), m_attributeStrideInVectors(in_attributeStrideInVectors),
+ m_numAttributes(in_numAttributes), m_numPrims(in_numPrims)
{
#if USE_SIMD16_FRONTEND
m_vPrimId = _simd16_setzero_si();
#else
m_vPrimId = _simd_setzero_si();
#endif
- binTopology = in_binTopology;
+ binTopology = in_binTopology;
m_ppIndices[0] = in_ppIndices[0];
m_ppIndices[1] = in_ppIndices[1];
m_ppIndices[2] = in_ppIndices[2];
}
}
- bool HasWork()
- {
- return m_numPrims != 0;
- }
+ bool HasWork() { return m_numPrims != 0; }
simdvector& GetSimdVector(uint32_t index, uint32_t slot)
{
{
SWR_ASSERT(numPrims <= SIMD_WIDTH);
#if USE_SIMD16_FRONTEND
- static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] =
- {
+ static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- };
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
return _simd16_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
#else
- static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] =
- {
- -1, -1, -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0, 0, 0, 0
- };
+ static const OSALIGNLINE(int32_t)
+ maskGen[SIMD_WIDTH * 2] = {-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
return _simd_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
#endif
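    // e.g. with SIMD_WIDTH == 8 and numPrims == 3, the load starts at maskGen[5] and yields
    // {-1, -1, -1, 0, 0, 0, 0, 0}: an all-ones lane mask for exactly the 3 valid primitives.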
SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
- const float* pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ const float* pBaseAttrib =
+ (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
for (uint32_t c = 0; c < 4; ++c)
{
#if USE_SIMD16_FRONTEND
- simd16scalar temp = _simd16_mask_i32gather_ps(
- _simd16_setzero_ps(),
- pBase,
- indices,
- _simd16_castsi_ps(mask),
- 4 /* gcc doesn't like sizeof(float) */);
-
- verts[i].v[c] = useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
+ simd16scalar temp =
+ _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
+ pBase,
+ indices,
+ _simd16_castsi_ps(mask),
+ 4 /* gcc doesn't like sizeof(float) */);
+
+ verts[i].v[c] =
+ useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
#else
- verts[i].v[c] = _simd_mask_i32gather_ps(
- _simd_setzero_ps(),
- pBase,
- indices,
- _simd_castsi_ps(mask),
- 4); // gcc doesn't like sizeof(float)
+ verts[i].v[c] = _simd_mask_i32gather_ps(_simd_setzero_ps(),
+ pBase,
+ indices,
+ _simd_castsi_ps(mask),
+ 4); // gcc doesn't like sizeof(float)
#endif
pBase += m_attributeStrideInVectors * SIMD_WIDTH;
}
SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
- const float* pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ const float* pBaseAttrib =
+ (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
for (uint32_t c = 0; c < 4; ++c)
{
#if USE_SIMD16_FRONTEND
- verts[i].v[c] = _simd16_mask_i32gather_ps(
- _simd16_setzero_ps(),
- pBase,
- indices,
- _simd16_castsi_ps(mask),
- 4 /* gcc doesn't like sizeof(float) */);
+ verts[i].v[c] = _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
+ pBase,
+ indices,
+ _simd16_castsi_ps(mask),
+ 4 /* gcc doesn't like sizeof(float) */);
#else
- simdscalar temp = _simd_mask_i32gather_ps(
- _simd_setzero_ps(),
- pBase,
- indices,
- _simd_castsi_ps(mask),
- 4 /* gcc doesn't like sizeof(float) */);
- verts[i].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+ simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(),
+ pBase,
+ indices,
+ _simd_castsi_ps(mask),
+ 4 /* gcc doesn't like sizeof(float) */);
+ verts[i].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
#endif
pBase += m_attributeStrideInVectors * SIMD_WIDTH;
}
void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
SWR_ASSERT(slot < m_numAttributes);
- SWR_ASSERT(primIndex < PA_TESS::NumPrims());
- const float* pVertDataBase = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+ SWR_ASSERT(primIndex < PA_TESS::NumPrims());
+
+ const float* pVertDataBase =
+ (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
{
#if USE_SIMD16_FRONTEND
- uint32_t index = useAlternateOffset ? m_ppIndices[i][primIndex + SIMD_WIDTH_DIV2] : m_ppIndices[i][primIndex];
+ uint32_t index = useAlternateOffset ? m_ppIndices[i][primIndex + SIMD_WIDTH_DIV2]
+ : m_ppIndices[i][primIndex];
#else
uint32_t index = m_ppIndices[i][primIndex];
#endif
const float* pVertData = pVertDataBase;
- float* pVert = (float*)&verts[i];
+ float* pVert = (float*)&verts[i];
for (uint32_t c = 0; c < 4; ++c)
{
return junkIndices;
}
- uint32_t NumPrims()
- {
- return std::min<uint32_t>(m_numPrims, SIMD_WIDTH);
- }
+ uint32_t NumPrims() { return std::min<uint32_t>(m_numPrims, SIMD_WIDTH); }
- void Reset()
- {
- SWR_NOT_IMPL;
- }
+ void Reset() { SWR_NOT_IMPL; }
SIMDSCALARI GetPrimID(uint32_t startID)
{
}
private:
- const SIMDSCALAR* m_pVertexData = nullptr;
- uint32_t m_attributeStrideInVectors = 0;
- uint32_t m_numAttributes = 0;
- uint32_t m_numPrims = 0;
- uint32_t* m_ppIndices[3];
+ const SIMDSCALAR* m_pVertexData = nullptr;
+ uint32_t m_attributeStrideInVectors = 0;
+ uint32_t m_numAttributes = 0;
+ uint32_t m_numPrims = 0;
+ uint32_t* m_ppIndices[3];
- uint32_t m_numVertsPerPrim = 0;
+ uint32_t m_numVertsPerPrim = 0;
- SIMDSCALARI m_vPrimId;
+ SIMDSCALARI m_vPrimId;
- simdvector junkVector; // junk simdvector for unimplemented API
+ simdvector junkVector; // junk simdvector for unimplemented API
#if ENABLE_AVX512_SIMD16
- simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
+ simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
#endif
- SIMDVERTEX junkVertex; // junk SIMDVERTEX for unimplemented API
- SIMDMASK junkIndices; // temporary index store for unused virtual function
+ SIMDVERTEX junkVertex; // junk SIMDVERTEX for unimplemented API
+ SIMDMASK junkIndices; // temporary index store for unused virtual function
};
-// Primitive Assembler factory class, responsible for creating and initializing the correct assembler
-// based on state.
+// Primitive Assembler factory class, responsible for creating and initializing the correct
+// assembler based on state.
template <typename IsIndexedT, typename IsCutIndexEnabledT>
struct PA_FACTORY
{
- PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts, PA_STATE::SIMDVERTEX *pVertexStore, uint32_t vertexStoreSize, uint32_t vertexStride, uint32_t numVertsPerPrim) : topo(in_topo)
+ PA_FACTORY(DRAW_CONTEXT* pDC,
+ PRIMITIVE_TOPOLOGY in_topo,
+ uint32_t numVerts,
+ PA_STATE::SIMDVERTEX* pVertexStore,
+ uint32_t vertexStoreSize,
+ uint32_t vertexStride,
+ uint32_t numVertsPerPrim) :
+ topo(in_topo)
{
#if KNOB_ENABLE_CUT_AWARE_PA == TRUE
const API_STATE& state = GetApiState(pDC);
- if ((IsIndexedT::value && IsCutIndexEnabledT::value && (
- topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
- topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
- topo == TOP_TRIANGLE_LIST)) ||
-
- // non-indexed draws with adjacency topologies must use cut-aware PA until we add support
- // for them in the optimized PA
- (topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ))
+ if ((IsIndexedT::value && IsCutIndexEnabledT::value &&
+ (topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST || topo == TOP_LINE_LIST ||
+ topo == TOP_LINE_STRIP || topo == TOP_TRIANGLE_LIST)) ||
+
+ // non-indexed draws with adjacency topologies must use cut-aware PA until we add
+ // support for them in the optimized PA
+ (topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ ||
+ topo == TOP_TRI_STRIP_ADJ))
{
memset(&indexStore, 0, sizeof(indexStore));
uint32_t numAttribs = state.feNumAttributes;
- new (&this->paCut) PA_STATE_CUT(pDC, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH,
- vertexStride, &this->indexStore[0], numVerts, numAttribs, state.topology, false, numVertsPerPrim);
+ new (&this->paCut) PA_STATE_CUT(pDC,
+ reinterpret_cast<uint8_t*>(pVertexStore),
+ vertexStoreSize * PA_STATE::SIMD_WIDTH,
+ vertexStride,
+ &this->indexStore[0],
+ numVerts,
+ numAttribs,
+ state.topology,
+ false,
+ numVertsPerPrim);
cutPA = true;
}
else
#endif
{
uint32_t numPrims = GetNumPrims(in_topo, numVerts);
- new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH, vertexStride, false, numVertsPerPrim);
+ new (&this->paOpt) PA_STATE_OPT(pDC,
+ numPrims,
+ reinterpret_cast<uint8_t*>(pVertexStore),
+ vertexStoreSize * PA_STATE::SIMD_WIDTH,
+ vertexStride,
+ false,
+ numVertsPerPrim);
cutPA = false;
}
-
}
PA_STATE& GetPA()
PA_STATE_OPT paOpt;
PA_STATE_CUT paCut;
- bool cutPA{ false };
+ bool cutPA{false};
- PRIMITIVE_TOPOLOGY topo{ TOP_UNKNOWN };
+ PRIMITIVE_TOPOLOGY topo{TOP_UNKNOWN};
- PA_STATE::SIMDMASK indexStore[MAX_NUM_VERTS_PER_PRIM];
+ PA_STATE::SIMDMASK indexStore[MAX_NUM_VERTS_PER_PRIM];
};
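// Hypothetical usage sketch (the draw loop below is an assumption, not code from this file):
//   PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(
//       pDC, topo, numVerts, pVertexStore, vertexStoreSize, vertexStride, numVertsPerPrim);
//   PA_STATE& pa = paFactory.GetPA();
//   while (pa.HasWork()) { /* shade verts, then pa.Assemble(...) and pa.NextPrim() */ }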
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file pa_avx.cpp
-*
-* @brief AVX implementation for primitive assembly.
-* N primitives are assembled at a time, where N is the SIMD width.
-* A state machine, that is specific for a given topology, drives the
-* assembly of vertices into triangles.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file pa_avx.cpp
+ *
+ * @brief AVX implementation for primitive assembly.
+ * N primitives are assembled at a time, where N is the SIMD width.
+ * A state machine, specific to a given topology, drives the
+ * assembly of vertices into triangles.
+ *
+ ******************************************************************************/
#include "context.h"
#include "pa.h"
#include "frontend.h"
#if (KNOB_SIMD_WIDTH == 8)
-INLINE simd4scalar swizzleLane0(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane0(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 0);
}
-INLINE simd4scalar swizzleLane1(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane1(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 0);
}
-INLINE simd4scalar swizzleLane2(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane2(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 0);
}
-INLINE simd4scalar swizzleLane3(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane3(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 0);
}
-INLINE simd4scalar swizzleLane4(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane4(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 1);
}
-INLINE simd4scalar swizzleLane5(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane5(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 1);
}
-INLINE simd4scalar swizzleLane6(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane6(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 1);
}
-INLINE simd4scalar swizzleLane7(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane7(const simdscalar& x,
+ const simdscalar& y,
+ const simdscalar& z,
+ const simdscalar& w)
{
simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 1);
}
-INLINE simd4scalar swizzleLane0(const simdvector &v)
+INLINE simd4scalar swizzleLane0(const simdvector& v)
{
return swizzleLane0(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane1(const simdvector &v)
+INLINE simd4scalar swizzleLane1(const simdvector& v)
{
return swizzleLane1(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane2(const simdvector &v)
+INLINE simd4scalar swizzleLane2(const simdvector& v)
{
return swizzleLane2(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane3(const simdvector &v)
+INLINE simd4scalar swizzleLane3(const simdvector& v)
{
return swizzleLane3(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane4(const simdvector &v)
+INLINE simd4scalar swizzleLane4(const simdvector& v)
{
return swizzleLane4(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane5(const simdvector &v)
+INLINE simd4scalar swizzleLane5(const simdvector& v)
{
return swizzleLane5(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane6(const simdvector &v)
+INLINE simd4scalar swizzleLane6(const simdvector& v)
{
return swizzleLane6(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLane7(const simdvector &v)
+INLINE simd4scalar swizzleLane7(const simdvector& v)
{
return swizzleLane7(v.x, v.y, v.z, v.w);
}
-INLINE simd4scalar swizzleLaneN(const simdvector &v, int lane)
+INLINE simd4scalar swizzleLaneN(const simdvector& v, int lane)
{
switch (lane)
{
}
#if ENABLE_AVX512_SIMD16
-INLINE simd4scalar swizzleLane0(const simd16vector &v)
+INLINE simd4scalar swizzleLane0(const simd16vector& v)
{
- return swizzleLane0(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane0(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane1(const simd16vector &v)
+INLINE simd4scalar swizzleLane1(const simd16vector& v)
{
- return swizzleLane1(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane1(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane2(const simd16vector &v)
+INLINE simd4scalar swizzleLane2(const simd16vector& v)
{
- return swizzleLane2(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane2(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane3(const simd16vector &v)
+INLINE simd4scalar swizzleLane3(const simd16vector& v)
{
- return swizzleLane3(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane3(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane4(const simd16vector &v)
+INLINE simd4scalar swizzleLane4(const simd16vector& v)
{
- return swizzleLane4(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane4(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane5(const simd16vector &v)
+INLINE simd4scalar swizzleLane5(const simd16vector& v)
{
- return swizzleLane5(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane5(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane6(const simd16vector &v)
+INLINE simd4scalar swizzleLane6(const simd16vector& v)
{
- return swizzleLane6(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane6(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane7(const simd16vector &v)
+INLINE simd4scalar swizzleLane7(const simd16vector& v)
{
- return swizzleLane7(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+ return swizzleLane7(_simd16_extract_ps(v.x, 0),
+ _simd16_extract_ps(v.y, 0),
+ _simd16_extract_ps(v.z, 0),
+ _simd16_extract_ps(v.w, 0));
}
-INLINE simd4scalar swizzleLane8(const simd16vector &v)
+INLINE simd4scalar swizzleLane8(const simd16vector& v)
{
- return swizzleLane0(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane0(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLane9(const simd16vector &v)
+INLINE simd4scalar swizzleLane9(const simd16vector& v)
{
- return swizzleLane1(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane1(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneA(const simd16vector &v)
+INLINE simd4scalar swizzleLaneA(const simd16vector& v)
{
- return swizzleLane2(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane2(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneB(const simd16vector &v)
+INLINE simd4scalar swizzleLaneB(const simd16vector& v)
{
- return swizzleLane3(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane3(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneC(const simd16vector &v)
+INLINE simd4scalar swizzleLaneC(const simd16vector& v)
{
- return swizzleLane4(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane4(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneD(const simd16vector &v)
+INLINE simd4scalar swizzleLaneD(const simd16vector& v)
{
- return swizzleLane5(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane5(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneE(const simd16vector &v)
+INLINE simd4scalar swizzleLaneE(const simd16vector& v)
{
- return swizzleLane6(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane6(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneF(const simd16vector &v)
+INLINE simd4scalar swizzleLaneF(const simd16vector& v)
{
- return swizzleLane7(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+ return swizzleLane7(_simd16_extract_ps(v.x, 1),
+ _simd16_extract_ps(v.y, 1),
+ _simd16_extract_ps(v.z, 1),
+ _simd16_extract_ps(v.w, 1));
}
-INLINE simd4scalar swizzleLaneN(const simd16vector &v, int lane)
+INLINE simd4scalar swizzleLaneN(const simd16vector& v, int lane)
{
switch (lane)
{
{
uint32_t input_cp = primIndex * TotalControlPoints + cp;
#if USE_SIMD16_FRONTEND
- uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+ uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
#else
- uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
+ uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
uint32_t input_lane = input_cp % KNOB_SIMD_WIDTH;
#endif
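+    // Worked example of the div/mod addressing above: with
+    // TotalControlPoints = 4 and a SIMD16 frontend, control point 2 of
+    // patch 5 is input_cp = 5 * 4 + 2 = 22, which lives in simdvector
+    // 22 / 16 = 1 at lane 22 % 16 = 6.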
for (uint32_t i = 0; i < 4; ++i)
{
#if USE_SIMD16_FRONTEND
- const float* pInputVec = (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
+ const float* pInputVec =
+ (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
#else
const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
#endif
}
}
-template<uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
+template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
static bool PaPatchList(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
- SetNextPaState(
- pa,
- PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
- PaPatchListSingle<TotalControlPoints>);
+ SetNextPaState(pa,
+ PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
+ PaPatchListSingle<TotalControlPoints>);
return false;
}
-template<uint32_t TotalControlPoints>
+template <uint32_t TotalControlPoints>
static bool PaPatchListTerm(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
// We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
for (uint32_t lane = 0; lane < KNOB_SIMD_WIDTH; ++lane)
{
#if USE_SIMD16_FRONTEND
- uint32_t input_cp = (lane + lane_offset) * TotalControlPoints + cp;
- uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+ uint32_t input_cp = (lane + lane_offset) * TotalControlPoints + cp;
+ uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
- const float* pInputVec = (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
+ const float* pInputVec =
+ (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
#else
- uint32_t input_cp = lane * TotalControlPoints + cp;
- uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
+ uint32_t input_cp = lane * TotalControlPoints + cp;
+ uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
uint32_t input_lane = input_cp % KNOB_SIMD_WIDTH;
const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
}
}
- SetNextPaState(
- pa,
- PaPatchList<TotalControlPoints>,
- PaPatchListSingle<TotalControlPoints>,
- 0,
- PA_STATE_OPT::SIMD_WIDTH,
- true);
+ SetNextPaState(pa,
+ PaPatchList<TotalControlPoints>,
+ PaPatchListSingle<TotalControlPoints>,
+ 0,
+ PA_STATE_OPT::SIMD_WIDTH,
+ true);
return true;
}
#if ENABLE_AVX512_SIMD16
-template<uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
+template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
static bool PaPatchList_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- SetNextPaState_simd16(
- pa,
- PaPatchList_simd16<TotalControlPoints, CurrentControlPoints + 1>,
- PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
- PaPatchListSingle<TotalControlPoints>);
+ SetNextPaState_simd16(pa,
+ PaPatchList_simd16<TotalControlPoints, CurrentControlPoints + 1>,
+ PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
+ PaPatchListSingle<TotalControlPoints>);
return false;
}
-template<uint32_t TotalControlPoints>
+template <uint32_t TotalControlPoints>
static bool PaPatchListTerm_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
// We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
float vec[KNOB_SIMD16_WIDTH];
for (uint32_t lane = 0; lane < KNOB_SIMD16_WIDTH; ++lane)
{
- uint32_t input_cp = lane * TotalControlPoints + cp;
- uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+ uint32_t input_cp = lane * TotalControlPoints + cp;
+ uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
- vec[lane] = pInputVec[input_lane];
+ vec[lane] = pInputVec[input_lane];
}
verts[cp][i] = _simd16_loadu_ps(vec);
}
}
- SetNextPaState_simd16(
- pa,
- PaPatchList_simd16<TotalControlPoints>,
- PaPatchList<TotalControlPoints>,
- PaPatchListSingle<TotalControlPoints>,
- 0,
- PA_STATE_OPT::SIMD_WIDTH,
- true);
+ SetNextPaState_simd16(pa,
+ PaPatchList_simd16<TotalControlPoints>,
+ PaPatchList<TotalControlPoints>,
+ PaPatchListSingle<TotalControlPoints>,
+ 0,
+ PA_STATE_OPT::SIMD_WIDTH,
+ true);
return true;
}
#endif
-#define PA_PATCH_LIST_TERMINATOR(N) \
- template<> bool PaPatchList<N, N>(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])\
- { return PaPatchListTerm<N>(pa, slot, verts); }
+#define PA_PATCH_LIST_TERMINATOR(N) \
+ template <> \
+ bool PaPatchList<N, N>(PA_STATE_OPT & pa, uint32_t slot, simdvector verts[]) \
+ { \
+ return PaPatchListTerm<N>(pa, slot, verts); \
+ }
PA_PATCH_LIST_TERMINATOR(1)
PA_PATCH_LIST_TERMINATOR(2)
PA_PATCH_LIST_TERMINATOR(3)
#undef PA_PATCH_LIST_TERMINATOR
#if ENABLE_AVX512_SIMD16
-#define PA_PATCH_LIST_TERMINATOR_SIMD16(N) \
- template<> bool PaPatchList_simd16<N, N>(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])\
- { return PaPatchListTerm_simd16<N>(pa, slot, verts); }
+#define PA_PATCH_LIST_TERMINATOR_SIMD16(N) \
+ template <> \
+ bool PaPatchList_simd16<N, N>(PA_STATE_OPT & pa, uint32_t slot, simd16vector verts[]) \
+ { \
+ return PaPatchListTerm_simd16<N>(pa, slot, verts); \
+ }
PA_PATCH_LIST_TERMINATOR_SIMD16(1)
PA_PATCH_LIST_TERMINATOR_SIMD16(2)
PA_PATCH_LIST_TERMINATOR_SIMD16(3)
bool PaTriList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaTriList1, PaTriListSingle0);
- return false; // Not enough vertices to assemble 4 or 8 triangles.
+ return false; // Not enough vertices to assemble 4 or 8 triangles.
}
bool PaTriList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaTriList2, PaTriListSingle0);
- return false; // Not enough vertices to assemble 8 triangles.
+ return false; // Not enough vertices to assemble 8 triangles.
}
bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector &c_16 = PaGetSimdVector_simd16(pa, 2, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, 0, slot);
- simdvector &b = PaGetSimdVector(pa, 1, slot);
- simdvector &c = PaGetSimdVector(pa, 2, slot);
+ simdvector& a = PaGetSimdVector(pa, 0, slot);
+ simdvector& b = PaGetSimdVector(pa, 1, slot);
+ simdvector& c = PaGetSimdVector(pa, 2, slot);
#endif
simdscalar s;
for (int i = 0; i < 4; ++i)
{
simdvector& v0 = verts[0];
- v0[i] = _simd_blend_ps(a[i], b[i], 0x92);
- v0[i] = _simd_blend_ps(v0[i], c[i], 0x24);
- v0[i] = _simd_permute_ps_i(v0[i], 0x6C);
- s = _simd_permute2f128_ps(v0[i], v0[i], 0x21);
- v0[i] = _simd_blend_ps(v0[i], s, 0x44);
+ v0[i] = _simd_blend_ps(a[i], b[i], 0x92);
+ v0[i] = _simd_blend_ps(v0[i], c[i], 0x24);
+ v0[i] = _simd_permute_ps_i(v0[i], 0x6C);
+ s = _simd_permute2f128_ps(v0[i], v0[i], 0x21);
+ v0[i] = _simd_blend_ps(v0[i], s, 0x44);
simdvector& v1 = verts[1];
- v1[i] = _simd_blend_ps(a[i], b[i], 0x24);
- v1[i] = _simd_blend_ps(v1[i], c[i], 0x49);
- v1[i] = _simd_permute_ps_i(v1[i], 0xB1);
- s = _simd_permute2f128_ps(v1[i], v1[i], 0x21);
- v1[i] = _simd_blend_ps(v1[i], s, 0x66);
+ v1[i] = _simd_blend_ps(a[i], b[i], 0x24);
+ v1[i] = _simd_blend_ps(v1[i], c[i], 0x49);
+ v1[i] = _simd_permute_ps_i(v1[i], 0xB1);
+ s = _simd_permute2f128_ps(v1[i], v1[i], 0x21);
+ v1[i] = _simd_blend_ps(v1[i], s, 0x66);
simdvector& v2 = verts[2];
- v2[i] = _simd_blend_ps(a[i], b[i], 0x49);
- v2[i] = _simd_blend_ps(v2[i], c[i], 0x92);
- v2[i] = _simd_permute_ps_i(v2[i], 0xC6);
- s = _simd_permute2f128_ps(v2[i], v2[i], 0x21);
- v2[i] = _simd_blend_ps(v2[i], s, 0x22);
+ v2[i] = _simd_blend_ps(a[i], b[i], 0x49);
+ v2[i] = _simd_blend_ps(v2[i], c[i], 0x92);
+ v2[i] = _simd_permute_ps_i(v2[i], 0xC6);
+ s = _simd_permute2f128_ps(v2[i], v2[i], 0x21);
+ v2[i] = _simd_blend_ps(v2[i], s, 0x22);
}
#elif KNOB_ARCH >= KNOB_ARCH_AVX2
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector &c_16 = PaGetSimdVector_simd16(pa, 2, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- const simdvector &a = PaGetSimdVector(pa, 0, slot);
- const simdvector &b = PaGetSimdVector(pa, 1, slot);
- const simdvector &c = PaGetSimdVector(pa, 2, slot);
+ const simdvector& a = PaGetSimdVector(pa, 0, slot);
+ const simdvector& b = PaGetSimdVector(pa, 1, slot);
+ const simdvector& c = PaGetSimdVector(pa, 2, slot);
#endif
// v0 -> a0 a3 a6 b1 b4 b7 c2 c5
// v1 -> a1 a4 a7 b2 b5 c0 c3 c6
// v2 -> a2 a5 b0 b3 b6 c1 c4 c7
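+    // Scalar sketch of the mapping above (illustrative only): vertex k of
+    // output triangle t is input vertex 3 * t + k, where the 24 inputs per
+    // component are the concatenation a0..a7, b0..b7, c0..c7:
+    //
+    //     for (int t = 0; t < 8; ++t)
+    //         for (int k = 0; k < 3; ++k)
+    //             verts[k][t] = in[3 * t + k];
+    //
+    // The blends and permutes below realize this transpose without a gather.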
- simdvector &v0 = verts[0];
- simdvector &v1 = verts[1];
- simdvector &v2 = verts[2];
+ simdvector& v0 = verts[0];
+ simdvector& v1 = verts[1];
+ simdvector& v2 = verts[2];
// for simd x, y, z, and w
for (int i = 0; i < 4; ++i)
bool PaTriList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaTriList1_simd16, PaTriList1, PaTriListSingle0);
- return false; // Not enough vertices to assemble 16 triangles
+ return false; // Not enough vertices to assemble 16 triangles
}
bool PaTriList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaTriList2_simd16, PaTriList2, PaTriListSingle0);
- return false; // Not enough vertices to assemble 16 triangles
+ return false; // Not enough vertices to assemble 16 triangles
}
bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
+ // clang-format off
+
#if KNOB_ARCH >= KNOB_ARCH_AVX2
const simd16scalari perm0 = _simd16_set_epi32(13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0);
const simd16scalari perm1 = _simd16_set_epi32(14, 11, 8, 5, 2, 15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1);
const simd16scalari perm2 = _simd16_set_epi32(15, 12, 9, 6, 3, 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2);
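+    // Note: _simd16_set_epi32 lists arguments from lane 15 down to lane 0.
+    // After the blends below, permuting with perm0 leaves vertex 3 * t in
+    // lane t of v0 (likewise 3 * t + 1 for v1 and 3 * t + 2 for v2).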
-#else // KNOB_ARCH == KNOB_ARCH_AVX
+#else // KNOB_ARCH == KNOB_ARCH_AVX
simd16scalar perm0 = _simd16_setzero_ps();
simd16scalar perm1 = _simd16_setzero_ps();
simd16scalar perm2 = _simd16_setzero_ps();
#endif
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector &c = PaGetSimdVector_simd16(pa, 2, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
const simd16mask mask0 = 0x4924;
const simd16mask mask1 = 0x2492;
// v1 -> a1 a4 a7 aA aD b0 b3 b6 b9 bC bF c2 c5 c8 cB cE
// v2 -> a2 a5 a8 aB aE b1 b4 b7 bA bD c0 c3 c6 c9 cC cF
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
- simd16vector &v2 = verts[2];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
+ simd16vector& v2 = verts[2];
// for simd16 x, y, z, and w
for (int i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
- simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float *>(&c[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
+ simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
simd16scalar temp0 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask0), tempc, mask1);
simd16scalar temp1 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask2), tempc, mask0);
v0[i] = _simd16_permute_ps(temp0, perm0);
v1[i] = _simd16_permute_ps(temp1, perm1);
v2[i] = _simd16_permute_ps(temp2, perm2);
-#else // #if KNOB_ARCH == KNOB_ARCH_AVX
-
+#else // #if KNOB_ARCH == KNOB_ARCH_AVX
+
-        // the general permutes (above) are prohibitively slow to emulate on AVX (its scalar code)
+        // the general permutes (above) are prohibitively slow to emulate on AVX (it's scalar code)
- temp0 = _simd16_permute_ps_i(temp0, 0x6C); // (0, 3, 2, 1) => 00 11 01 10 => 0x6C
- perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp0 = _simd16_blend_ps(temp0, perm0, 0x4444); // 0010 0010 0010 0010
- perm0 = _simd16_permute2f128_ps(temp0, temp0, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v0[i] = _simd16_blend_ps(temp0, perm0, 0x3838); // 0001 1100 0001 1100
+ temp0 = _simd16_permute_ps_i(temp0, 0x6C); // (0, 3, 2, 1) => 00 11 01 10 => 0x6C
+ perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+ temp0 = _simd16_blend_ps(temp0, perm0, 0x4444); // 0010 0010 0010 0010
+ perm0 = _simd16_permute2f128_ps(temp0, temp0, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+ v0[i] = _simd16_blend_ps(temp0, perm0, 0x3838); // 0001 1100 0001 1100
- temp1 = _simd16_permute_ps_i(temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- perm1 = _simd16_permute2f128_ps(temp1, temp1, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp1 = _simd16_blend_ps(temp1, perm1, 0x6666); // 0010 0010 0010 0010
- perm1 = _simd16_permute2f128_ps(temp1, temp1, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v1[i] = _simd16_blend_ps(temp1, perm1, 0x1818); // 0001 1000 0001 1000
+ temp1 = _simd16_permute_ps_i(temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+ perm1 = _simd16_permute2f128_ps(temp1, temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+ temp1 = _simd16_blend_ps(temp1, perm1, 0x6666); // 0010 0010 0010 0010
+ perm1 = _simd16_permute2f128_ps(temp1, temp1, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+ v1[i] = _simd16_blend_ps(temp1, perm1, 0x1818); // 0001 1000 0001 1000
- temp2 = _simd16_permute_ps_i(temp2, 0xC6); // (2, 1, 0, 3) => 01 10 00 11 => 0xC6
- perm2 = _simd16_permute2f128_ps(temp2, temp2, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
- temp2 = _simd16_blend_ps(temp2, perm2, 0x2222); // 0100 0100 0100 0100
- perm2 = _simd16_permute2f128_ps(temp2, temp2, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
- v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C); // 0011 1000 0011 1000
+ temp2 = _simd16_permute_ps_i(temp2, 0xC6); // (2, 1, 0, 3) => 01 10 00 11 => 0xC6
+ perm2 = _simd16_permute2f128_ps(temp2, temp2, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+ temp2 = _simd16_blend_ps(temp2, perm2, 0x2222); // 0100 0100 0100 0100
+ perm2 = _simd16_permute2f128_ps(temp2, temp2, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+ v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C); // 0011 1000 0011 1000
#endif
}
SetNextPaState_simd16(pa, PaTriList0_simd16, PaTriList0, PaTriListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
+
+ // clang-format on
}
#endif
void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
- const simd16vector &c = PaGetSimdVector_simd16(pa, 2, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
if (pa.useAlternateOffset)
{
-    // hold at least 8 triangles worth of data. We want to assemble a single
+    // hold at least 8 triangles' worth of data. We want to assemble a single
// triangle with data in horizontal form.
- const simdvector &a = PaGetSimdVector(pa, 0, slot);
- const simdvector &b = PaGetSimdVector(pa, 1, slot);
- const simdvector &c = PaGetSimdVector(pa, 2, slot);
+ const simdvector& a = PaGetSimdVector(pa, 0, slot);
+ const simdvector& b = PaGetSimdVector(pa, 1, slot);
+ const simdvector& c = PaGetSimdVector(pa, 2, slot);
// Convert from vertical to horizontal.
// Tri Pattern - provoking vertex is always v0
bool PaTriStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaTriStrip1, PaTriStripSingle0);
- return false; // Not enough vertices to assemble 8 triangles.
+ return false; // Not enough vertices to assemble 8 triangles.
}
bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
- simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+ simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+ simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
#endif
simdscalar s;
- for(int i = 0; i < 4; ++i)
+ for (int i = 0; i < 4; ++i)
{
simdscalar a0 = a[i];
simdscalar b0 = b[i];
// v1 -> 13355779
// v2 -> 22446688
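+        // i.e. strip triangle t uses vertices { t, t+1, t+2 }; on odd t the
+        // second and third outputs are swapped ((1,3,2), (3,5,4), ...) to
+        // keep a consistent winding order.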
simdvector& v0 = verts[0];
- v0[i] = a0;
+ v0[i] = a0;
- // s -> 4567891011
+ // s -> 4567891011
s = _simd_permute2f128_ps(a0, b0, 0x21);
// s -> 23456789
s = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
return true;
}
-#if ENABLE_AVX512_SIMD16
+#if ENABLE_AVX512_SIMD16
bool PaTriStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0);
- return false; // Not enough vertices to assemble 16 triangles.
+ return false; // Not enough vertices to assemble 16 triangles.
}
bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ // clang-format off
+
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
const simd16mask mask0 = 0xF000;
// v1 -> a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
// v2 -> a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
- simd16vector &v2 = verts[2];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
+ simd16vector& v2 = verts[2];
// for simd16 x, y, z, and w
for (int i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar perm0 = _simd16_permute2f128_ps(tempa, tempa, 0x39);// (0 3 2 1) = 00 11 10 01 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF a0 a1 a2 a3
- simd16scalar perm1 = _simd16_permute2f128_ps(tempb, tempb, 0x39);// (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
+ simd16scalar perm0 = _simd16_permute2f128_ps(tempa, tempa, 0x39); // (0 3 2 1) = 00 11 10 01 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF a0 a1 a2 a3
+ simd16scalar perm1 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
- simd16scalar blend = _simd16_blend_ps(perm0, perm1, mask0); // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1 b2 b3
- simd16scalar shuff = _simd16_shuffle_ps(tempa, blend, _MM_SHUFFLE(1, 0, 3, 2)); // a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1
+ simd16scalar blend = _simd16_blend_ps(perm0, perm1, mask0); // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1 b2 b3
+ simd16scalar shuff = _simd16_shuffle_ps(tempa, blend, _MM_SHUFFLE(1, 0, 3, 2)); // a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1
- v0[i] = tempa; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- v1[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
- v2[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(2, 2, 2, 2)); // a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
+ v0[i] = tempa; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+ v1[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
+ v2[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(2, 2, 2, 2)); // a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
}
SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
return true;
+
+ // clang-format on
}
#endif
void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
if (pa.useAlternateOffset)
{
break;
};
#else
- const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+ const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+ const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
// Convert from vertical to horizontal.
// Tri Pattern - provoking vertex is always v0
bool PaTriFan0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaTriFan1, PaTriFanSingle0);
- return false; // Not enough vertices to assemble 8 triangles.
+ return false; // Not enough vertices to assemble 8 triangles.
}
bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
simdvector a;
simdvector b;
- const simd16vector &leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- const simdvector &leadVert = PaGetSimdVector(pa, pa.first, slot);
- const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+ const simdvector& leadVert = PaGetSimdVector(pa, pa.first, slot);
+ const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+ const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
#endif
simdscalar s;
// need to fill vectors 1/2 with new verts, and v0 with anchor vert.
- for(int i = 0; i < 4; ++i)
+ for (int i = 0; i < 4; ++i)
{
simdscalar a0 = a[i];
simdscalar b0 = b[i];
simdscalar comp = leadVert[i];
simdvector& v0 = verts[0];
- v0[i] = _simd_shuffle_ps(comp, comp, _MM_SHUFFLE(0, 0, 0, 0));
- v0[i] = _simd_permute2f128_ps(v0[i], comp, 0x00);
+ v0[i] = _simd_shuffle_ps(comp, comp, _MM_SHUFFLE(0, 0, 0, 0));
+ v0[i] = _simd_permute2f128_ps(v0[i], comp, 0x00);
simdvector& v2 = verts[2];
- s = _simd_permute2f128_ps(a0, b0, 0x21);
- v2[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
+ s = _simd_permute2f128_ps(a0, b0, 0x21);
+ v2[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
simdvector& v1 = verts[1];
- v1[i] = _simd_shuffle_ps(a0, v2[i], _MM_SHUFFLE(2, 1, 2, 1));
+ v1[i] = _simd_shuffle_ps(a0, v2[i], _MM_SHUFFLE(2, 1, 2, 1));
}
SetNextPaState(pa, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0);
- return false; // Not enough vertices to assemble 16 triangles.
+ return false; // Not enough vertices to assemble 16 triangles.
}
bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ // clang-format off
+
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
const simd16mask mask0 = 0xF000;
// v1 -> b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
// v2 -> b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
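+    // i.e. fan triangle t is (anchor, v_{t+1}, v_{t+2}): v0 broadcasts the
+    // lead vertex to all 16 lanes, while v1 and v2 are the running vertices
+    // shifted by one and two lanes respectively.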
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
- simd16vector &v2 = verts[2];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
+ simd16vector& v2 = verts[2];
// for simd16 x, y, z, and w
for (uint32_t i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
- simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float *>(&c[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
+ simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
- simd16scalar shuff = _simd16_shuffle_ps(tempa, tempa, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a0 a0 a4 a4 a4 a4 a0 a0 a0 a0 a4 a4 a4 a4
+        simd16scalar shuff = _simd16_shuffle_ps(tempa, tempa, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a0 a0 a4 a4 a4 a4 a8 a8 a8 a8 aC aC aC aC
- v0[i] = _simd16_permute2f128_ps(shuff, shuff, 0x00); // a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
+ v0[i] = _simd16_permute2f128_ps(shuff, shuff, 0x00); // a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
- simd16scalar temp0 = _simd16_permute2f128_ps(tempb, tempb, 0x39);// (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
- simd16scalar temp1 = _simd16_permute2f128_ps(tempc, tempc, 0x39);// (0 3 2 1) = 00 11 10 01 // c4 c5 c6 c7 c8 c9 cA cB cC cD cE cF c0 c1 c2 c3
+ simd16scalar temp0 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
+ simd16scalar temp1 = _simd16_permute2f128_ps(tempc, tempc, 0x39); // (0 3 2 1) = 00 11 10 01 // c4 c5 c6 c7 c8 c9 cA cB cC cD cE cF c0 c1 c2 c3
- simd16scalar blend = _simd16_blend_ps(temp0, temp1, mask0); // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1 c2 c3
-#if 0
+ simd16scalar blend = _simd16_blend_ps(temp0, temp1, mask0); // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1 c2 c3
- v2[i] = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2)); // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
- v1[i] = _simd16_shuffle_ps(tempb, v2[i], _MM_SHUFFLE(2, 1, 2, 1)); // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
-#else
-
- simd16scalar temp2 = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2)); // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
+ simd16scalar temp2 = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2)); // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
- v1[i] = _simd16_shuffle_ps(tempb, temp2, _MM_SHUFFLE(2, 1, 2, 1)); // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
- v2[i] = temp2; // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-#endif
+ v1[i] = _simd16_shuffle_ps(tempb, temp2, _MM_SHUFFLE(2, 1, 2, 1)); // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
+ v2[i] = temp2; // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
}
SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
return true;
+
+ // clang-format on
}
#endif
void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
if (pa.useAlternateOffset)
{
verts[2] = swizzleLaneN(c, primIndex - 14);
}
#else
- const simdvector &a = PaGetSimdVector(pa, pa.first, slot);
- const simdvector &b = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector &c = PaGetSimdVector(pa, pa.cur, slot);
+ const simdvector& a = PaGetSimdVector(pa, pa.first, slot);
+ const simdvector& b = PaGetSimdVector(pa, pa.prev, slot);
+ const simdvector& c = PaGetSimdVector(pa, pa.cur, slot);
// vert 0 from leading vertex
verts[0] = swizzleLane0(a);
bool PaQuadList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaQuadList1, PaQuadListSingle0);
- return false; // Not enough vertices to assemble 8 triangles.
+ return false; // Not enough vertices to assemble 8 triangles.
}
bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, 0, slot);
- simdvector &b = PaGetSimdVector(pa, 1, slot);
+ simdvector& a = PaGetSimdVector(pa, 0, slot);
+ simdvector& b = PaGetSimdVector(pa, 1, slot);
#endif
simdscalar s1, s2;
- for(int i = 0; i < 4; ++i)
+ for (int i = 0; i < 4; ++i)
{
simdscalar a0 = a[i];
simdscalar b0 = b[i];
s2 = _mm256_permute2f128_ps(a0, b0, 0x31);
simdvector& v0 = verts[0];
- v0[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(0, 0, 0, 0));
+ v0[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(0, 0, 0, 0));
simdvector& v1 = verts[1];
- v1[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(2, 1, 2, 1));
+ v1[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(2, 1, 2, 1));
simdvector& v2 = verts[2];
- v2[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(3, 2, 3, 2));
+ v2[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(3, 2, 3, 2));
}
SetNextPaState(pa, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
bool PaQuadList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaQuadList1_simd16, PaQuadList1, PaQuadListSingle0);
- return false; // Not enough vertices to assemble 16 triangles.
+ return false; // Not enough vertices to assemble 16 triangles.
}
bool PaQuadList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+ // clang-format off
+
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
-    // v0 -> a0 a0 a4 a4 a8 a8 aC aC b0 b0 b0 b0 b0 b0 bC bC
+    // v0 -> a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
// v1 -> a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
// v2 -> a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
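+    // i.e. quad q (vertices 4q .. 4q+3) emits the two triangles
+    // (4q, 4q+1, 4q+2) and (4q, 4q+2, 4q+3), which is why each quad's first
+    // vertex appears twice in v0.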
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
- simd16vector &v2 = verts[2];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
+ simd16vector& v2 = verts[2];
// for simd16 x, y, z, and w
for (uint32_t i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88);// (2 0 2 0) = 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
- simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD);// (3 1 3 1) = 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
+ simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) = 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
+ simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) = 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
- v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
- v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 1, 2, 1)); // a1 a2 a5 a6 a9 aA aD aE b1 b2 b6 b6 b9 bA bD bE
- v2[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 2, 3, 2)); // a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
+ v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(0, 0, 0, 0)); // a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
+        v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 1, 2, 1)); // a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
+ v2[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 2, 3, 2)); // a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
}
SetNextPaState_simd16(pa, PaQuadList0_simd16, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
+
+ // clang-format on
}
#endif
void PaQuadListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
if (pa.useAlternateOffset)
{
break;
}
#else
- const simdvector &a = PaGetSimdVector(pa, 0, slot);
- const simdvector &b = PaGetSimdVector(pa, 1, slot);
+ const simdvector& a = PaGetSimdVector(pa, 0, slot);
+ const simdvector& b = PaGetSimdVector(pa, 1, slot);
switch (primIndex)
{
#if USE_SIMD16_FRONTEND
simdvector first;
- const simd16vector &first_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& first_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
if (!pa.useAlternateOffset)
{
}
#else
- simdvector &first = PaGetSimdVector(pa, pa.first, slot);
+ simdvector& first = PaGetSimdVector(pa, pa.first, slot);
#endif
for (int i = 0; i < 4; i++)
{
- float *firstVtx = (float *)&(first[i]);
- float *targetVtx = (float *)&(verts[1][i]);
- targetVtx[lane] = firstVtx[0];
+ float* firstVtx = (float*)&(first[i]);
+ float* targetVtx = (float*)&(verts[1][i]);
+ targetVtx[lane] = firstVtx[0];
}
}
// loop reconnect now
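+    // For an N-vertex loop the final primitive must be (v[N-1], v[0]); the
+    // scalar stores below overwrite v1 in that last lane with the loop's
+    // first vertex.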
const int lane = pa.numPrims - pa.numPrimsComplete - 1;
- const simd16vector &first = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
for (int i = 0; i < 4; i++)
{
- float *firstVtx = (float *)&(first[i]);
- float *targetVtx = (float *)&(verts[1][i]);
- targetVtx[lane] = firstVtx[0];
+ float* firstVtx = (float*)&(first[i]);
+ float* targetVtx = (float*)&(verts[1][i]);
+ targetVtx[lane] = firstVtx[0];
}
}
- SetNextPaState_simd16(pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
+ SetNextPaState_simd16(
+ pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
return true;
}
if (pa.numPrimsComplete + primIndex == pa.numPrims - 1)
{
#if USE_SIMD16_FRONTEND
- const simd16vector &first = PaGetSimdVector_simd16(pa, pa.first, slot);
+ const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
verts[1] = swizzleLane0(first);
#else
- const simdvector &first = PaGetSimdVector(pa, pa.first, slot);
+ const simdvector& first = PaGetSimdVector(pa, pa.first, slot);
verts[1] = swizzleLane0(first);
#endif
bool PaLineList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaLineList1, PaLineListSingle0);
- return false; // Not enough vertices to assemble 8 lines
+ return false; // Not enough vertices to assemble 8 lines
}
bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, 0, slot);
- simdvector &b = PaGetSimdVector(pa, 1, slot);
+ simdvector& a = PaGetSimdVector(pa, 0, slot);
+ simdvector& b = PaGetSimdVector(pa, 1, slot);
#endif
/// @todo: verify provoking vertex is correct
bool PaLineList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaLineList1_simd16, PaLineList1, PaLineListSingle0);
- return false; // Not enough vertices to assemble 16 lines
+ return false; // Not enough vertices to assemble 16 lines
}
bool PaLineList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+ // clang-format off
+
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
// v0 -> a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
-    // v1 -> a1 a3 a5 a7 a9 aB aD aF b1 b3 b4 b7 b9 bB bD bF
+    // v1 -> a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
// for simd16 x, y, z, and w
for (int i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88);// (2 0 2 0) 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b9 b9 bA bB
- simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD);// (3 1 3 1) 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
+        simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
+ simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
- v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
- v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
+ v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
+ v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 1, 3, 1)); // a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
}
SetNextPaState_simd16(pa, PaLineList0_simd16, PaLineList0, PaLineListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
+
+ // clang-format on
}
#endif
void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
if (pa.useAlternateOffset)
{
break;
}
#else
- const simdvector &a = PaGetSimdVector(pa, 0, slot);
- const simdvector &b = PaGetSimdVector(pa, 1, slot);
+ const simdvector& a = PaGetSimdVector(pa, 0, slot);
+ const simdvector& b = PaGetSimdVector(pa, 1, slot);
switch (primIndex)
{
bool PaLineStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SetNextPaState(pa, PaLineStrip1, PaLineStripSingle0);
- return false; // Not enough vertices to assemble 8 lines
+ return false; // Not enough vertices to assemble 8 lines
}
bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
- simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+ simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+ simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
#endif
/// @todo: verify provoking vertex is correct
verts[0] = a;
- for(uint32_t i = 0; i < 4; ++i)
+ for (uint32_t i = 0; i < 4; ++i)
{
// 1 2 3 x 5 6 7 x
__m256 vPermA = _mm256_permute_ps(a.v[i], 0x39); // indices hi->low 00 11 10 01 (0 3 2 1)
bool PaLineStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0);
- return false; // Not enough vertices to assemble 16 lines
+ return false; // Not enough vertices to assemble 16 lines
}
bool PaLineStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
+ // clang-format off
+
const simd16scalari perm = _simd16_set_epi32(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
const simd16mask mask0 = 0x0001;
// v0 -> a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
// v1 -> a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
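+    // i.e. line t is (v_t, v_{t+1}); v1 is v0 rotated left by one lane with
+    // b0 pulled in, implemented below as a single-lane blend (mask0) plus
+    // one full 16-lane permute.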
- simd16vector &v0 = verts[0];
- simd16vector &v1 = verts[1];
+ simd16vector& v0 = verts[0];
+ simd16vector& v1 = verts[1];
- v0 = a; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+ v0 = a; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
// for simd16 x, y, z, and w
for (int i = 0; i < 4; i += 1)
{
- simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
- simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+ simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+ simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
- simd16scalar temp = _simd16_blend_ps(tempa, tempb, mask0); // b0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+ simd16scalar temp = _simd16_blend_ps(tempa, tempb, mask0); // b0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
- v1[i] = _simd16_permute_ps(temp, perm); // a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
+ v1[i] = _simd16_permute_ps(temp, perm); // a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
}
SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
return true;
+
+ // clang-format on
}
#endif
void PaLineStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
- const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+ const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
if (pa.useAlternateOffset)
{
break;
}
#else
- const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
- const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+ const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+ const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
switch (primIndex)
{
#if USE_SIMD16_FRONTEND
simdvector a;
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
if (!pa.useAlternateOffset)
{
}
#else
- simdvector &a = PaGetSimdVector(pa, 0, slot);
+ simdvector& a = PaGetSimdVector(pa, 0, slot);
#endif
- verts[0] = a; // points only have 1 vertex.
+ verts[0] = a; // points only have 1 vertex.
SetNextPaState(pa, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
#if ENABLE_AVX512_SIMD16
bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
- simd16vector &a = PaGetSimdVector_simd16(pa, pa.cur, slot);
+ simd16vector& a = PaGetSimdVector_simd16(pa, pa.cur, slot);
- verts[0] = a; // points only have 1 vertex.
+ verts[0] = a; // points only have 1 vertex.
- SetNextPaState_simd16(pa, PaPoints0_simd16, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
+ SetNextPaState_simd16(
+ pa, PaPoints0_simd16, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
}
void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
#if USE_SIMD16_FRONTEND
- const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
if (pa.useAlternateOffset)
{
verts[0] = swizzleLaneN(a, primIndex);
#else
- const simdvector &a = PaGetSimdVector(pa, 0, slot);
+ const simdvector& a = PaGetSimdVector(pa, 0, slot);
verts[0] = swizzleLaneN(a, primIndex);
#endif
SetNextPaState(pa, PaRectList1, PaRectListSingle0);
return false;
}
-
+
//////////////////////////////////////////////////////////////////////////
/// @brief State 1 for RECT_LIST topology.
-/// Rect lists has the following format.
+/// Rect lists have the following format.
/// | \ | | \ | | \ | | \ |
/// v1 o---o v4 o---o v7 o---o v10 o---o
/// v0 v3 v6 v9
-///
+///
/// Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-///
+///
/// tri0 = { v0, v1, v2 } tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
/// tri2 = { v3, v4, v5 } tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
/// etc.
-///
+///
/// PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
/// where v0 contains all the first vertices for 8 triangles.
-///
+///
/// Result:
/// verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
/// verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
///
/// @param pa - State for PA state machine.
/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList1(
- PA_STATE_OPT& pa,
- uint32_t slot,
- simdvector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
- // SIMD vectors a and b are the last two vertical outputs from the vertex shader.
+// SIMD vectors a and b are the last two vertical outputs from the vertex shader.
#if USE_SIMD16_FRONTEND
simdvector a;
simdvector b;
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);;
+            b[i] = _simd16_extract_ps(b_16[i], 1);
}
}
#else
- simdvector &a = PaGetSimdVector(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7 }
- simdvector &b = PaGetSimdVector(pa, 1, slot); // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
+ simdvector& a = PaGetSimdVector(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7 }
+ simdvector& b = PaGetSimdVector(pa, 1, slot); // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
#endif
__m256 tmp0, tmp1, tmp2;
// Loop over each component in the simdvector.
- for(int i = 0; i < 4; ++i)
+ for (int i = 0; i < 4; ++i)
{
- simdvector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
- v0[i] = _mm256_blend_ps(a[i], tmp0, 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
- tmp1 = _mm256_permute_ps(v0[i], 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
- v0[i] = _mm256_permute_ps(v0[i], 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
- v0[i] = _mm256_blend_ps(tmp1, v0[i], 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
+ simdvector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
+ tmp0 = _mm256_permute2f128_ps(
+ b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
+ v0[i] = _mm256_blend_ps(
+ a[i],
+ tmp0,
+ 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
+ tmp1 = _mm256_permute_ps(v0[i], 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
+ v0[i] = _mm256_permute_ps(v0[i], 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
+ v0[i] =
+ _mm256_blend_ps(tmp1, v0[i], 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
/// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
/// AVX2 should make this much cheaper.
- simdvector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- v1[i] = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
- tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
- tmp2 = _mm256_blend_ps(v1[i], tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
- tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, * *, *, * }
- v1[i] = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
- v1[i] = _mm256_blend_ps(tmp2, v1[i], 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
- v1[i] = _mm256_blend_ps(v1[i], tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
+ simdvector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+ v1[i] = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
+ tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
+ tmp2 = _mm256_blend_ps(v1[i], tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
+ tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, *, *, *, * }
+ v1[i] = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
+ v1[i] = _mm256_blend_ps(tmp2, v1[i], 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
+ v1[i] = _mm256_blend_ps(v1[i], tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
// verts[2] = { v2, w, v5, x, v8, y, v11, z }
- simdvector& v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
- v2[i] = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
- tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
- v2[i] = _mm256_blend_ps(tmp1, v2[i], 0xF0);
+ simdvector& v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
+ v2[i] = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
+ tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
+ v2[i] = _mm256_blend_ps(tmp1, v2[i], 0xF0);
// Need to compute 4th implied vertex for the rectangle.
tmp2 = _mm256_sub_ps(v0[i], v1[i]);
- tmp2 = _mm256_add_ps(tmp2, v2[i]); // tmp2 = { w, *, x, *, y, *, z, * }
- tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
- v2[i] = _mm256_blend_ps(v2[i], tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
+ tmp2 = _mm256_add_ps(tmp2, v2[i]); // tmp2 = { w, *, x, *, y, *, z, * }
+ tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
+ v2[i] = _mm256_blend_ps(v2[i], tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
}
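    // On the blend immediates used above: bit i of the 8-bit mask selects lane i
    // from the second operand, so 0xAA = 0b10101010 takes the odd lanes from
    // tmp2, merging the implied 4th vertices into v2.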
SetNextPaState(pa, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
/// Not implemented unless there is a use case for more than 8 rects.
/// @param pa - State for PA state machine.
/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList2(
- PA_STATE_OPT& pa,
- uint32_t slot,
- simdvector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
{
SWR_INVALID("Is rect list used for anything other then clears?");
SetNextPaState(pa, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
///      | \ |      | \ |      | \ |      | \ |
/// v1   o---o v4   o---o v7   o---o v10  o---o
///      v0         v3         v6         v9
-///
+///
/// Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-///
+///
/// tri0 = { v0, v1, v2 } tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
/// tri2 = { v3, v4, v5 } tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
/// etc.
-///
+///
/// PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
/// where v0 contains all the first vertices for 8 triangles.
-///
+///
/// Result:
/// verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
/// verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
///
/// @param pa - State for PA state machine.
/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList1_simd16(
- PA_STATE_OPT& pa,
- uint32_t slot,
- simd16vector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
+ // clang-format off
+
simdvector a;
simdvector b;
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15 }
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7,
+ // v8, v9, v10, v11, v12, v13, v14, v15 }
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot); // b[] = { v16...but not used by this implementation.. }
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot); // b[] = { v16...but not used by this implementation.. }
for (uint32_t i = 0; i < 4; i += 1)
{
}
}
- simd16vector &v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- simd16vector &v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- simd16vector &v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
+ simd16vector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
+ simd16vector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+ simd16vector& v2 = verts[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
// Loop over each component in the simdvector.
for (int i = 0; i < 4; i += 1)
{
- simdscalar v0_lo; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
- simdscalar v1_lo; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
- simdscalar v2_lo; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
+ simdscalar v0_lo; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
+ simdscalar v1_lo; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+ simdscalar v2_lo; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
__m256 tmp0, tmp1, tmp2;
- tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
- v0_lo = _mm256_blend_ps(a[i], tmp0, 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
- tmp1 = _mm256_permute_ps(v0_lo, 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
- v0_lo = _mm256_permute_ps(v0_lo, 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
- v0_lo = _mm256_blend_ps(tmp1, v0_lo, 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
+ tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
+ v0_lo = _mm256_blend_ps(a[i], tmp0, 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
+ tmp1 = _mm256_permute_ps(v0_lo, 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
+ v0_lo = _mm256_permute_ps(v0_lo, 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
+ v0_lo = _mm256_blend_ps(tmp1, v0_lo, 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
/// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
/// AVX2 should make this much cheaper.
- v1_lo = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
- tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
- tmp2 = _mm256_blend_ps(v1_lo, tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
- tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, * *, *, * }
- v1_lo = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
- v1_lo = _mm256_blend_ps(tmp2, v1_lo, 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
- v1_lo = _mm256_blend_ps(v1_lo, tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
+ v1_lo = _mm256_permute_ps(a[i], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
+ tmp1 = _mm256_permute_ps(a[i], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
+ tmp2 = _mm256_blend_ps(v1_lo, tmp1, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
+ tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7, *, v4, v5, *, *, *, * }
+ v1_lo = _mm256_permute_ps(tmp0, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
+ v1_lo = _mm256_blend_ps(tmp2, v1_lo, 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
+ v1_lo = _mm256_blend_ps(v1_lo, tmp1, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
// verts[2] = { v2, w, v5, x, v8, y, v11, z }
- v2_lo = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
- tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
+ v2_lo = _mm256_permute_ps(tmp0, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
+ tmp1 = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
v2_lo = _mm256_blend_ps(tmp1, v2_lo, 0xF0);
// Need to compute 4th implied vertex for the rectangle.
- tmp2 = _mm256_sub_ps(v0_lo, v1_lo);
- tmp2 = _mm256_add_ps(tmp2, v2_lo); // tmp2 = { w, *, x, *, y, *, z, * }
- tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
- v2_lo = _mm256_blend_ps(v2_lo, tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
+ tmp2 = _mm256_sub_ps(v0_lo, v1_lo);
+ tmp2 = _mm256_add_ps(tmp2, v2_lo); // tmp2 = { w, *, x, *, y, *, z, * }
+ tmp2 = _mm256_permute_ps(tmp2, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
+ v2_lo = _mm256_blend_ps(v2_lo, tmp2, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
v0[i] = _simd16_insert_ps(_simd16_setzero_ps(), v0_lo, 0);
v1[i] = _simd16_insert_ps(_simd16_setzero_ps(), v1_lo, 0);
SetNextPaState_simd16(pa, PaRectList1_simd16, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
+
+ // clang-format on
}
//////////////////////////////////////////////////////////////////////////
/// Not implemented unless there is a use case for more than 8 rects.
/// @param pa - State for PA state machine.
/// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList2_simd16(
- PA_STATE_OPT& pa,
- uint32_t slot,
- simd16vector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
{
SWR_INVALID("Is rect list used for anything other then clears?");
- SetNextPaState_simd16(pa, PaRectList0_simd16, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
+ SetNextPaState_simd16(
+ pa, PaRectList0_simd16, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
return true;
}
/// @param pa - State for PA state machine.
/// @param slot - Index into VS output for a given attribute.
/// @param primIndex - Binner processes each triangle individually.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-void PaRectListSingle0(
- PA_STATE_OPT& pa,
- uint32_t slot,
- uint32_t primIndex,
- simd4scalar verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+void PaRectListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
{
- // We have 12 simdscalars contained within 3 simdvectors which
- // hold at least 8 triangles worth of data. We want to assemble a single
- // triangle with data in horizontal form.
+// We have 12 simdscalars contained within 3 simdvectors which
+// hold at least 8 triangles worth of data. We want to assemble a single
+// triangle with data in horizontal form.
#if USE_SIMD16_FRONTEND
simdvector a;
simdvector b;
if (!pa.useAlternateOffset)
{
- const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+ const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
}
else
{
- const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+ const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
for (uint32_t i = 0; i < 4; i += 1)
{
a[i] = _simd16_extract_ps(b_16[i], 0);
- b[i] = _simd16_extract_ps(b_16[i], 1);;
+            b[i] = _simd16_extract_ps(b_16[i], 1);
}
}
#endif
// Convert from vertical to horizontal.
- switch(primIndex)
+ switch (primIndex)
{
case 0:
verts[0] = swizzleLane0(a);
};
}
-PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT *in_pDC, uint32_t in_numPrims, uint8_t* pStream, uint32_t in_streamSizeInVerts,
- uint32_t in_vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo) :
- PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim), numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0),
- cur(0), prev(0), first(0), counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
+PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT* in_pDC,
+ uint32_t in_numPrims,
+ uint8_t* pStream,
+ uint32_t in_streamSizeInVerts,
+ uint32_t in_vertexStride,
+ bool in_isStreaming,
+ uint32_t numVertsPerPrim,
+ PRIMITIVE_TOPOLOGY topo) :
+ PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim),
+ numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0), cur(0), prev(0), first(0),
+ counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
{
const API_STATE& state = GetApiState(pDC);
#endif
switch (this->binTopology)
{
- case TOP_TRIANGLE_LIST:
- this->pfnPaFunc = PaTriList0;
+ case TOP_TRIANGLE_LIST:
+ this->pfnPaFunc = PaTriList0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriList0_simd16;
+ this->pfnPaFunc_simd16 = PaTriList0_simd16;
#endif
- break;
- case TOP_TRIANGLE_STRIP:
- this->pfnPaFunc = PaTriStrip0;
+ break;
+ case TOP_TRIANGLE_STRIP:
+ this->pfnPaFunc = PaTriStrip0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
+ this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
#endif
- break;
- case TOP_TRIANGLE_FAN:
- this->pfnPaFunc = PaTriFan0;
+ break;
+ case TOP_TRIANGLE_FAN:
+ this->pfnPaFunc = PaTriFan0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriFan0_simd16;
+ this->pfnPaFunc_simd16 = PaTriFan0_simd16;
#endif
- break;
- case TOP_QUAD_LIST:
- this->pfnPaFunc = PaQuadList0;
+ break;
+ case TOP_QUAD_LIST:
+ this->pfnPaFunc = PaQuadList0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaQuadList0_simd16;
+ this->pfnPaFunc_simd16 = PaQuadList0_simd16;
#endif
- this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
- break;
- case TOP_QUAD_STRIP:
-        // quad strip pattern when decomposed into triangles is the same as tri strips
- this->pfnPaFunc = PaTriStrip0;
+ this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
+ break;
+ case TOP_QUAD_STRIP:
+        // quad strip pattern when decomposed into triangles is the same as tri strips
+ this->pfnPaFunc = PaTriStrip0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
+ this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
#endif
- this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
- break;
- case TOP_LINE_LIST:
- this->pfnPaFunc = PaLineList0;
+ this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
+ break;
+ case TOP_LINE_LIST:
+ this->pfnPaFunc = PaLineList0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineList0_simd16;
+ this->pfnPaFunc_simd16 = PaLineList0_simd16;
#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_LINE_STRIP:
- this->pfnPaFunc = PaLineStrip0;
+ this->numPrims = in_numPrims;
+ break;
+ case TOP_LINE_STRIP:
+ this->pfnPaFunc = PaLineStrip0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineStrip0_simd16;
+ this->pfnPaFunc_simd16 = PaLineStrip0_simd16;
#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_LINE_LOOP:
- this->pfnPaFunc = PaLineLoop0;
+ this->numPrims = in_numPrims;
+ break;
+ case TOP_LINE_LOOP:
+ this->pfnPaFunc = PaLineLoop0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaLineLoop0_simd16;
+ this->pfnPaFunc_simd16 = PaLineLoop0_simd16;
#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_POINT_LIST:
- this->pfnPaFunc = PaPoints0;
+ this->numPrims = in_numPrims;
+ break;
+ case TOP_POINT_LIST:
+ this->pfnPaFunc = PaPoints0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPoints0_simd16;
+ this->pfnPaFunc_simd16 = PaPoints0_simd16;
#endif
- this->numPrims = in_numPrims;
- break;
- case TOP_RECT_LIST:
- this->pfnPaFunc = PaRectList0;
+ this->numPrims = in_numPrims;
+ break;
+ case TOP_RECT_LIST:
+ this->pfnPaFunc = PaRectList0;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaRectList0_simd16;
+ this->pfnPaFunc_simd16 = PaRectList0_simd16;
#endif
- this->numPrims = in_numPrims * 2;
- break;
+ this->numPrims = in_numPrims * 2;
+ break;
- case TOP_PATCHLIST_1:
- this->pfnPaFunc = PaPatchList<1>;
+ case TOP_PATCHLIST_1:
+ this->pfnPaFunc = PaPatchList<1>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<1>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<1>;
#endif
- break;
- case TOP_PATCHLIST_2:
- this->pfnPaFunc = PaPatchList<2>;
+ break;
+ case TOP_PATCHLIST_2:
+ this->pfnPaFunc = PaPatchList<2>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<2>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<2>;
#endif
- break;
- case TOP_PATCHLIST_3:
- this->pfnPaFunc = PaPatchList<3>;
+ break;
+ case TOP_PATCHLIST_3:
+ this->pfnPaFunc = PaPatchList<3>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<3>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<3>;
#endif
- break;
- case TOP_PATCHLIST_4:
- this->pfnPaFunc = PaPatchList<4>;
+ break;
+ case TOP_PATCHLIST_4:
+ this->pfnPaFunc = PaPatchList<4>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<4>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<4>;
#endif
- break;
- case TOP_PATCHLIST_5:
- this->pfnPaFunc = PaPatchList<5>;
+ break;
+ case TOP_PATCHLIST_5:
+ this->pfnPaFunc = PaPatchList<5>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<5>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<5>;
#endif
- break;
- case TOP_PATCHLIST_6:
- this->pfnPaFunc = PaPatchList<6>;
+ break;
+ case TOP_PATCHLIST_6:
+ this->pfnPaFunc = PaPatchList<6>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<6>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<6>;
#endif
- break;
- case TOP_PATCHLIST_7:
- this->pfnPaFunc = PaPatchList<7>;
+ break;
+ case TOP_PATCHLIST_7:
+ this->pfnPaFunc = PaPatchList<7>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<7>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<7>;
#endif
- break;
- case TOP_PATCHLIST_8:
- this->pfnPaFunc = PaPatchList<8>;
+ break;
+ case TOP_PATCHLIST_8:
+ this->pfnPaFunc = PaPatchList<8>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<8>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<8>;
#endif
- break;
- case TOP_PATCHLIST_9:
- this->pfnPaFunc = PaPatchList<9>;
+ break;
+ case TOP_PATCHLIST_9:
+ this->pfnPaFunc = PaPatchList<9>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<9>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<9>;
#endif
- break;
- case TOP_PATCHLIST_10:
- this->pfnPaFunc = PaPatchList<10>;
+ break;
+ case TOP_PATCHLIST_10:
+ this->pfnPaFunc = PaPatchList<10>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<10>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<10>;
#endif
- break;
- case TOP_PATCHLIST_11:
- this->pfnPaFunc = PaPatchList<11>;
+ break;
+ case TOP_PATCHLIST_11:
+ this->pfnPaFunc = PaPatchList<11>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<11>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<11>;
#endif
- break;
- case TOP_PATCHLIST_12:
- this->pfnPaFunc = PaPatchList<12>;
+ break;
+ case TOP_PATCHLIST_12:
+ this->pfnPaFunc = PaPatchList<12>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<12>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<12>;
#endif
- break;
- case TOP_PATCHLIST_13:
- this->pfnPaFunc = PaPatchList<13>;
+ break;
+ case TOP_PATCHLIST_13:
+ this->pfnPaFunc = PaPatchList<13>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<13>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<13>;
#endif
- break;
- case TOP_PATCHLIST_14:
- this->pfnPaFunc = PaPatchList<14>;
+ break;
+ case TOP_PATCHLIST_14:
+ this->pfnPaFunc = PaPatchList<14>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<14>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<14>;
#endif
- break;
- case TOP_PATCHLIST_15:
- this->pfnPaFunc = PaPatchList<15>;
+ break;
+ case TOP_PATCHLIST_15:
+ this->pfnPaFunc = PaPatchList<15>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<15>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<15>;
#endif
- break;
- case TOP_PATCHLIST_16:
- this->pfnPaFunc = PaPatchList<16>;
+ break;
+ case TOP_PATCHLIST_16:
+ this->pfnPaFunc = PaPatchList<16>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<16>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<16>;
#endif
- break;
- case TOP_PATCHLIST_17:
- this->pfnPaFunc = PaPatchList<17>;
+ break;
+ case TOP_PATCHLIST_17:
+ this->pfnPaFunc = PaPatchList<17>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<17>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<17>;
#endif
- break;
- case TOP_PATCHLIST_18:
- this->pfnPaFunc = PaPatchList<18>;
+ break;
+ case TOP_PATCHLIST_18:
+ this->pfnPaFunc = PaPatchList<18>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<18>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<18>;
#endif
- break;
- case TOP_PATCHLIST_19:
- this->pfnPaFunc = PaPatchList<19>;
+ break;
+ case TOP_PATCHLIST_19:
+ this->pfnPaFunc = PaPatchList<19>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<19>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<19>;
#endif
- break;
- case TOP_PATCHLIST_20:
- this->pfnPaFunc = PaPatchList<20>;
+ break;
+ case TOP_PATCHLIST_20:
+ this->pfnPaFunc = PaPatchList<20>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<20>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<20>;
#endif
- break;
- case TOP_PATCHLIST_21:
- this->pfnPaFunc = PaPatchList<21>;
+ break;
+ case TOP_PATCHLIST_21:
+ this->pfnPaFunc = PaPatchList<21>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<21>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<21>;
#endif
- break;
- case TOP_PATCHLIST_22:
- this->pfnPaFunc = PaPatchList<22>;
+ break;
+ case TOP_PATCHLIST_22:
+ this->pfnPaFunc = PaPatchList<22>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<22>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<22>;
#endif
- break;
- case TOP_PATCHLIST_23:
- this->pfnPaFunc = PaPatchList<23>;
+ break;
+ case TOP_PATCHLIST_23:
+ this->pfnPaFunc = PaPatchList<23>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<23>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<23>;
#endif
- break;
- case TOP_PATCHLIST_24:
- this->pfnPaFunc = PaPatchList<24>;
+ break;
+ case TOP_PATCHLIST_24:
+ this->pfnPaFunc = PaPatchList<24>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<24>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<24>;
#endif
- break;
- case TOP_PATCHLIST_25:
- this->pfnPaFunc = PaPatchList<25>;
+ break;
+ case TOP_PATCHLIST_25:
+ this->pfnPaFunc = PaPatchList<25>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<25>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<25>;
#endif
- break;
- case TOP_PATCHLIST_26:
- this->pfnPaFunc = PaPatchList<26>;
+ break;
+ case TOP_PATCHLIST_26:
+ this->pfnPaFunc = PaPatchList<26>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<26>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<26>;
#endif
- break;
- case TOP_PATCHLIST_27:
- this->pfnPaFunc = PaPatchList<27>;
+ break;
+ case TOP_PATCHLIST_27:
+ this->pfnPaFunc = PaPatchList<27>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<27>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<27>;
#endif
- break;
- case TOP_PATCHLIST_28:
- this->pfnPaFunc = PaPatchList<28>;
+ break;
+ case TOP_PATCHLIST_28:
+ this->pfnPaFunc = PaPatchList<28>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<28>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<28>;
#endif
- break;
- case TOP_PATCHLIST_29:
- this->pfnPaFunc = PaPatchList<29>;
+ break;
+ case TOP_PATCHLIST_29:
+ this->pfnPaFunc = PaPatchList<29>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<29>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<29>;
#endif
- break;
- case TOP_PATCHLIST_30:
- this->pfnPaFunc = PaPatchList<30>;
+ break;
+ case TOP_PATCHLIST_30:
+ this->pfnPaFunc = PaPatchList<30>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<30>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<30>;
#endif
- break;
- case TOP_PATCHLIST_31:
- this->pfnPaFunc = PaPatchList<31>;
+ break;
+ case TOP_PATCHLIST_31:
+ this->pfnPaFunc = PaPatchList<31>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<31>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<31>;
#endif
- break;
- case TOP_PATCHLIST_32:
- this->pfnPaFunc = PaPatchList<32>;
+ break;
+ case TOP_PATCHLIST_32:
+ this->pfnPaFunc = PaPatchList<32>;
#if ENABLE_AVX512_SIMD16
- this->pfnPaFunc_simd16 = PaPatchList_simd16<32>;
+ this->pfnPaFunc_simd16 = PaPatchList_simd16<32>;
#endif
- break;
+ break;
- default:
- SWR_INVALID("Invalid topology: %d", this->binTopology);
- break;
+ default:
+ SWR_INVALID("Invalid topology: %d", this->binTopology);
+ break;
};
this->pfnPaFuncReset = this->pfnPaFunc;
#if USE_SIMD16_FRONTEND
simd16scalari id16 = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
- simd16scalari id82 = _simd16_set_epi32( 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
+ simd16scalari id82 = _simd16_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
#else
simdscalari id8 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
simdscalari id4 = _simd_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
#endif
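    // Quad-derived topologies (quad list/strip, rect list) emit two triangles
    // per input primitive, so the duplicated-lane vectors (id82/id4) give both
    // triangles of a primitive the same ID, and the increment below is half the
    // SIMD width.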
- switch(this->binTopology)
+ switch (this->binTopology)
{
- case TOP_TRIANGLE_LIST:
- case TOP_TRIANGLE_STRIP:
- case TOP_TRIANGLE_FAN:
- case TOP_LINE_STRIP:
- case TOP_LINE_LIST:
- case TOP_LINE_LOOP:
+ case TOP_TRIANGLE_LIST:
+ case TOP_TRIANGLE_STRIP:
+ case TOP_TRIANGLE_FAN:
+ case TOP_LINE_STRIP:
+ case TOP_LINE_LIST:
+ case TOP_LINE_LOOP:
#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
+ this->primIDIncr = 16;
+ this->primID = id16;
#else
- this->primIDIncr = 8;
- this->primID = id8;
+ this->primIDIncr = 8;
+ this->primID = id8;
#endif
- break;
- case TOP_QUAD_LIST:
- case TOP_QUAD_STRIP:
- case TOP_RECT_LIST:
+ break;
+ case TOP_QUAD_LIST:
+ case TOP_QUAD_STRIP:
+ case TOP_RECT_LIST:
#if USE_SIMD16_FRONTEND
- this->primIDIncr = 8;
- this->primID = id82;
+ this->primIDIncr = 8;
+ this->primID = id82;
#else
- this->primIDIncr = 4;
- this->primID = id4;
+ this->primIDIncr = 4;
+ this->primID = id4;
#endif
- break;
- case TOP_POINT_LIST:
+ break;
+ case TOP_POINT_LIST:
#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
+ this->primIDIncr = 16;
+ this->primID = id16;
#else
- this->primIDIncr = 8;
- this->primID = id8;
-#endif
- break;
- case TOP_PATCHLIST_1:
- case TOP_PATCHLIST_2:
- case TOP_PATCHLIST_3:
- case TOP_PATCHLIST_4:
- case TOP_PATCHLIST_5:
- case TOP_PATCHLIST_6:
- case TOP_PATCHLIST_7:
- case TOP_PATCHLIST_8:
- case TOP_PATCHLIST_9:
- case TOP_PATCHLIST_10:
- case TOP_PATCHLIST_11:
- case TOP_PATCHLIST_12:
- case TOP_PATCHLIST_13:
- case TOP_PATCHLIST_14:
- case TOP_PATCHLIST_15:
- case TOP_PATCHLIST_16:
- case TOP_PATCHLIST_17:
- case TOP_PATCHLIST_18:
- case TOP_PATCHLIST_19:
- case TOP_PATCHLIST_20:
- case TOP_PATCHLIST_21:
- case TOP_PATCHLIST_22:
- case TOP_PATCHLIST_23:
- case TOP_PATCHLIST_24:
- case TOP_PATCHLIST_25:
- case TOP_PATCHLIST_26:
- case TOP_PATCHLIST_27:
- case TOP_PATCHLIST_28:
- case TOP_PATCHLIST_29:
- case TOP_PATCHLIST_30:
- case TOP_PATCHLIST_31:
- case TOP_PATCHLIST_32:
- // Always run KNOB_SIMD_WIDTH number of patches at a time.
+ this->primIDIncr = 8;
+ this->primID = id8;
+#endif
+ break;
+ case TOP_PATCHLIST_1:
+ case TOP_PATCHLIST_2:
+ case TOP_PATCHLIST_3:
+ case TOP_PATCHLIST_4:
+ case TOP_PATCHLIST_5:
+ case TOP_PATCHLIST_6:
+ case TOP_PATCHLIST_7:
+ case TOP_PATCHLIST_8:
+ case TOP_PATCHLIST_9:
+ case TOP_PATCHLIST_10:
+ case TOP_PATCHLIST_11:
+ case TOP_PATCHLIST_12:
+ case TOP_PATCHLIST_13:
+ case TOP_PATCHLIST_14:
+ case TOP_PATCHLIST_15:
+ case TOP_PATCHLIST_16:
+ case TOP_PATCHLIST_17:
+ case TOP_PATCHLIST_18:
+ case TOP_PATCHLIST_19:
+ case TOP_PATCHLIST_20:
+ case TOP_PATCHLIST_21:
+ case TOP_PATCHLIST_22:
+ case TOP_PATCHLIST_23:
+ case TOP_PATCHLIST_24:
+ case TOP_PATCHLIST_25:
+ case TOP_PATCHLIST_26:
+ case TOP_PATCHLIST_27:
+ case TOP_PATCHLIST_28:
+ case TOP_PATCHLIST_29:
+ case TOP_PATCHLIST_30:
+ case TOP_PATCHLIST_31:
+ case TOP_PATCHLIST_32:
+ // Always run KNOB_SIMD_WIDTH number of patches at a time.
#if USE_SIMD16_FRONTEND
- this->primIDIncr = 16;
- this->primID = id16;
+ this->primIDIncr = 16;
+ this->primID = id16;
#else
- this->primIDIncr = 8;
- this->primID = id8;
+ this->primIDIncr = 8;
+ this->primID = id8;
#endif
- break;
+ break;
- default:
- SWR_INVALID("Invalid topology: %d", this->binTopology);
- break;
+ default:
+ SWR_INVALID("Invalid topology: %d", this->binTopology);
+ break;
};
-
}
#endif
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.cpp
-*
-* @brief Implementation for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.cpp
+ *
+ * @brief Implementation for the rasterizer.
+ *
+ ******************************************************************************/
#include <vector>
#include <algorithm>
#include "memory/tilingtraits.h"
#include "rasterizer_impl.h"
-PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
+PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
+ [STATE_VALID_TRI_EDGE_COUNT][2];
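+// Dispatch table indexed as [sample count][center pattern][conservative]
+// [input coverage][edge enable][scissor edges]; see GetRasterizerFunc below.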
-void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
- const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
+ const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pData);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
{
// bloat line to two tris and call the triangle rasterizer twice
RDTSC_BEGIN(BERasterizeLine, pDC->drawId);
- const API_STATE &state = GetApiState(pDC);
- const SWR_RASTSTATE &rastState = state.rastState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
// macrotile dimensioning
uint32_t macroX, macroY;
MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
- int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
- int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
- int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
+ int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
+ int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
+ int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
- const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+ const SWR_RECT& scissorInFixedPoint =
+ state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
// create a copy of the triangle buffer to write our adjusted vertices to
OSALIGNSIMD(float) newTriBuffer[4 * 4];
TRIANGLE_WORK_DESC newWorkDesc = workDesc;
- newWorkDesc.pTriBuffer = &newTriBuffer[0];
+ newWorkDesc.pTriBuffer = &newTriBuffer[0];
// create a copy of the attrib buffer to write our adjusted attribs to
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
__m128 vX, vY, vZ, vRecipW;
- vX = _mm_load_ps(workDesc.pTriBuffer);
- vY = _mm_load_ps(workDesc.pTriBuffer + 4);
- vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
+ vX = _mm_load_ps(workDesc.pTriBuffer);
+ vY = _mm_load_ps(workDesc.pTriBuffer + 4);
+ vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
// triangle 0
// v0,v1 -> v0,v0,v1
- __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
- __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
- __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
+ __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
+ __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
+ __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
__m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0));
__m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth);
- __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
+ __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
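+    // Each triangle duplicates one line endpoint; the half-width adjustment is
+    // applied below to expand that degenerate tri into one half of the bloated
+    // line quad (vBloat0/vBloat1 are assumed here to hold +/- half-width factors).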
if (workDesc.triFlags.yMajor)
{
vXa = _mm_add_ps(vAdjust, vXa);
}
// Store user clip distances for triangle 0
- float newClipBuffer[3 * 8];
+ float newClipBuffer[3 * 8];
uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
if (numClipDist)
{
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
- SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
+ rastState.bIsCenterPattern,
+ false,
+ SWR_INPUT_COVERAGE_NONE,
+ EdgeValToEdgeState(ALL_EDGES_VALID),
+ (pDC->pState->state.scissorsTileAligned == false));
// make sure this macrotile intersects the triangle
__m128i vXai = fpToFixedPoint(vXa);
OSALIGNSIMD(SWR_RECT) bboxA;
calcBoundingBoxInt(vXai, vYai, bboxA);
- if (!(bboxA.xmin > macroBoxRight ||
- bboxA.xmin > scissorInFixedPoint.xmax ||
- bboxA.xmax - 1 < macroBoxLeft ||
- bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
- bboxA.ymin > macroBoxBottom ||
- bboxA.ymin > scissorInFixedPoint.ymax ||
- bboxA.ymax - 1 < macroBoxTop ||
- bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
+ if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
+ bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
+ bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
+ bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
+ {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
// triangle 1
// v0,v1 -> v1,v1,v0
- vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
- vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
- vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
+ vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
+ vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
+ vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1));
vAdjust = _mm_mul_ps(vLineWidth, vBloat1);
vYai = fpToFixedPoint(vYa);
calcBoundingBoxInt(vXai, vYai, bboxA);
- if (!(bboxA.xmin > macroBoxRight ||
- bboxA.xmin > scissorInFixedPoint.xmax ||
- bboxA.xmax - 1 < macroBoxLeft ||
- bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
- bboxA.ymin > macroBoxBottom ||
- bboxA.ymin > scissorInFixedPoint.ymax ||
- bboxA.ymax - 1 < macroBoxTop ||
- bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
+ if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
+ bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
+ bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
+ bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
+ {
// rasterize triangle
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
}
    RDTSC_END(BERasterizeLine, 1);
}
-void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
+void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
}
#endif
- const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
- const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
+ const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
+ const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
- // map x,y relative offsets from start of raster tile to bit position in
+ // map x,y relative offsets from start of raster tile to bit position in
// coverage mask for the point
- static const uint32_t coverageMap[8][8] = {
- { 0, 1, 4, 5, 8, 9, 12, 13 },
- { 2, 3, 6, 7, 10, 11, 14, 15 },
- { 16, 17, 20, 21, 24, 25, 28, 29 },
- { 18, 19, 22, 23, 26, 27, 30, 31 },
- { 32, 33, 36, 37, 40, 41, 44, 45 },
- { 34, 35, 38, 39, 42, 43, 46, 47 },
- { 48, 49, 52, 53, 56, 57, 60, 61 },
- { 50, 51, 54, 55, 58, 59, 62, 63 }
- };
+ static const uint32_t coverageMap[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
+ {2, 3, 6, 7, 10, 11, 14, 15},
+ {16, 17, 20, 21, 24, 25, 28, 29},
+ {18, 19, 22, 23, 26, 27, 30, 31},
+ {32, 33, 36, 37, 40, 41, 44, 45},
+ {34, 35, 38, 39, 42, 43, 46, 47},
+ {48, 49, 52, 53, 56, 57, 60, 61},
+ {50, 51, 54, 55, 58, 59, 62, 63}};
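+    // Reading of the table above (not a formula the code evaluates): the tile is
+    // walked in 2x2 pixel quads, i.e. for an 8x8 raster tile,
+    // bit = ((y >> 1) * 4 + (x >> 1)) * 4 + (y & 1) * 2 + (x & 1).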
OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
// @todo use structs for readability
uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer;
uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1);
- float z = *(workDesc.pTriBuffer + 2);
+ float z = *(workDesc.pTriBuffer + 2);
// construct triangle descriptor for point
// no interpolation, set up i,j for constant interpolation of z and attribs
// no persp divide needed for points
triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
- triDesc.triFlags = workDesc.triFlags;
- triDesc.recipDet = 1.0f;
+ triDesc.triFlags = workDesc.triFlags;
+ triDesc.recipDet = 1.0f;
triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f;
triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f;
triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f;
triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
RenderOutputBuffers renderBuffers;
- GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
- renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
+ GetRenderHotTiles(pDC,
+ workerId,
+ macroTile,
+ tileAlignedX >> KNOB_TILE_X_DIM_SHIFT,
+ tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
+ renderBuffers,
+ triDesc.triFlags.renderTargetArrayIndex);
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
RDTSC_END(BEPixelBackend, 0);
}
-void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
+void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
{
- const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
- const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
- const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+ const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
+ const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
+ const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0;
// create a copy of the triangle buffer to write our adjusted vertices to
OSALIGNSIMD(float) newTriBuffer[4 * 4];
TRIANGLE_WORK_DESC newWorkDesc = workDesc;
- newWorkDesc.pTriBuffer = &newTriBuffer[0];
+ newWorkDesc.pTriBuffer = &newTriBuffer[0];
// create a copy of the attrib buffer to write our adjusted attribs to
OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
newWorkDesc.pAttribs = &newAttribBuffer[0];
newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
- newWorkDesc.numAttribs = workDesc.numAttribs;
- newWorkDesc.triFlags = workDesc.triFlags;
+ newWorkDesc.numAttribs = workDesc.numAttribs;
+ newWorkDesc.triFlags = workDesc.triFlags;
// construct two tris by bloating point by point size
float halfPointSize = workDesc.triFlags.pointSize * 0.5f;
- float lowerX = x - halfPointSize;
- float upperX = x + halfPointSize;
- float lowerY = y - halfPointSize;
- float upperY = y + halfPointSize;
+ float lowerX = x - halfPointSize;
+ float upperX = x + halfPointSize;
+ float lowerY = y - halfPointSize;
+ float upperY = y + halfPointSize;
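+    // e.g. a point at (x, y) with pointSize s covers the screen-aligned square
+    // [x - s/2, x + s/2] x [y - s/2, y + s/2], emitted as the two triangles below.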
// tri 0
- float *pBuf = &newTriBuffer[0];
- *pBuf++ = lowerX;
- *pBuf++ = lowerX;
- *pBuf++ = upperX;
+ float* pBuf = &newTriBuffer[0];
+ *pBuf++ = lowerX;
+ *pBuf++ = lowerX;
+ *pBuf++ = upperX;
pBuf++;
*pBuf++ = lowerY;
*pBuf++ = upperY;
// setup triangle rasterizer function
PFN_WORK_FUNC pfnTriRast;
// conservative rast not supported for points/lines
- pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
- SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
+ pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
+ rastState.bIsCenterPattern,
+ false,
+ SWR_INPUT_COVERAGE_NONE,
+ EdgeValToEdgeState(ALL_EDGES_VALID),
+ (pDC->pState->state.scissorsTileAligned == false));
// overwrite texcoords for point sprites
if (isPointSpriteTexCoordEnabled)
newWorkDesc.pAttribs = &newAttribBuffer[0];
// overwrite texcoord for point sprites
- uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- DWORD texCoordAttrib = 0;
+ uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
+ DWORD texCoordAttrib = 0;
while (_BitScanForward(&texCoordAttrib, texCoordMask))
{
pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
// tri 1
- pBuf = &newTriBuffer[0];
+ pBuf = &newTriBuffer[0];
*pBuf++ = lowerX;
*pBuf++ = upperX;
*pBuf++ = upperX;
if (isPointSpriteTexCoordEnabled)
{
- uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
- DWORD texCoordAttrib = 0;
+ uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
+ DWORD texCoordAttrib = 0;
while (_BitScanForward(&texCoordAttrib, texCoordMask))
{
pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1);
pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
-
}
else
{
}
// Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(
- SWR_MULTISAMPLE_COUNT numSamples,
- bool IsCenter,
- bool IsConservative,
- SWR_INPUT_COVERAGE InputCoverage,
- uint32_t EdgeEnable,
- bool RasterizeScissorEdges
-)
+PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
+ bool IsCenter,
+ bool IsConservative,
+ SWR_INPUT_COVERAGE InputCoverage,
+ uint32_t EdgeEnable,
+ bool RasterizeScissorEdges)
{
SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT);
SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT);
SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT);
- PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges];
+ PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage]
+ [EdgeEnable][RasterizeScissorEdges];
SWR_ASSERT(func);
return func;
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.h
-*
-* @brief Definitions for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.h
+ *
+ * @brief Definitions for the rasterizer.
+ *
+ ******************************************************************************/
#pragma once
#include "context.h"
#include "conservativeRast.h"
#include "multisample.h"
-void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
void InitRasterizerFunctions();
INLINE
enum TriEdgesValues
{
- NO_VALID_EDGES = 0,
- E0_E1_VALID = 0x3,
- E0_E2_VALID = 0x5,
- E1_E2_VALID = 0x6,
+ NO_VALID_EDGES = 0,
+ E0_E1_VALID = 0x3,
+ E0_E2_VALID = 0x5,
+ E1_E2_VALID = 0x6,
ALL_EDGES_VALID = 0x7,
VALID_TRI_EDGE_COUNT,
};
// Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(
- SWR_MULTISAMPLE_COUNT numSamples,
- bool IsCenter,
- bool IsConservative,
- SWR_INPUT_COVERAGE InputCoverage,
- uint32_t EdgeEnable,
- bool RasterizeScissorEdges);
+PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
+ bool IsCenter,
+ bool IsConservative,
+ SWR_INPUT_COVERAGE InputCoverage,
+ uint32_t EdgeEnable,
+ bool RasterizeScissorEdges);
//////////////////////////////////////////////////////////////////////////
-/// @brief ValidTriEdges convenience typedefs used for templated function
+/// @brief ValidTriEdges convenience typedefs used for templated function
/// specialization.
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
-typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
-typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
-typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
-typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
+typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
+typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
+typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
+typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
typedef std::integral_constant<uint32_t, STATE_ALL_EDGES_VALID> StateAllEdgesValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E1_VALID> StateE0E1ValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E2_VALID> StateE0E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_E1_E2_VALID> StateE1E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_NO_VALID_EDGES> StateNoEdgesValidT;
+typedef std::integral_constant<uint32_t, STATE_E0_E1_VALID> StateE0E1ValidT;
+typedef std::integral_constant<uint32_t, STATE_E0_E2_VALID> StateE0E2ValidT;
+typedef std::integral_constant<uint32_t, STATE_E1_E2_VALID> StateE1E2ValidT;
+typedef std::integral_constant<uint32_t, STATE_NO_VALID_EDGES> StateNoEdgesValidT;
// some specializations to convert from edge state to edge bitmask values
template <typename EdgeMask>
struct EdgeMaskVal
{
- static_assert(EdgeMask::value > STATE_ALL_EDGES_VALID, "Primary EdgeMaskVal shouldn't be instantiated");
+ static_assert(EdgeMask::value > STATE_ALL_EDGES_VALID,
+ "Primary EdgeMaskVal shouldn't be instantiated");
};
template <>
INLINE uint32_t EdgeValToEdgeState(uint32_t val)
{
SWR_ASSERT(val < VALID_TRI_EDGE_COUNT, "Unexpected tri edge mask");
- static const uint32_t edgeValToEdgeState[VALID_TRI_EDGE_COUNT] = { 0, 0, 0, 1, 0, 2, 3, 4 };
- return edgeValToEdgeState[val];
+ static const uint32_t edgeValToEdgeState[VALID_TRI_EDGE_COUNT] = {0, 0, 0, 1, 0, 2, 3, 4};
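+    // Compresses the sparse edge bitmasks (0x3, 0x5, 0x6, 0x7) into the dense
+    // STATE_* indices 1..4 used to index gRasterizerFuncs; unused mask values
+    // map to 0.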
+ return edgeValToEdgeState[val];
}
//////////////////////////////////////////////////////////////////////////
/// @struct RasterScissorEdgesT
-/// @brief Primary RasterScissorEdgesT templated struct that holds compile
-/// time information about the number of edges needed to be rasterized,
-/// If either the scissor rect or conservative rast is enabled,
+/// @brief Primary RasterScissorEdgesT templated struct that holds compile-time
+/// information about the number of edges that need to be rasterized.
+/// If either the scissor rect or conservative rast is enabled,
/// the scissor test is enabled and the rasterizer will test
/// 3 triangle edges + 4 scissor edges for coverage.
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
struct RasterEdgeTraits
{
- typedef std::true_type RasterizeScissorEdgesT;
+ typedef std::true_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 7> NumEdgesT;
- //typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
+ // typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
typedef typename EdgeMaskVal<EdgeMaskT>::T ValidEdgeMaskT;
};
//////////////////////////////////////////////////////////////////////////
/// @brief specialization of RasterEdgeTraits. If neither scissor rect
-/// nor conservative rast is enabled, only test 3 triangle edges
+/// nor conservative rast is enabled, only test 3 triangle edges
/// for coverage
template <typename EdgeMaskT>
struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
{
- typedef std::false_type RasterizeScissorEdgesT;
+ typedef std::false_type RasterizeScissorEdgesT;
typedef std::integral_constant<uint32_t, 3> NumEdgesT;
// no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
//////////////////////////////////////////////////////////////////////////
/// @struct RasterizerTraits
-/// @brief templated struct that holds compile time information used
+/// @brief templated struct that holds compile time information used
/// during rasterization. Inherits EdgeTraits and ConservativeRastBETraits.
/// @tparam NumSamplesT: number of multisamples
/// @tparam ConservativeT: is this a conservative rasterization
/// @tparam InputCoverageT: what type of input coverage is the PS expecting?
/// (only used with conservative rasterization)
/// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
-template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT>
+template <typename NumSamplesT,
+ typename CenterPatternT,
+ typename ConservativeT,
+ typename InputCoverageT,
+ typename EdgeEnableT,
+ typename RasterScissorEdgesT>
struct _RasterizerTraits : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
- public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, EdgeEnableT>
+ public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, EdgeEnableT>
{
- typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
+ typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
+ CenterPatternT::value>
+ MT;
/// Fixed point precision the rasterizer is using
typedef FixedPointTraits<Fixed_16_8> PrecisionT;
/// Fixed point precision of the edge tests used during rasterization
typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
- // If conservative rast or MSAA center pattern is enabled, only need a single sample coverage test, with the result copied to all samples
- typedef std::integral_constant<int, ConservativeT::value ? 1 : MT::numCoverageSamples> NumCoverageSamplesT;
+ // If conservative rast or MSAA center pattern is enabled, only need a single sample coverage
+ // test, with the result copied to all samples
+ typedef std::integral_constant<int, ConservativeT::value ? 1 : MT::numCoverageSamples>
+ NumCoverageSamplesT;
- static_assert(EdgePrecisionT::BitsT::value >= ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
- "Rasterizer edge fixed point precision < required conservative rast precision");
+ static_assert(
+ EdgePrecisionT::BitsT::value >=
+ ConservativeRastBETraits<ConservativeT,
+ InputCoverageT>::ConservativePrecisionT::BitsT::value,
+ "Rasterizer edge fixed point precision < required conservative rast precision");
/// constants used to offset between different types of raster tiles
- static const int colorRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
- static const int depthRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
- static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
- static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * colorRasterTileStep};
- static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM)* depthRasterTileStep};
- static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * stencilRasterTileStep};
+ static const int colorRasterTileStep{
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) *
+ MT::numSamples};
+ static const int depthRasterTileStep{
+ (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) *
+ MT::numSamples};
+ static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM *
+ (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) *
+ MT::numSamples};
+ static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+ colorRasterTileStep};
+ static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+ depthRasterTileStep};
+ static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+ stencilRasterTileStep};
};
-template <uint32_t NumSamplesT, uint32_t CenterPatternT, uint32_t ConservativeT, uint32_t InputCoverageT, uint32_t EdgeEnableT, uint32_t RasterScissorEdgesT>
-struct RasterizerTraits final : public _RasterizerTraits <
- std::integral_constant<uint32_t, NumSamplesT>,
- std::integral_constant<bool, CenterPatternT != 0>,
- std::integral_constant<bool, ConservativeT != 0>,
- std::integral_constant<uint32_t, InputCoverageT>,
- std::integral_constant<uint32_t, EdgeEnableT>,
- std::integral_constant<bool, RasterScissorEdgesT != 0> >
-{};
+template <uint32_t NumSamplesT,
+ uint32_t CenterPatternT,
+ uint32_t ConservativeT,
+ uint32_t InputCoverageT,
+ uint32_t EdgeEnableT,
+ uint32_t RasterScissorEdgesT>
+struct RasterizerTraits final
+ : public _RasterizerTraits<std::integral_constant<uint32_t, NumSamplesT>,
+ std::integral_constant<bool, CenterPatternT != 0>,
+ std::integral_constant<bool, ConservativeT != 0>,
+ std::integral_constant<uint32_t, InputCoverageT>,
+ std::integral_constant<uint32_t, EdgeEnableT>,
+ std::integral_constant<bool, RasterScissorEdgesT != 0>>
+{
+};
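//////////////////////////////////////////////////////////////////////////
// Illustrative sketch (hypothetical, not from the source): the wrapper above
// lifts plain uint32_t template arguments into std::integral_constant types,
// which is what lets the partial specializations in this file fire. The same
// idiom drives NumCoverageSamplesT collapsing to 1 under conservative rast:
#include <cstdint>
#include <type_traits>
template <uint32_t ConservativeT, uint32_t NumSamplesT>
struct SketchCoverageSamples
{
    // conservative rast needs a single coverage test; the result is later
    // copied to all samples
    typedef std::integral_constant<int,
                                   ConservativeT != 0 ? 1 : static_cast<int>(NumSamplesT)>
        T;
};
static_assert(SketchCoverageSamples<1, 4>::T::value == 1, "conservative collapses to 1 test");
static_assert(SketchCoverageSamples<0, 4>::T::value == 4, "otherwise one test per sample");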
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.cpp
-*
-* @brief Implementation for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.cpp
+ *
+ * @brief Implementation for the rasterizer.
+ *
+ ******************************************************************************/
#include <vector>
#include <algorithm>
#include "tilemgr.h"
#include "memory/tilingtraits.h"
-extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
+extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
+ [STATE_VALID_TRI_EDGE_COUNT][2];
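// Illustrative note: the six array dimensions above presumably mirror the six
// RasterizerTraits template parameters, so selecting a rasterizer function is
// a table lookup on runtime state. Hypothetical wrapper, for illustration only:
static inline PFN_WORK_FUNC SketchSelectRasterizer(uint32_t samples,
                                                   uint32_t centerPattern,
                                                   uint32_t conservative,
                                                   uint32_t inputCoverage,
                                                   uint32_t validEdgeMask,
                                                   uint32_t scissorEnable)
{
    return gRasterizerFuncs[samples][centerPattern][conservative][inputCoverage]
                           [validEdgeMask][scissorEnable];
}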
template <uint32_t numSamples = 1>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
+void GetRenderHotTiles(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroID,
+ uint32_t x,
+ uint32_t y,
+ RenderOutputBuffers& renderBuffers,
+ uint32_t renderTargetArrayIndex);
template <typename RT>
-void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
+void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers);
template <typename RT>
-void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow);
+void StepRasterTileY(uint32_t colorHotTileMask,
+ RenderOutputBuffers& buffers,
+ RenderOutputBuffers& startBufferRow);
-#define MASKTOVEC(i3,i2,i1,i0) {-i0,-i1,-i2,-i3}
-static const __m256d gMaskToVecpd[] =
-{
+#define MASKTOVEC(i3, i2, i1, i0) \
+ { \
+ -i0, -i1, -i2, -i3 \
+ }
+static const __m256d gMaskToVecpd[] = {
MASKTOVEC(0, 0, 0, 0),
MASKTOVEC(0, 0, 0, 1),
MASKTOVEC(0, 0, 1, 0),
struct EDGE
{
- double a, b; // a, b edge coefficients in fix8
- double stepQuadX; // step to adjacent horizontal quad in fix16
- double stepQuadY; // step to adjacent vertical quad in fix16
- double stepRasterTileX; // step to adjacent horizontal raster tile in fix16
- double stepRasterTileY; // step to adjacent vertical raster tile in fix16
+ double a, b; // a, b edge coefficients in fix8
+ double stepQuadX; // step to adjacent horizontal quad in fix16
+ double stepQuadY; // step to adjacent vertical quad in fix16
+ double stepRasterTileX; // step to adjacent horizontal raster tile in fix16
+ double stepRasterTileY; // step to adjacent vertical raster tile in fix16
__m256d vQuadOffsets; // offsets for 4 samples of a quad
__m256d vRasterTileOffsets; // offsets for the 4 corners of a raster tile
//////////////////////////////////////////////////////////////////////////
/// @brief rasterize a raster tile partially covered by the triangle
-/// @param vEdge0-2 - edge equations evaluated at sample pos at each of the 4 corners of a raster tile
+/// @param vEdge0-2 - edge equations evaluated at sample pos at each of the 4 corners of a raster
+/// tile
/// @param vA, vB - A & B coefs for each edge of the triangle (Ax + By + C)
/// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad.
/// Used to step between quads when sweeping over the raster tile.
-template<uint32_t NumEdges, typename EdgeMaskT>
-INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges)
+template <uint32_t NumEdges, typename EdgeMaskT>
+INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT* pDC,
+ double startEdges[NumEdges],
+ EDGE* pRastEdges)
{
uint64_t coverageMask = 0;
// fast unrolled version for 8x8 tile
#if KNOB_TILE_X_DIM == 8 && KNOB_TILE_Y_DIM == 8
- int edgeMask[NumEdges];
+ int edgeMask[NumEdges];
uint64_t mask;
- auto eval_lambda = [&](int e){edgeMask[e] = _mm256_movemask_pd(vEdges[e]);};
- auto update_lambda = [&](int e){mask &= edgeMask[e];};
- auto incx_lambda = [&](int e){vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]);};
- auto incy_lambda = [&](int e){vEdges[e] = _mm256_add_pd(vEdges[e], vStepY[e]);};
- auto decx_lambda = [&](int e){vEdges[e] = _mm256_sub_pd(vEdges[e], vStepX[e]);};
+ auto eval_lambda = [&](int e) { edgeMask[e] = _mm256_movemask_pd(vEdges[e]); };
+ auto update_lambda = [&](int e) { mask &= edgeMask[e]; };
+ auto incx_lambda = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]); };
+ auto incy_lambda = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepY[e]); };
+ auto decx_lambda = [&](int e) { vEdges[e] = _mm256_sub_pd(vEdges[e], vStepX[e]); };
// evaluate which pixels in the quad are covered
-#define EVAL \
- UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
+#define EVAL UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
// update coverage mask
// if edge 0 is degenerate it will be skipped; init the mask to full coverage
-#define UPDATE_MASK(bit) \
- if(std::is_same<EdgeMaskT, E1E2ValidT>::value || std::is_same<EdgeMaskT, NoEdgesValidT>::value){\
- mask = 0xf;\
- }\
- else{\
- mask = edgeMask[0]; \
- }\
- UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
- coverageMask |= (mask << bit);
-
- // step in the +x direction to the next quad
-#define INCX \
- UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
-
- // step in the +y direction to the next quad
-#define INCY \
- UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
-
- // step in the -x direction to the next quad
-#define DECX \
- UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
-
- // sweep 2x2 quad back and forth through the raster tile,
+#define UPDATE_MASK(bit) \
+ if (std::is_same<EdgeMaskT, E1E2ValidT>::value || \
+ std::is_same<EdgeMaskT, NoEdgesValidT>::value) \
+ { \
+ mask = 0xf; \
+ } \
+ else \
+ { \
+ mask = edgeMask[0]; \
+ } \
+ UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
+ coverageMask |= (mask << bit);
+
+ // step in the +x direction to the next quad
+#define INCX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
+
+ // step in the +y direction to the next quad
+#define INCY UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
+
+ // step in the -x direction to the next quad
+#define DECX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
+
+ // sweep 2x2 quad back and forth through the raster tile,
// computing coverage masks for the entire tile
// raster tile
- // 0 1 2 3 4 5 6 7
+ // 0 1 2 3 4 5 6 7
// x x
- // x x ------------------>
+ // x x ------------------>
// x x |
// <-----------------x x V
// ..
UPDATE_MASK(12);
INCY;
- //row 1
+ // row 1
EVAL;
UPDATE_MASK(28);
DECX;
UPDATE_MASK(48);
#else
uint32_t bit = 0;
- for (uint32_t y = 0; y < KNOB_TILE_Y_DIM/2; ++y)
+ for (uint32_t y = 0; y < KNOB_TILE_Y_DIM / 2; ++y)
{
__m256d vStartOfRowEdge[NumEdges];
for (uint32_t e = 0; e < NumEdges; ++e)
vStartOfRowEdge[e] = vEdges[e];
}
- for (uint32_t x = 0; x < KNOB_TILE_X_DIM/2; ++x)
+ for (uint32_t x = 0; x < KNOB_TILE_X_DIM / 2; ++x)
{
int edgeMask[NumEdges];
for (uint32_t e = 0; e < NumEdges; ++e)
{
vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]);
}
- bit+=4;
+ bit += 4;
}
// step to the next row
}
#endif
return coverageMask;
-
}
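// Illustrative sketch (hypothetical helper, assumes AVX via <immintrin.h>):
// the EVAL/UPDATE_MASK machinery above reduces to reading sign bits, where a
// negative edge value means the pixel is inside that edge.
#include <immintrin.h>
static inline int SketchQuadCoverage(__m256d vEdge0, __m256d vEdge1, __m256d vEdge2)
{
    // one bit per pixel of the 2x2 quad; a pixel is covered only when it is
    // inside all three triangle edges
    return _mm256_movemask_pd(vEdge0) & _mm256_movemask_pd(vEdge1) &
           _mm256_movemask_pd(vEdge2);
}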
// Top left rule:
// Top: if an edge is horizontal, and it is above other edges in tri pixel space, it is a 'top' edge
-// Left: if an edge is not horizontal, and it is on the left side of the triangle in pixel space, it is a 'left' edge
-// Top left: a sample is in if it is a top or left edge.
-// Out: !(horizontal && above) = !horizontal && below
-// Out: !horizontal && left = !(!horizontal && left) = horizontal and right
-INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d &vEdge)
+// Left: if an edge is not horizontal, and it is on the left side of the triangle in pixel space,
+// it is a 'left' edge
+// Top left: a sample is in if it lies on a top or left edge.
+// Out: !(horizontal && above) = !horizontal || below
+// Out: !(!horizontal && left) = horizontal || right
+INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d& vEdge)
{
// if vA < 0, vC--
// if vA == 0 && vB < 0, vC--
- __m256d vEdgeOut = vEdge;
+ __m256d vEdgeOut = vEdge;
__m256d vEdgeAdjust = _mm256_sub_pd(vEdge, _mm256_set1_pd(1.0));
// if vA < 0 (line is not horizontal and below)
// if vA == 0 && vB < 0 (line is horizontal and we're on the left edge of a tri)
__m128i vCmp = _mm_cmpeq_epi32(vA, _mm_setzero_si128());
- int msk2 = _mm_movemask_ps(_mm_castsi128_ps(vCmp));
+ int msk2 = _mm_movemask_ps(_mm_castsi128_ps(vCmp));
msk2 &= _mm_movemask_ps(_mm_castsi128_ps(vB));
// if either of these are true and we're on the line (edge == 0), bump it outside the line
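// Illustrative sketch (hypothetical scalar equivalent of the vector blend
// above, same sign conventions): an edge that is neither 'top' nor 'left'
// gets bumped outward by one unit when a sample lies exactly on it.
#include <cstdint>
static inline double SketchTopLeftAdjust(int32_t a, int32_t b, double edge)
{
    if ((a < 0 || (a == 0 && b < 0)) && edge == 0.0) // on the line, not top/left
        return edge - 1.0;                           // bump outside the line
    return edge;
}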
//////////////////////////////////////////////////////////////////////////
/// @brief calculates difference in precision between the result of manh
/// calculation and the edge precision, based on compile time trait values
-template<typename RT>
+template <typename RT>
constexpr int64_t ManhToEdgePrecisionAdjust()
{
- static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
+ static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >=
+ RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");
- return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
+ return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) -
+ RT::EdgePrecisionT::BitsT::value);
}
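// Worked example (assumed bit widths): with 8 fractional bits of vertex
// precision (16.8), an assumed 9-bit conservative offset precision, and x.16
// edge precision, the manh product carries 8 + 9 = 17 fractional bits and
// must come down by (8 + 9) - 16 = 1 bit to land in edge precision.
#include <cstdint>
constexpr int64_t SketchManhAdjust(int64_t vtxBits, int64_t consBits, int64_t edgeBits)
{
    return (vtxBits + consBits) - edgeBits;
}
static_assert(SketchManhAdjust(8, 9, 16) == 1, "one-bit adjust in the assumed config");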
//////////////////////////////////////////////////////////////////////////
/// @struct adjustEdgeConservative
-/// @brief Primary template definition used for partially specializing
+/// @brief Primary template definition used for partially specializing
/// the adjustEdgeConservative function. This struct should never
/// be instantiated.
/// @tparam RT: rasterizer traits
//////////////////////////////////////////////////////////////////////////
/// @brief Performs calculations to adjust each edge of a triangle away
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
- /// direction.
+ /// direction.
///
/// Uncertainty regions arise from fixed point rounding, which
/// can snap a vertex +/- by min fixed point value.
/// Adding 1/2 pixel in x/y bumps the edge equation tests out towards the pixel corners.
- /// This allows the rasterizer to test for coverage only at the pixel center,
+ /// This allows the rasterizer to test for coverage only at the pixel center,
/// instead of having to test individual pixel corners for conservative coverage
- INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+ INLINE adjustEdgeConservative(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
{
- // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away
- // from the pixel center (in the direction of the edge normal A/B)
+ // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge
+ // away from the pixel center (in the direction of the edge normal A/B)
// edge = Ax + By + C - (manh/e)
// manh = manhattan distance = abs(A) + abs(B)
// e = absolute rounding error from snapping from float to fixed point precision
- // 'fixed point' multiply (in double to be avx1 friendly)
+ // 'fixed point' multiply (in double to be avx1 friendly)
// need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
- __m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
- __m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)),
- _mm256_mul_pd(vBai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)));
-
- static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
+ __m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)),
+ vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
+ __m256d manh =
+ _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)),
+ _mm256_mul_pd(vBai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)));
+
+ static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >=
+ RT::EdgePrecisionT::BitsT::value,
"Inadequate precision of result of manh calculation ");
- // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
- // since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
+        // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the
+        // same precision. Since we're doing fixed math in double format, multiply by multiples of
+        // 1/2 instead of doing a bit shift right.
manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5));
- // move the edge away from the pixel center by the required conservative precision + 1/2 pixel
- // this allows the rasterizer to do a single conservative coverage test to see if the primitive
- // intersects the pixel at all
+        // move the edge away from the pixel center by the required conservative precision + 1/2
+        // pixel. This allows the rasterizer to do a single conservative coverage test to see if
+        // the primitive intersects the pixel at all.
vEdge = _mm256_sub_pd(vEdge, manh);
};
};
template <typename RT>
struct adjustEdgeConservative<RT, std::integral_constant<int32_t, 0>>
{
- INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge) {};
+ INLINE adjustEdgeConservative(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge){};
};
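// Illustrative sketch (hypothetical scalar form of the conservative bump
// above, with 'offset' standing in for ConservativeEdgeOffsetT::value):
#include <cmath>
static inline double SketchConservativeBump(double a, double b, double edge, double offset)
{
    // manhattan distance of the edge normal, scaled by the uncertainty offset
    double manh = std::abs(a) * offset + std::abs(b) * offset;
    return edge - manh; // subtracting moves the edge away from the pixel center
}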
//////////////////////////////////////////////////////////////////////////
-/// @brief calculates the distance a degenerate BBox needs to be adjusted
+/// @brief calculates the distance a degenerate BBox needs to be adjusted
/// for conservative rast based on compile time trait values
-template<typename RT>
+template <typename RT>
constexpr int64_t ConservativeScissorOffset()
{
- static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision");
- // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges
- typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT;
+ static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0,
+ "Rasterizer precision > conservative precision");
+ // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox
+ // when calculating scissor edges
+ typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1>
+ DegenerateEdgeOffsetT;
// 1/2 pixel edge offset + conservative offset - degenerateTriangle
- return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
+ return RT::ConservativeEdgeOffsetT::value -
+ (DegenerateEdgeOffsetT::value
+ << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
}
//////////////////////////////////////////////////////////////////////////
/// @brief Performs calculations to adjust a vector of evaluated edges out
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
-/// direction.
+/// direction.
template <typename RT>
-INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
+INLINE void adjustScissorEdge(const double a, const double b, __m256d& vEdge)
{
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
- int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>();
+ int64_t manh =
+ ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >>
+ ManhToEdgePrecisionAdjust<RT>();
vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs calculations to adjust a scalar evaluated edge out
/// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
-/// direction.
+/// direction.
template <typename RT, typename OffsetT>
INLINE double adjustScalarEdge(const double a, const double b, const double Edge)
{
int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
- int64_t manh = ((aabs * OffsetT::value) + (babs * OffsetT::value)) >> ManhToEdgePrecisionAdjust<RT>();
+ int64_t manh =
+ ((aabs * OffsetT::value) + (babs * OffsetT::value)) >> ManhToEdgePrecisionAdjust<RT>();
return (Edge - manh);
};
template <typename RT, typename EdgeOffsetT>
struct adjustEdgesFix16
{
- INLINE adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+ INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
{
- static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
- "Edge equation expected to be in x.16 fixed point");
+ static_assert(
+ std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
+ "Edge equation expected to be in x.16 fixed point");
- static_assert(RT::IsConservativeT::value, "Edge offset assumes conservative rasterization is enabled");
+ static_assert(RT::IsConservativeT::value,
+ "Edge offset assumes conservative rasterization is enabled");
// need to apply any edge offsets before applying the top-left rule
adjustEdgeConservative<RT, EdgeOffsetT>(vAi, vBi, vEdge);
template <typename RT>
struct adjustEdgesFix16<RT, std::integral_constant<int32_t, 0>>
{
- INLINE adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+ INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
{
adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
}
return std::max(dzdx, dzdy);
}
-INLINE float ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pDesc, const float* z)
+INLINE float
+ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pDesc, const float* z)
{
if (pState->depthFormat == R24_UNORM_X8_TYPELESS)
{
SWR_ASSERT(pState->depthFormat == R32_FLOAT);
    // for f32 depth, factor = 2^(exponent(max(abs(z))) - 23)
- float zMax = std::max(fabsf(z[0]), std::max(fabsf(z[1]), fabsf(z[2])));
+ float zMax = std::max(fabsf(z[0]), std::max(fabsf(z[1]), fabsf(z[2])));
uint32_t zMaxInt = *(uint32_t*)&zMax;
zMaxInt &= 0x7f800000;
zMax = *(float*)&zMaxInt;
}
}
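// Illustrative sketch (hypothetical helper): the exponent-extraction trick
// above, stand-alone and with a well-defined type pun. Masking with
// 0x7f800000 keeps only the exponent field, so the reinterpreted float is
// exactly 2^exponent(zMax) for normal inputs.
#include <cstdint>
#include <cstring>
static inline float SketchExp2OfExponent(float zMax)
{
    uint32_t bits;
    std::memcpy(&bits, &zMax, sizeof(bits));
    bits &= 0x7f800000u; // zero the sign and mantissa, keep the exponent
    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}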
-INLINE float ComputeDepthBias(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pTri, const float* z)
+INLINE float
+ComputeDepthBias(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pTri, const float* z)
{
if (pState->depthBias == 0 && pState->slopeScaledDepthBias == 0)
{
static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
// try to avoid _chkstk insertions; make this thread local
-static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
+static THREAD
+OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
INLINE
void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
__m256d vQuadStepXFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.a), vQuadOffsetsXIntFix8);
__m256d vQuadStepYFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.b), vQuadOffsetsYIntFix8);
- edge.vQuadOffsets = _mm256_add_pd(vQuadStepXFix16, vQuadStepYFix16);
+ edge.vQuadOffsets = _mm256_add_pd(vQuadStepXFix16, vQuadStepYFix16);
// compute raster tile offsets
- const __m256d vTileOffsetsXIntFix8 = _mm256_set_pd((KNOB_TILE_X_DIM - 1)*FIXED_POINT_SCALE, 0, (KNOB_TILE_X_DIM - 1)*FIXED_POINT_SCALE, 0);
- const __m256d vTileOffsetsYIntFix8 = _mm256_set_pd((KNOB_TILE_Y_DIM - 1)*FIXED_POINT_SCALE, (KNOB_TILE_Y_DIM - 1)*FIXED_POINT_SCALE, 0, 0);
+ const __m256d vTileOffsetsXIntFix8 = _mm256_set_pd(
+ (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0, (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0);
+ const __m256d vTileOffsetsYIntFix8 = _mm256_set_pd(
+ (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, 0, 0);
__m256d vTileStepXFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.a), vTileOffsetsXIntFix8);
__m256d vTileStepYFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.b), vTileOffsetsYIntFix8);
}
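// Illustrative note: every precomputed step above follows from the linearity
// of the edge equation, E(x + dx, y + dy) = E(x, y) + A*dx + B*dy, so moving
// to the adjacent quad or raster tile costs a single add at rasterization
// time. Hypothetical scalar form:
static inline double SketchStepEdge(double edge, double a, double b, double dx, double dy)
{
    return edge + a * dx + b * dy;
}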
//////////////////////////////////////////////////////////////////////////
-/// @brief Primary template definition used for partially specializing
-/// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel
+/// @brief Primary template definition used for partially specializing
+/// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel
/// corner to sample position, and test for coverage
/// @tparam NumSamplesT: multisample count
template <typename NumSamplesT>
-INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3], const __m256d* vEdgeFix16,
- int32_t &mask0, int32_t &mask1, int32_t &mask2)
+INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3],
+ const __m256d* vEdgeFix16,
+ int32_t& mask0,
+ int32_t& mask1,
+ int32_t& mask2)
{
__m256d vSampleBboxTest0, vSampleBboxTest1, vSampleBboxTest2;
// evaluate edge equations at the tile multisample bounding box
vSampleBboxTest0 = _mm256_add_pd(vEdgeTileBbox[0], vEdgeFix16[0]);
vSampleBboxTest1 = _mm256_add_pd(vEdgeTileBbox[1], vEdgeFix16[1]);
vSampleBboxTest2 = _mm256_add_pd(vEdgeTileBbox[2], vEdgeFix16[2]);
- mask0 = _mm256_movemask_pd(vSampleBboxTest0);
- mask1 = _mm256_movemask_pd(vSampleBboxTest1);
- mask2 = _mm256_movemask_pd(vSampleBboxTest2);
+ mask0 = _mm256_movemask_pd(vSampleBboxTest0);
+ mask1 = _mm256_movemask_pd(vSampleBboxTest1);
+ mask2 = _mm256_movemask_pd(vSampleBboxTest2);
}
//////////////////////////////////////////////////////////////////////////
/// @brief UpdateEdgeMasks<SingleSampleT> specialization, instantiated
/// when only rasterizing a single coverage test point
template <>
-INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* vEdgeFix16,
- int32_t &mask0, int32_t &mask1, int32_t &mask2)
+INLINE void UpdateEdgeMasks<SingleSampleT>(
+ const __m256d (&)[3], const __m256d* vEdgeFix16, int32_t& mask0, int32_t& mask1, int32_t& mask2)
{
mask0 = _mm256_movemask_pd(vEdgeFix16[0]);
mask1 = _mm256_movemask_pd(vEdgeFix16[1]);
//////////////////////////////////////////////////////////////////////////
/// @struct ComputeScissorEdges
/// @brief Primary template definition. Allows the function to be generically
-/// called. When paired with below specializations, will result in an empty
+/// called. When paired with below specializations, will result in an empty
/// inlined function if scissor is not enabled
/// @tparam RasterScissorEdgesT: is scissor enabled?
/// @tparam IsConservativeT: is conservative rast enabled?
template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
struct ComputeScissorEdges
{
- INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
+ INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
+ const SWR_RECT& scissorBBox,
+ const int32_t x,
+ const int32_t y,
+ EDGE (&rastEdges)[RT::NumEdgesT::value],
+ __m256d (&vEdgeFix16)[7]){};
};
//////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial
+/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial
/// specialization. Instantiated when conservative rast and scissor are enabled
template <typename RT>
struct ComputeScissorEdges<std::true_type, std::true_type, RT>
{
//////////////////////////////////////////////////////////////////////////
- /// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
+ /// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
/// evaluate edge equations and offset them away from pixel center.
- INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
+ INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
+ const SWR_RECT& scissorBBox,
+ const int32_t x,
+ const int32_t y,
+ EDGE (&rastEdges)[RT::NumEdgesT::value],
+ __m256d (&vEdgeFix16)[7])
{
// if conservative rasterizing, triangle bbox intersected with scissor bbox is used
SWR_RECT scissor;
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
-
- // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
+ vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
+ (rastEdges[3].b * (y - scissor.ymin)));
+ vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
+ (rastEdges[4].b * (y - scissor.ymax)));
+ vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
+ (rastEdges[5].b * (y - scissor.ymax)));
+ vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
+ (rastEdges[6].b * (y - scissor.ymin)));
+
+ // if conservative rasterizing, need to bump the scissor edges out by the conservative
+ // uncertainty distance, else do nothing
adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
};
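// Illustrative sketch (hypothetical scalar form of the vEdgeFix16[3..6] setup
// above): each scissor edge is evaluated exactly like a triangle edge,
// relative to a reference corner of the scissor rect.
#include <cstdint>
static inline double SketchScissorEdgeAt(
    double a, double b, int32_t x, int32_t y, int32_t xref, int32_t yref)
{
    return a * (x - xref) + b * (y - yref);
}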
//////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial
+/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial
/// specialization. Instantiated when scissor is enabled and conservative rast
/// is disabled.
template <typename RT>
{
//////////////////////////////////////////////////////////////////////////
/// @brief Compute scissor edge vectors and evaluate edge equations
- INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
- EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
+ INLINE ComputeScissorEdges(const SWR_RECT&,
+ const SWR_RECT& scissorBBox,
+ const int32_t x,
+ const int32_t y,
+ EDGE (&rastEdges)[RT::NumEdgesT::value],
+ __m256d (&vEdgeFix16)[7])
{
- const SWR_RECT &scissor = scissorBBox;
- POS topLeft{scissor.xmin, scissor.ymin};
- POS bottomLeft{scissor.xmin, scissor.ymax};
- POS topRight{scissor.xmax, scissor.ymin};
- POS bottomRight{scissor.xmax, scissor.ymax};
+ const SWR_RECT& scissor = scissorBBox;
+ POS topLeft{scissor.xmin, scissor.ymin};
+ POS bottomLeft{scissor.xmin, scissor.ymax};
+ POS topRight{scissor.xmax, scissor.ymin};
+ POS bottomRight{scissor.xmax, scissor.ymax};
// construct 4 scissor edges in ccw direction
ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
ComputeEdgeData(topRight, topLeft, rastEdges[6]);
- vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
- vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
- vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
- vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
+ vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
+ (rastEdges[3].b * (y - scissor.ymin)));
+ vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
+ (rastEdges[4].b * (y - scissor.ymax)));
+ vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
+ (rastEdges[5].b * (y - scissor.ymax)));
+ vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
+ (rastEdges[6].b * (y - scissor.ymin)));
// Upper left rule for scissor
vEdgeFix16[3] = _mm256_sub_pd(vEdgeFix16[3], _mm256_set1_pd(1.0));
template <>
INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
{
- return (!(mask0 && mask1 && mask2)) ? true : false;;
+    return !(mask0 && mask1 && mask2);
};
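// Illustrative sketch (hypothetical helper): mask0..2 carry one sign bit per
// raster-tile corner, so a zero mask means no corner is inside that edge and
// the tile cannot intersect the triangle. Equivalent scalar phrasing:
static inline bool SketchTrivialReject(int mask0, int mask1, int mask2)
{
    return (mask0 == 0) || (mask1 == 0) || (mask2 == 0);
}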
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/// @brief Primary function template for TrivialAcceptTest. Always returns
-/// false, since it will only be called for degenerate tris, and as such
+/// false, since it will only be called for degenerate tris, and as such
/// will never cover the entire raster tile
template <typename ScissorEnableT>
INLINE bool TrivialAcceptTest(const int, const int, const int)
template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
struct GenerateSVInnerCoverage
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t &){};
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t&){};
};
//////////////////////////////////////////////////////////////////////////
/// @brief Specialization of GenerateSVInnerCoverage where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the evaluated
+/// are non-degenerate and SVInnerCoverage is requested. Offsets the evaluated
/// edge values from OuterConservative to InnerConservative and rasterizes.
template <typename RT>
struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
{
- INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges, uint64_t &innerCoverageMask)
+ INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ EDGE* pRastEdges,
+ double* pStartQuadEdges,
+ uint64_t& innerCoverageMask)
{
double startQuadEdgesAdj[RT::NumEdgesT::value];
- for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
+ for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
- startQuadEdgesAdj[e] = adjustScalarEdge<RT, typename RT::InnerConservativeEdgeOffsetT>(pRastEdges[e].a, pRastEdges[e].b, pStartQuadEdges[e]);
+ startQuadEdgesAdj[e] = adjustScalarEdge<RT, typename RT::InnerConservativeEdgeOffsetT>(
+ pRastEdges[e].a, pRastEdges[e].b, pStartQuadEdges[e]);
}
// not trivial accept or reject, must rasterize full tile
RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
- innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
+ innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
+ pDC, startQuadEdgesAdj, pRastEdges);
RDTSC_END(BERasterizePartial, 0);
}
};
template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
struct UpdateEdgeMasksInnerConservative
{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3], const __m256d*,
- const __m128i, const __m128i, int32_t &, int32_t &, int32_t &){};
+ INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
+ const __m256d*,
+ const __m128i,
+ const __m128i,
+ int32_t&,
+ int32_t&,
+ int32_t&){};
};
//////////////////////////////////////////////////////////////////////////
/// @brief Specialization of UpdateEdgeMasksInnerConservative where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the edges
-/// evaluated at raster tile corners to inner conservative position and
+/// are non-degenerate and SVInnerCoverage is requested. Offsets the edges
+/// evaluated at raster tile corners to inner conservative position and
/// updates edge masks
template <typename RT>
struct UpdateEdgeMasksInnerConservative<RT, AllEdgesValidT, InnerConservativeCoverageT>
{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3], const __m256d* vEdgeFix16,
- const __m128i vAi, const __m128i vBi, int32_t &mask0, int32_t &mask1, int32_t &mask2)
+ INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
+ const __m256d* vEdgeFix16,
+ const __m128i vAi,
+ const __m128i vBi,
+ int32_t& mask0,
+ int32_t& mask1,
+ int32_t& mask2)
{
__m256d vTempEdge[3]{vEdgeFix16[0], vEdgeFix16[1], vEdgeFix16[2]};
- // instead of keeping 2 copies of evaluated edges around, just compensate for the outer
+ // instead of keeping 2 copies of evaluated edges around, just compensate for the outer
// conservative evaluated edge when adjusting the edge in for inner conservative tests
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[0]);
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[1]);
- adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[2]);
-
- UpdateEdgeMasks<typename RT::NumCoverageSamplesT>(vEdgeTileBbox, vTempEdge, mask0, mask1, mask2);
+ adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+ vAi, vBi, vTempEdge[0]);
+ adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+ vAi, vBi, vTempEdge[1]);
+ adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+ vAi, vBi, vTempEdge[2]);
+
+ UpdateEdgeMasks<typename RT::NumCoverageSamplesT>(
+ vEdgeTileBbox, vTempEdge, mask0, mask1, mask2);
}
};
//////////////////////////////////////////////////////////////////////////
-/// @brief Specialization of UpdateEdgeMasksInnerConservative where SVInnerCoverage
-/// is requested but at least one edge is degenerate. Since a degenerate triangle cannot
+/// @brief Specialization of UpdateEdgeMasksInnerConservative where SVInnerCoverage
+/// is requested but at least one edge is degenerate. Since a degenerate triangle cannot
/// cover an entire raster tile, set mask0 to 0 to force it down the
/// rasterizePartialTile path
template <typename RT, typename ValidEdgeMaskT>
struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCoverageT>
{
- INLINE UpdateEdgeMasksInnerConservative(const __m256d (&)[3], const __m256d*,
- const __m128i, const __m128i, int32_t &mask0, int32_t &, int32_t &)
+ INLINE UpdateEdgeMasksInnerConservative(const __m256d (&)[3],
+ const __m256d*,
+ const __m128i,
+ const __m128i,
+ int32_t& mask0,
+ int32_t&,
+ int32_t&)
{
        // set one mask to zero to force the triangle down the rasterizePartialTile path
mask0 = 0;
template <typename RT>
void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
{
- const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
+ const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
#if KNOB_ENABLE_TOSS_POINTS
if (KNOB_TOSS_BIN_TRIS)
{
RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId);
RDTSC_BEGIN(BETriangleSetup, pDC->drawId);
- const API_STATE &state = GetApiState(pDC);
- const SWR_RASTSTATE &rastState = state.rastState;
+ const API_STATE& state = GetApiState(pDC);
+ const SWR_RASTSTATE& rastState = state.rastState;
const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
triDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
__m128 vX, vY, vZ, vRecipW;
-
+
// pTriBuffer data layout: grouped components of the 3 triangle points and 1 don't care
// eg: vX = [x0 x1 x2 dc]
- vX = _mm_load_ps(workDesc.pTriBuffer);
- vY = _mm_load_ps(workDesc.pTriBuffer + 4);
- vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
+ vX = _mm_load_ps(workDesc.pTriBuffer);
+ vY = _mm_load_ps(workDesc.pTriBuffer + 4);
+ vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
// convert to fixed point
- static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Rasterizer expects 16.8 fixed point precision");
+ static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value,
+ "Rasterizer expects 16.8 fixed point precision");
__m128i vXi = fpToFixedPoint(vX);
__m128i vYi = fpToFixedPoint(vY);
__m128i vAi, vBi;
triangleSetupABInt(vXi, vYi, vAi, vBi);
-
+
// determinant
float det = calcDeterminantInt(vAi, vBi);
// Verts in Pixel Coordinate Space at this point
- // Det > 0 = CW winding order
+ // Det > 0 = CW winding order
// Convert CW triangles to CCW
if (det > 0.0)
{
// Finish triangle setup - C edge coef
triangleSetupC(vX, vY, vA, vB, vC);
- if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+ if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
- // If we have degenerate edge(s) to rasterize, set I and J coefs
+ // If we have degenerate edge(s) to rasterize, set I and J coefs
// to 0 for constant interpolation of attributes
triDesc.I[0] = 0.0f;
triDesc.I[1] = 0.0f;
}
else
{
- // only extract coefs for 2 of the barycentrics; the 3rd can be
+ // only extract coefs for 2 of the barycentrics; the 3rd can be
// determined from the barycentric equation:
// i + j + k = 1 <=> k = 1 - j - i
_MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
_MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
// compute recipDet, used to calculate barycentric i and j in the backend
- triDesc.recipDet = 1.0f/det;
+ triDesc.recipDet = 1.0f / det;
}
OSALIGNSIMD(float) oneOverW[4];
triDesc.OneOverW[1] = oneOverW[1] - oneOverW[2];
triDesc.OneOverW[2] = oneOverW[2];
- // calculate perspective correct coefs per vertex attrib
- float* pPerspAttribs = perspAttribsTLS;
- float* pAttribs = workDesc.pAttribs;
+ // calculate perspective correct coefs per vertex attrib
+ float* pPerspAttribs = perspAttribsTLS;
+ float* pAttribs = workDesc.pAttribs;
triDesc.pPerspAttribs = pPerspAttribs;
- triDesc.pAttribs = pAttribs;
- float *pRecipW = workDesc.pTriBuffer + 12;
- triDesc.pRecipW = pRecipW;
- __m128 vOneOverWV0 = _mm_broadcast_ss(pRecipW);
- __m128 vOneOverWV1 = _mm_broadcast_ss(pRecipW+=1);
- __m128 vOneOverWV2 = _mm_broadcast_ss(pRecipW+=1);
- for(uint32_t i = 0; i < workDesc.numAttribs; i++)
+ triDesc.pAttribs = pAttribs;
+ float* pRecipW = workDesc.pTriBuffer + 12;
+ triDesc.pRecipW = pRecipW;
+ __m128 vOneOverWV0 = _mm_broadcast_ss(pRecipW);
+ __m128 vOneOverWV1 = _mm_broadcast_ss(pRecipW += 1);
+ __m128 vOneOverWV2 = _mm_broadcast_ss(pRecipW += 1);
+ for (uint32_t i = 0; i < workDesc.numAttribs; i++)
{
__m128 attribA = _mm_load_ps(pAttribs);
- __m128 attribB = _mm_load_ps(pAttribs+=4);
- __m128 attribC = _mm_load_ps(pAttribs+=4);
- pAttribs+=4;
+ __m128 attribB = _mm_load_ps(pAttribs += 4);
+ __m128 attribC = _mm_load_ps(pAttribs += 4);
+ pAttribs += 4;
attribA = _mm_mul_ps(attribA, vOneOverWV0);
attribB = _mm_mul_ps(attribB, vOneOverWV1);
attribC = _mm_mul_ps(attribC, vOneOverWV2);
_mm_store_ps(pPerspAttribs, attribA);
- _mm_store_ps(pPerspAttribs+=4, attribB);
- _mm_store_ps(pPerspAttribs+=4, attribC);
- pPerspAttribs+=4;
+ _mm_store_ps(pPerspAttribs += 4, attribB);
+ _mm_store_ps(pPerspAttribs += 4, attribC);
+ pPerspAttribs += 4;
}
// compute bary Z
triDesc.Z[0] = a[0] - a[2];
triDesc.Z[1] = a[1] - a[2];
triDesc.Z[2] = a[2];
-
+
// add depth bias
triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
OSALIGNSIMD(SWR_RECT) bbox;
calcBoundingBoxInt(vXi, vYi, bbox);
- const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+ const SWR_RECT& scissorInFixedPoint =
+ state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
- if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+ if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
{
- // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
- bbox.xmin--; bbox.xmax++; bbox.ymin--; bbox.ymax++;
+ // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is
+ // valid
+ bbox.xmin--;
+ bbox.xmax++;
+ bbox.ymin--;
+ bbox.ymax++;
SWR_ASSERT(scissorInFixedPoint.xmin >= 0 && scissorInFixedPoint.ymin >= 0,
"Conservative rast degenerate handling requires a valid scissor rect");
}
triDesc.triFlags = workDesc.triFlags;
- // further constrain backend to intersecting bounding box of macro tile and scissored triangle bbox
+ // further constrain backend to intersecting bounding box of macro tile and scissored triangle
+ // bbox
uint32_t macroX, macroY;
MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
- int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
- int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
- int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
+ int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
+ int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
+ int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
intersect.xmax = std::min(intersect.xmax, macroBoxRight);
intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
- SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
+ SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax &&
+ intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 &&
+ intersect.ymax >= 0);
RDTSC_END(BETriangleSetup, 0);
// update triangle desc
- uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
- uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+ uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
uint32_t numTilesX = maxTileX - minTileX + 1;
uint32_t numTilesY = maxTileY - minTileY + 1;
- if (numTilesX == 0 || numTilesY == 0)
+ if (numTilesX == 0 || numTilesY == 0)
{
RDTSC_EVENT(BEEmptyTriangle, 1, 0);
RDTSC_END(BERasterizeTriangle, 1);
// single sample rasterization evaluates edges at pixel center,
// multisample evaluates edges UL pixel corner and steps to each sample position
- if(std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
+ if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
{
// Add 0.5, in fixed point, to offset to pixel center
x += (FIXED_POINT_SCALE / 2);
__m128i vTopLeftY = _mm_set1_epi32(y);
// evaluate edge equations at top-left pixel using 64bit math
- //
+ //
// line = Ax + By + C
// solving for C:
    // C = -Ax0 - By0 (plugging in (x0, y0), a point known to be on the line)
    // line = Ax + By - Ax0 - By0
// line = A(x - x0) + B(y - y0)
// dX = (x-x0), dY = (y-y0)
- // so all this simplifies to
+ // so all this simplifies to
// edge = A(dX) + B(dY), our first test at the top left of the bbox we're rasterizing within
__m128i vDeltaX = _mm_sub_epi32(vTopLeftX, vXi);
__m128i vDeltaY = _mm_sub_epi32(vTopLeftY, vYi);
// evaluate A(dx) and B(dY) for all points
- __m256d vAipd = _mm256_cvtepi32_pd(vAi);
- __m256d vBipd = _mm256_cvtepi32_pd(vBi);
+ __m256d vAipd = _mm256_cvtepi32_pd(vAi);
+ __m256d vBipd = _mm256_cvtepi32_pd(vBi);
__m256d vDeltaXpd = _mm256_cvtepi32_pd(vDeltaX);
__m256d vDeltaYpd = _mm256_cvtepi32_pd(vDeltaY);
__m256d vAiDeltaXFix16 = _mm256_mul_pd(vAipd, vDeltaXpd);
__m256d vBiDeltaYFix16 = _mm256_mul_pd(vBipd, vDeltaYpd);
- __m256d vEdge = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
+ __m256d vEdge = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
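    // Worked example (assumed numbers, not from the source): with A = 256 and
    // B = -128 in fix8, and a pixel at dX = 512, dY = 256 (also fix8), the
    // edge value is 256*512 + (-128)*256 = 98304 in fix16; a negative result
    // would mean the point lies inside this edge.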
    // apply any edge adjustments (top-left, conservative rast, etc.)
adjustEdgesFix16<RT, typename RT::ConservativeEdgeOffsetT>(vAi, vBi, vEdge);
ComputeEdgeData(aAi[2], aBi[2], rastEdges[2]);
    // Compute and store triangle edge data if the scissor needs to be rasterized
- ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>
- (bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
+ ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>(
+ bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
// Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
    // used for testing if the entire raster tile is inside a triangle
__m256d vEdgeTileBbox[3];
if (NumCoverageSamplesT::value > 1)
{
- const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
- const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
- const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
+ const SWR_MULTISAMPLE_POS& samplePos = rastState.samplePositions;
+ const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
+ const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
__m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
__m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
        // used for testing if the entire raster tile is inside a triangle
for (uint32_t e = 0; e < 3; ++e)
{
- __m256d vResultAxFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vTileSampleBBoxXFix8);
- __m256d vResultByFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vTileSampleBBoxYFix8);
+ __m256d vResultAxFix16 =
+ _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vTileSampleBBoxXFix8);
+ __m256d vResultByFix16 =
+ _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vTileSampleBBoxYFix8);
vEdgeTileBbox[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
// adjust for msaa tile bbox edges outward for conservative rast, if enabled
- adjustEdgeConservative<RT, typename RT::ConservativeEdgeOffsetT>(vAi, vBi, vEdgeTileBbox[e]);
+ adjustEdgeConservative<RT, typename RT::ConservativeEdgeOffsetT>(
+ vAi, vBi, vEdgeTileBbox[e]);
}
}
RDTSC_END(BEStepSetup, 0);
- uint32_t tY = minTileY;
- uint32_t tX = minTileX;
+ uint32_t tY = minTileY;
+ uint32_t tX = minTileX;
uint32_t maxY = maxTileY;
uint32_t maxX = maxTileX;
RenderOutputBuffers renderBuffers, currentRenderBufferRow;
- GetRenderHotTiles<RT::MT::numSamples>(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
+ GetRenderHotTiles<RT::MT::numSamples>(pDC,
+ workerId,
+ macroTile,
+ minTileX,
+ minTileY,
+ renderBuffers,
+ triDesc.triFlags.renderTargetArrayIndex);
currentRenderBufferRow = renderBuffers;
// rasterize and generate coverage masks per sample
for (uint32_t sampleNum = 0; sampleNum < NumCoverageSamplesT::value; sampleNum++)
{
// trivial reject, at least one edge has all 4 corners of raster tile outside
- bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
+ bool trivialReject =
+ TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
if (!trivialReject)
{
// trivial accept mask
triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
- // Update the raster tile edge masks based on inner conservative edge offsets, if enabled
- UpdateEdgeMasksInnerConservative<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>
- (vEdgeTileBbox, vEdgeFix16, vAi, vBi, mask0, mask1, mask2);
+ // Update the raster tile edge masks based on inner conservative edge offsets,
+ // if enabled
+ UpdateEdgeMasksInnerConservative<RT,
+ typename RT::ValidEdgeMaskT,
+ typename RT::InputCoverageT>(
+ vEdgeTileBbox, vEdgeFix16, vAi, vBi, mask0, mask1, mask2);
// @todo Make this a bit smarter to allow use of trivial accept when:
// 1) scissor/vp intersection rect is raster tile aligned
// 2) raster tile is entirely within scissor/vp intersection rect
if (TrivialAcceptTest<typename RT::RasterizeScissorEdgesT>(mask0, mask1, mask2))
{
- // trivial accept, all 4 corners of all 3 edges are negative
+ // trivial accept, all 4 corners of all 3 edges are negative
// i.e. raster tile completely inside triangle
triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
- if(std::is_same<typename RT::InputCoverageT, InnerConservativeCoverageT>::value)
+ if (std::is_same<typename RT::InputCoverageT,
+ InnerConservativeCoverageT>::value)
{
triDesc.innerCoverageMask = 0xffffffffffffffffULL;
}
else
{
__m256d vEdgeAtSample[RT::NumEdgesT::value];
- if(std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
+ if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
{
- // should get optimized out for single sample case (global value numbering or copy propagation)
+ // should get optimized out for single sample case (global value
+ // numbering or copy propagation)
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
vEdgeAtSample[e] = vEdgeFix16[e];
}
else
{
- const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
- __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
- __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
+ const SWR_MULTISAMPLE_POS& samplePos = rastState.samplePositions;
+ __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
+ __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
__m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
__m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
// step edge equation tests from UL tile corner to pixel sample position
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
- __m256d vResultAxFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
- __m256d vResultByFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
+ __m256d vResultAxFix16 =
+ _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
+ __m256d vResultByFix16 =
+ _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
vEdgeAtSample[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
vEdgeAtSample[e] = _mm256_add_pd(vEdgeFix16[e], vEdgeAtSample[e]);
}
}
- double startQuadEdges[RT::NumEdgesT::value];
+ double startQuadEdges[RT::NumEdgesT::value];
const __m256i vLane0Mask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
// not trivial accept or reject, must rasterize full tile
RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
- triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
+ triDesc.coverageMask[sampleNum] =
+ rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
+ pDC, startQuadEdges, rastEdges);
RDTSC_END(BERasterizePartial, 0);
- triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
-
+ triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
+
// Output SV InnerCoverage, if needed
- GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
+ GenerateSVInnerCoverage<RT,
+ typename RT::ValidEdgeMaskT,
+ typename RT::InputCoverageT>(
+ pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
}
}
else
{
- // if we're calculating coverage per sample, need to store it off. otherwise no covered samples, don't need to do anything
- if(NumCoverageSamplesT::value > 1)
+        // if we're calculating coverage per sample, we need to store it off; otherwise
+        // there are no covered samples and there is nothing to do
+ if (NumCoverageSamplesT::value > 1)
{
triDesc.coverageMask[sampleNum] = 0;
}
}
#if KNOB_ENABLE_TOSS_POINTS
- if(KNOB_TOSS_RS)
+ if (KNOB_TOSS_RS)
{
gToss = triDesc.coverageMask[0];
}
else
#endif
- if(triDesc.anyCoveredSamples)
+ if (triDesc.anyCoveredSamples)
{
- // if conservative rast and MSAA are enabled, conservative coverage for a pixel means all samples in that pixel are covered
- // copy conservative coverage result to all samples
- if(RT::IsConservativeT::value)
+            // if conservative rast and MSAA are enabled, conservative coverage for a pixel
+            // means all samples in that pixel are covered; copy the conservative coverage
+            // result to all samples
+ if (RT::IsConservativeT::value)
{
- auto copyCoverage = [&](int sample){triDesc.coverageMask[sample] = triDesc.coverageMask[0]; };
+ auto copyCoverage = [&](int sample) {
+ triDesc.coverageMask[sample] = triDesc.coverageMask[0];
+ };
UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
}
AR_EVENT(RasterTileCount(pDC->drawId, 1));
RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
- backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
+ backendFuncs.pfnBackend(pDC,
+ workerId,
+ tileX << KNOB_TILE_X_DIM_SHIFT,
+ tileY << KNOB_TILE_Y_DIM_SHIFT,
+ triDesc,
+ renderBuffers);
RDTSC_END(BEPixelBackend, 0);
}
// step to the next tile in X
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
- vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
+ vEdgeFix16[e] =
+ _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
}
StepRasterTileX<RT>(state.colorHottileEnable, renderBuffers);
}
// step to the next tile in Y
for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
{
- vEdgeFix16[e] = _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
+ vEdgeFix16[e] =
+ _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
}
StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
}
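
// Editor's sketch: the tile classification above reduces to evaluating each
// edge equation E(x, y) = a*x + b*y + c at the four corners of a raster tile.
// With the sign convention in the trivial-accept comment (inside = negative),
// all four corners negative for all three edges means the tile is entirely
// inside the triangle; all four corners non-negative for any single edge means
// the tile is entirely outside it. A minimal scalar version under assumed
// names (Edge, classifyTile) -- SWR performs the same test 4-wide in __m256d:
struct Edge { double a, b, c; };

enum class TileClass { TrivialReject, TrivialAccept, Partial };

static TileClass classifyTile(const Edge (&edges)[3], double x0, double y0,
                              double width, double height)
{
    bool allCornersInside = true;
    const double xs[2] = {x0, x0 + width};
    const double ys[2] = {y0, y0 + height};
    for (const Edge& e : edges)
    {
        bool anyInside = false, allInside = true;
        for (double x : xs)
        {
            for (double y : ys)
            {
                const bool inside = (e.a * x + e.b * y + e.c) < 0.0;
                anyInside = anyInside || inside;
                allInside = allInside && inside;
            }
        }
        if (!anyInside)
        {
            return TileClass::TrivialReject; // tile fully outside one edge
        }
        allCornersInside = allCornersInside && allInside;
    }
    return allCornersInside ? TileClass::TrivialAccept : TileClass::Partial;
}
// Stepping an edge value from the UL tile corner to a sample position only
// adds a*dx + b*dy, which is exactly what the vSampleOffsetX/Y code computes.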
// Get pointers to hot tile memory for color RT, depth, stencil
template <uint32_t numSamples>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
+void GetRenderHotTiles(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroID,
+ uint32_t tileX,
+ uint32_t tileY,
+ RenderOutputBuffers& renderBuffers,
+ uint32_t renderTargetArrayIndex)
{
- const API_STATE& state = GetApiState(pDC);
- SWR_CONTEXT *pContext = pDC->pContext;
+ const API_STATE& state = GetApiState(pDC);
+ SWR_CONTEXT* pContext = pDC->pContext;
HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t mx, my;
// compute tile offset for active hottile buffers
const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
- offset*=numSamples;
-
- unsigned long rtSlot = 0;
- uint32_t colorHottileEnableMask = state.colorHottileEnable;
- while(_BitScanForward(&rtSlot, colorHottileEnableMask))
+ uint32_t offset = ComputeTileOffset2D<
+ TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp>>(
+ pitch, tileX, tileY);
+ offset *= numSamples;
+
+ unsigned long rtSlot = 0;
+ uint32_t colorHottileEnableMask = state.colorHottileEnable;
+ while (_BitScanForward(&rtSlot, colorHottileEnableMask))
{
- HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true,
- numSamples, renderTargetArrayIndex);
- pColor->state = HOTTILE_DIRTY;
+ HOTTILE* pColor = pContext->pHotTileMgr->GetHotTile(
+ pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroID,
+ (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+ true,
+ numSamples,
+ renderTargetArrayIndex);
+ pColor->state = HOTTILE_DIRTY;
renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
-
+
colorHottileEnableMask &= ~(1 << rtSlot);
}
- if(state.depthHottileEnable)
+ if (state.depthHottileEnable)
{
- const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
- offset*=numSamples;
- HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true,
- numSamples, renderTargetArrayIndex);
- pDepth->state = HOTTILE_DIRTY;
+ const uint32_t pitch =
+ KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
+ uint32_t offset = ComputeTileOffset2D<
+ TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp>>(
+ pitch, tileX, tileY);
+ offset *= numSamples;
+ HOTTILE* pDepth = pContext->pHotTileMgr->GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroID,
+ SWR_ATTACHMENT_DEPTH,
+ true,
+ numSamples,
+ renderTargetArrayIndex);
+ pDepth->state = HOTTILE_DIRTY;
SWR_ASSERT(pDepth->pBuffer != nullptr);
renderBuffers.pDepth = pDepth->pBuffer + offset;
}
- if(state.stencilHottileEnable)
+ if (state.stencilHottileEnable)
{
- const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
- uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
- offset*=numSamples;
- HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true,
- numSamples, renderTargetArrayIndex);
- pStencil->state = HOTTILE_DIRTY;
+ const uint32_t pitch =
+ KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
+ uint32_t offset = ComputeTileOffset2D<
+ TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp>>(
+ pitch, tileX, tileY);
+ offset *= numSamples;
+ HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroID,
+ SWR_ATTACHMENT_STENCIL,
+ true,
+ numSamples,
+ renderTargetArrayIndex);
+ pStencil->state = HOTTILE_DIRTY;
SWR_ASSERT(pStencil->pBuffer != nullptr);
renderBuffers.pStencil = pStencil->pBuffer + offset;
}
}
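
// Editor's sketch of the offset arithmetic used throughout GetRenderHotTiles,
// simplified to a plain row-major layout; the real ComputeTileOffset2D also
// encodes the SWR_TILE_SWRZ swizzle, so names and layout here are purely
// illustrative. pitch is the bytes in one full-width pixel row of the
// macrotile, the tile's (x, y) indices are scaled by the raster tile
// dimensions, and the final multiply mirrors "offset *= numSamples" above,
// consistent with the hot tile holding per-sample copies of the data.
static uint32_t linearTileOffset(uint32_t tileX, uint32_t tileY,
                                 uint32_t bpp,           // bits per pixel
                                 uint32_t tileDimX,      // raster tile width, px
                                 uint32_t tileDimY,      // raster tile height, px
                                 uint32_t macrotileDimX, // macrotile width, px
                                 uint32_t numSamples)
{
    const uint32_t pitch  = macrotileDimX * bpp / 8;    // bytes per pixel row
    uint32_t       offset = tileY * tileDimY * pitch    // skip whole tile rows
                          + tileX * tileDimX * bpp / 8; // step within the row
    return offset * numSamples;                         // per-sample copies
}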
template <typename RT>
-INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers)
+INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers)
{
DWORD rt = 0;
while (_BitScanForward(&rt, colorHotTileMask))
colorHotTileMask &= ~(1 << rt);
buffers.pColor[rt] += RT::colorRasterTileStep;
}
-
+
buffers.pDepth += RT::depthRasterTileStep;
buffers.pStencil += RT::stencilRasterTileStep;
}
template <typename RT>
-INLINE void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow)
+INLINE void StepRasterTileY(uint32_t colorHotTileMask,
+ RenderOutputBuffers& buffers,
+ RenderOutputBuffers& startBufferRow)
{
DWORD rt = 0;
while (_BitScanForward(&rt, colorHotTileMask))
startBufferRow.pStencil += RT::stencilRasterTileRowStep;
buffers.pStencil = startBufferRow.pStencil;
}
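
// Editor's note: together these helpers implement the classic saved-row
// traversal -- walk raster tiles left to right with StepRasterTileX, then use
// the pointers captured at the start of the row to step one tile row down in
// StepRasterTileY. A hedged sketch of the pattern (names and step sizes are
// illustrative, not SWR's):
static void walkRasterTiles(uint8_t* pRowStart, uint32_t numTilesX,
                            uint32_t numTilesY, uint32_t tileStepX,
                            uint32_t rowStepY)
{
    for (uint32_t ty = 0; ty < numTilesY; ++ty)
    {
        uint8_t* pTile = pRowStart;  // restart from the saved row pointer
        for (uint32_t tx = 0; tx < numTilesX; ++tx)
        {
            // ... rasterize/shade one raster tile at pTile ...
            pTile += tileStepX;      // StepRasterTileX analogue
        }
        pRowStart += rowStepY;       // StepRasterTileY analogue
    }
}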
-
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#include "rdtsc_core.h"
#include "common/rdtsc_buckets.h"
// must match CORE_BUCKETS enum order
BUCKET_DESC gCoreBuckets[] = {
- { "APIClearRenderTarget", "", true, 0xff0b8bea },
- { "APIDraw", "", true, 0xff000066 },
- { "APIDrawWakeAllThreads", "", false, 0xffffffff },
- { "APIDrawIndexed", "", true, 0xff000066 },
- { "APIDispatch", "", true, 0xff660000 },
- { "APIStoreTiles", "", true, 0xff00ffff },
- { "APIGetDrawContext", "", false, 0xffffffff },
- { "APISync", "", true, 0xff6666ff },
- { "APIWaitForIdle", "", true, 0xff0000ff },
- { "FEProcessDraw", "", true, 0xff009900 },
- { "FEProcessDrawIndexed", "", true, 0xff009900 },
- { "FEFetchShader", "", false, 0xffffffff },
- { "FEVertexShader", "", false, 0xffffffff },
- { "FEHullShader", "", false, 0xffffffff },
- { "FETessellation", "", false, 0xffffffff },
- { "FEDomainShader", "", false, 0xffffffff },
- { "FEGeometryShader", "", false, 0xffffffff },
- { "FEStreamout", "", false, 0xffffffff },
- { "FEPAAssemble", "", false, 0xffffffff },
- { "FEBinPoints", "", false, 0xff29b854 },
- { "FEBinLines", "", false, 0xff29b854 },
- { "FEBinTriangles", "", false, 0xff29b854 },
- { "FETriangleSetup", "", false, 0xffffffff },
- { "FEViewportCull", "", false, 0xffffffff },
- { "FEGuardbandClip", "", false, 0xffffffff },
- { "FEClipPoints", "", false, 0xffffffff },
- { "FEClipLines", "", false, 0xffffffff },
- { "FEClipTriangles", "", false, 0xffffffff },
- { "FEClipRectangles", "", false, 0xffffffff },
- { "FECullZeroAreaAndBackface", "", false, 0xffffffff },
- { "FECullBetweenCenters", "", false, 0xffffffff },
- { "FEEarlyRastEnter", "", false, 0xffffffff },
- { "FEEarlyRastExit", "", false, 0xffffffff },
- { "FEProcessStoreTiles", "", true, 0xff39c864 },
- { "FEProcessInvalidateTiles", "", true, 0xffffffff },
- { "WorkerWorkOnFifoBE", "", false, 0xff40261c },
- { "WorkerFoundWork", "", false, 0xff573326 },
- { "BELoadTiles", "", true, 0xffb0e2ff },
- { "BEDispatch", "", true, 0xff00a2ff },
- { "BEClear", "", true, 0xff00ccbb },
- { "BERasterizeLine", "", true, 0xffb26a4e },
- { "BERasterizeTriangle", "", true, 0xffb26a4e },
- { "BETriangleSetup", "", false, 0xffffffff },
- { "BEStepSetup", "", false, 0xffffffff },
- { "BECullZeroArea", "", false, 0xffffffff },
- { "BEEmptyTriangle", "", false, 0xffffffff },
- { "BETrivialAccept", "", false, 0xffffffff },
- { "BETrivialReject", "", false, 0xffffffff },
- { "BERasterizePartial", "", false, 0xffffffff },
- { "BEPixelBackend", "", false, 0xffffffff },
- { "BESetup", "", false, 0xffffffff },
- { "BEBarycentric", "", false, 0xffffffff },
- { "BEEarlyDepthTest", "", false, 0xffffffff },
- { "BEPixelShader", "", false, 0xffffffff },
- { "BESingleSampleBackend", "", false, 0xffffffff },
- { "BEPixelRateBackend", "", false, 0xffffffff },
- { "BESampleRateBackend", "", false, 0xffffffff },
- { "BENullBackend", "", false, 0xffffffff },
- { "BELateDepthTest", "", false, 0xffffffff },
- { "BEOutputMerger", "", false, 0xffffffff },
- { "BEStoreTiles", "", true, 0xff00cccc },
- { "BEEndTile", "", false, 0xffffffff },
+ {"APIClearRenderTarget", "", true, 0xff0b8bea},
+ {"APIDraw", "", true, 0xff000066},
+ {"APIDrawWakeAllThreads", "", false, 0xffffffff},
+ {"APIDrawIndexed", "", true, 0xff000066},
+ {"APIDispatch", "", true, 0xff660000},
+ {"APIStoreTiles", "", true, 0xff00ffff},
+ {"APIGetDrawContext", "", false, 0xffffffff},
+ {"APISync", "", true, 0xff6666ff},
+ {"APIWaitForIdle", "", true, 0xff0000ff},
+ {"FEProcessDraw", "", true, 0xff009900},
+ {"FEProcessDrawIndexed", "", true, 0xff009900},
+ {"FEFetchShader", "", false, 0xffffffff},
+ {"FEVertexShader", "", false, 0xffffffff},
+ {"FEHullShader", "", false, 0xffffffff},
+ {"FETessellation", "", false, 0xffffffff},
+ {"FEDomainShader", "", false, 0xffffffff},
+ {"FEGeometryShader", "", false, 0xffffffff},
+ {"FEStreamout", "", false, 0xffffffff},
+ {"FEPAAssemble", "", false, 0xffffffff},
+ {"FEBinPoints", "", false, 0xff29b854},
+ {"FEBinLines", "", false, 0xff29b854},
+ {"FEBinTriangles", "", false, 0xff29b854},
+ {"FETriangleSetup", "", false, 0xffffffff},
+ {"FEViewportCull", "", false, 0xffffffff},
+ {"FEGuardbandClip", "", false, 0xffffffff},
+ {"FEClipPoints", "", false, 0xffffffff},
+ {"FEClipLines", "", false, 0xffffffff},
+ {"FEClipTriangles", "", false, 0xffffffff},
+ {"FEClipRectangles", "", false, 0xffffffff},
+ {"FECullZeroAreaAndBackface", "", false, 0xffffffff},
+ {"FECullBetweenCenters", "", false, 0xffffffff},
+ {"FEEarlyRastEnter", "", false, 0xffffffff},
+ {"FEEarlyRastExit", "", false, 0xffffffff},
+ {"FEProcessStoreTiles", "", true, 0xff39c864},
+ {"FEProcessInvalidateTiles", "", true, 0xffffffff},
+ {"WorkerWorkOnFifoBE", "", false, 0xff40261c},
+ {"WorkerFoundWork", "", false, 0xff573326},
+ {"BELoadTiles", "", true, 0xffb0e2ff},
+ {"BEDispatch", "", true, 0xff00a2ff},
+ {"BEClear", "", true, 0xff00ccbb},
+ {"BERasterizeLine", "", true, 0xffb26a4e},
+ {"BERasterizeTriangle", "", true, 0xffb26a4e},
+ {"BETriangleSetup", "", false, 0xffffffff},
+ {"BEStepSetup", "", false, 0xffffffff},
+ {"BECullZeroArea", "", false, 0xffffffff},
+ {"BEEmptyTriangle", "", false, 0xffffffff},
+ {"BETrivialAccept", "", false, 0xffffffff},
+ {"BETrivialReject", "", false, 0xffffffff},
+ {"BERasterizePartial", "", false, 0xffffffff},
+ {"BEPixelBackend", "", false, 0xffffffff},
+ {"BESetup", "", false, 0xffffffff},
+ {"BEBarycentric", "", false, 0xffffffff},
+ {"BEEarlyDepthTest", "", false, 0xffffffff},
+ {"BEPixelShader", "", false, 0xffffffff},
+ {"BESingleSampleBackend", "", false, 0xffffffff},
+ {"BEPixelRateBackend", "", false, 0xffffffff},
+ {"BESampleRateBackend", "", false, 0xffffffff},
+ {"BENullBackend", "", false, 0xffffffff},
+ {"BELateDepthTest", "", false, 0xffffffff},
+ {"BEOutputMerger", "", false, 0xffffffff},
+ {"BEStoreTiles", "", true, 0xff00cccc},
+ {"BEEndTile", "", false, 0xffffffff},
};
-static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])), "RDTSC Bucket enum and description table size mismatched.");
+static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
+ "RDTSC Bucket enum and description table size mismatched.");
/// @todo bucketmanager and mapping should probably be a part of the SWR context
std::vector<uint32_t> gBucketMap;
-BucketManager gBucketMgr;
+BucketManager gBucketMgr;
-uint32_t gCurrentFrame = 0;
-bool gBucketsInitialized = false;
+uint32_t gCurrentFrame = 0;
+bool gBucketsInitialized = false;
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#pragma once
#include "knobs.h"
#endif
extern std::vector<uint32_t> gBucketMap;
-extern BucketManager gBucketMgr;
-extern BUCKET_DESC gCoreBuckets[];
-extern uint32_t gCurrentFrame;
-extern bool gBucketsInitialized;
+extern BucketManager gBucketMgr;
+extern BUCKET_DESC gCoreBuckets[];
+extern uint32_t gCurrentFrame;
+extern bool gBucketsInitialized;
INLINE void rdtscReset()
{
{
gCurrentFrame++;
- if (gCurrentFrame == KNOB_BUCKETS_START_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
+ if (gCurrentFrame == KNOB_BUCKETS_START_FRAME &&
+ KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
gBucketMgr.StartCapture();
}
- if (gCurrentFrame == KNOB_BUCKETS_END_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
+ if (gCurrentFrame == KNOB_BUCKETS_END_FRAME &&
+ KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
{
gBucketMgr.StopCapture();
gBucketMgr.PrintReport("rdtsc.txt");
/****************************************************************************
-* Copyright (C) 2016 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file arena.h
-*
-* @brief RingBuffer
-* The RingBuffer class manages all aspects of the ring buffer including
-* the head/tail indices, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file arena.h
+ *
+ * @brief RingBuffer
+ * The RingBuffer class manages all aspects of the ring buffer including
+ * the head/tail indices, etc.
+ *
+ ******************************************************************************/
#pragma once
-template<typename T>
+template <typename T>
class RingBuffer
{
public:
- RingBuffer()
- : mpRingBuffer(nullptr), mNumEntries(0), mRingHead(0), mRingTail(0)
- {
- }
+ RingBuffer() : mpRingBuffer(nullptr), mNumEntries(0), mRingHead(0), mRingTail(0) {}
- ~RingBuffer()
- {
- Destroy();
- }
+ ~RingBuffer() { Destroy(); }
void Init(uint32_t numEntries)
{
SWR_ASSERT(numEntries > 0);
- SWR_ASSERT(((1ULL << 32) % numEntries) == 0, "%d is not evenly divisible into 2 ^ 32. Wrap errors will occur!", numEntries);
- mNumEntries = numEntries;
- mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64);
+ SWR_ASSERT(((1ULL << 32) % numEntries) == 0,
+ "%d is not evenly divisible into 2 ^ 32. Wrap errors will occur!",
+ numEntries);
+ mNumEntries = numEntries;
+ mpRingBuffer = (T*)AlignedMalloc(sizeof(T) * numEntries, 64);
SWR_ASSERT(mpRingBuffer != nullptr);
- memset(mpRingBuffer, 0, sizeof(T)*numEntries);
+ memset(mpRingBuffer, 0, sizeof(T) * numEntries);
}
void Destroy()
InterlockedIncrement(&mRingTail); // There are multiple consumers.
}
- INLINE bool IsEmpty()
- {
- return (GetHead() == GetTail());
- }
+ INLINE bool IsEmpty() { return (GetHead() == GetTail()); }
INLINE bool IsFull()
{
INLINE uint32_t GetHead() volatile { return mRingHead; }
protected:
- T* mpRingBuffer;
+ T* mpRingBuffer;
uint32_t mNumEntries;
- OSALIGNLINE(volatile uint32_t) mRingHead; // Consumer Counter
- OSALIGNLINE(volatile uint32_t) mRingTail; // Producer Counter
+ OSALIGNLINE(volatile uint32_t) mRingHead; // Consumer Counter
+ OSALIGNLINE(volatile uint32_t) mRingTail; // Producer Counter
};
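
// Editor's note on the Init() divisibility assert: mRingHead and mRingTail are
// free-running 32-bit counters, reduced modulo mNumEntries only when a slot is
// addressed. Across the 2^32 wrap, (counter % numEntries) stays continuous
// only if numEntries divides 2^32 evenly, i.e. numEntries is a power of two.
// A small illustration (helper name is ours):
static uint32_t ringSlot(uint32_t counter, uint32_t numEntries)
{
    return counter % numEntries; // a cheap AND-mask when numEntries is 2^n
}
// With numEntries = 8, counters 0xFFFFFFFE, 0xFFFFFFFF, 0x0, 0x1 map to slots
// 6, 7, 0, 1 -- FIFO order survives the wrap. With numEntries = 6, the same
// counters map to slots 2, 3, 0, 1: the wrap silently skips slots 4 and 5,
// which is exactly the "wrap error" the assert guards against.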
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file state.h
-*
-* @brief Definitions for API state.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file state.h
+ *
+ * @brief Definitions for API state.
+ *
+ ******************************************************************************/
+// Skipping clang-format here; this file is parsed by simplistic python scripts
+// clang-format off
#pragma once
#include "common/formats.h"
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
- TOP_UNKNOWN = 0x0,
- TOP_POINT_LIST = 0x1,
- TOP_LINE_LIST = 0x2,
- TOP_LINE_STRIP = 0x3,
- TOP_TRIANGLE_LIST = 0x4,
- TOP_TRIANGLE_STRIP = 0x5,
- TOP_TRIANGLE_FAN = 0x6,
- TOP_QUAD_LIST = 0x7,
- TOP_QUAD_STRIP = 0x8,
- TOP_LINE_LIST_ADJ = 0x9,
- TOP_LISTSTRIP_ADJ = 0xA,
- TOP_TRI_LIST_ADJ = 0xB,
- TOP_TRI_STRIP_ADJ = 0xC,
- TOP_TRI_STRIP_REVERSE = 0xD,
- TOP_POLYGON = 0xE,
- TOP_RECT_LIST = 0xF,
- TOP_LINE_LOOP = 0x10,
- TOP_POINT_LIST_BF = 0x11,
- TOP_LINE_STRIP_CONT = 0x12,
- TOP_LINE_STRIP_BF = 0x13,
- TOP_LINE_STRIP_CONT_BF = 0x14,
+ TOP_UNKNOWN = 0x0,
+ TOP_POINT_LIST = 0x1,
+ TOP_LINE_LIST = 0x2,
+ TOP_LINE_STRIP = 0x3,
+ TOP_TRIANGLE_LIST = 0x4,
+ TOP_TRIANGLE_STRIP = 0x5,
+ TOP_TRIANGLE_FAN = 0x6,
+ TOP_QUAD_LIST = 0x7,
+ TOP_QUAD_STRIP = 0x8,
+ TOP_LINE_LIST_ADJ = 0x9,
+ TOP_LISTSTRIP_ADJ = 0xA,
+ TOP_TRI_LIST_ADJ = 0xB,
+ TOP_TRI_STRIP_ADJ = 0xC,
+ TOP_TRI_STRIP_REVERSE = 0xD,
+ TOP_POLYGON = 0xE,
+ TOP_RECT_LIST = 0xF,
+ TOP_LINE_LOOP = 0x10,
+ TOP_POINT_LIST_BF = 0x11,
+ TOP_LINE_STRIP_CONT = 0x12,
+ TOP_LINE_STRIP_BF = 0x13,
+ TOP_LINE_STRIP_CONT_BF = 0x14,
TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
- TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
-
- TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
- TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
- TOP_PATCHLIST_2 = 0x21,
- TOP_PATCHLIST_3 = 0x22,
- TOP_PATCHLIST_4 = 0x23,
- TOP_PATCHLIST_5 = 0x24,
- TOP_PATCHLIST_6 = 0x25,
- TOP_PATCHLIST_7 = 0x26,
- TOP_PATCHLIST_8 = 0x27,
- TOP_PATCHLIST_9 = 0x28,
- TOP_PATCHLIST_10 = 0x29,
- TOP_PATCHLIST_11 = 0x2A,
- TOP_PATCHLIST_12 = 0x2B,
- TOP_PATCHLIST_13 = 0x2C,
- TOP_PATCHLIST_14 = 0x2D,
- TOP_PATCHLIST_15 = 0x2E,
- TOP_PATCHLIST_16 = 0x2F,
- TOP_PATCHLIST_17 = 0x30,
- TOP_PATCHLIST_18 = 0x31,
- TOP_PATCHLIST_19 = 0x32,
- TOP_PATCHLIST_20 = 0x33,
- TOP_PATCHLIST_21 = 0x34,
- TOP_PATCHLIST_22 = 0x35,
- TOP_PATCHLIST_23 = 0x36,
- TOP_PATCHLIST_24 = 0x37,
- TOP_PATCHLIST_25 = 0x38,
- TOP_PATCHLIST_26 = 0x39,
- TOP_PATCHLIST_27 = 0x3A,
- TOP_PATCHLIST_28 = 0x3B,
- TOP_PATCHLIST_29 = 0x3C,
- TOP_PATCHLIST_30 = 0x3D,
- TOP_PATCHLIST_31 = 0x3E,
- TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
+ TOP_TRIANGLE_DISC = 0x17, /// @todo What is this??
+
+ TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
+ TOP_PATCHLIST_1 = 0x20, // List of 1-vertex patches
+ TOP_PATCHLIST_2 = 0x21,
+ TOP_PATCHLIST_3 = 0x22,
+ TOP_PATCHLIST_4 = 0x23,
+ TOP_PATCHLIST_5 = 0x24,
+ TOP_PATCHLIST_6 = 0x25,
+ TOP_PATCHLIST_7 = 0x26,
+ TOP_PATCHLIST_8 = 0x27,
+ TOP_PATCHLIST_9 = 0x28,
+ TOP_PATCHLIST_10 = 0x29,
+ TOP_PATCHLIST_11 = 0x2A,
+ TOP_PATCHLIST_12 = 0x2B,
+ TOP_PATCHLIST_13 = 0x2C,
+ TOP_PATCHLIST_14 = 0x2D,
+ TOP_PATCHLIST_15 = 0x2E,
+ TOP_PATCHLIST_16 = 0x2F,
+ TOP_PATCHLIST_17 = 0x30,
+ TOP_PATCHLIST_18 = 0x31,
+ TOP_PATCHLIST_19 = 0x32,
+ TOP_PATCHLIST_20 = 0x33,
+ TOP_PATCHLIST_21 = 0x34,
+ TOP_PATCHLIST_22 = 0x35,
+ TOP_PATCHLIST_23 = 0x36,
+ TOP_PATCHLIST_24 = 0x37,
+ TOP_PATCHLIST_25 = 0x38,
+ TOP_PATCHLIST_26 = 0x39,
+ TOP_PATCHLIST_27 = 0x3A,
+ TOP_PATCHLIST_28 = 0x3B,
+ TOP_PATCHLIST_29 = 0x3C,
+ TOP_PATCHLIST_30 = 0x3D,
+ TOP_PATCHLIST_31 = 0x3E,
+ TOP_PATCHLIST_32 = 0x3F, // List of 32-vertex patches
};
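
// Editor's note: the patchlist encoding is arithmetic by design --
// TOP_PATCHLIST_N == TOP_PATCHLIST_BASE + N -- so the control-point count per
// patch is recovered by subtraction (helper name is ours):
static uint32_t patchVertexCount(PRIMITIVE_TOPOLOGY topology)
{
    // valid for TOP_PATCHLIST_1 (0x20) through TOP_PATCHLIST_32 (0x3F)
    return (uint32_t)topology - (uint32_t)TOP_PATCHLIST_BASE;
}
// e.g. patchVertexCount(TOP_PATCHLIST_3) == 0x22 - 0x1F == 3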
//////////////////////////////////////////////////////////////////////////
SWR_NUM_OUTER_TESS_FACTORS,
};
-
/////////////////////////////////////////////////////////////////////////
/// simdvertex
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
enum SWR_VTX_SLOTS
{
VERTEX_SGV_SLOT = 0,
- VERTEX_SGV_RTAI_COMP = 0,
- VERTEX_SGV_VAI_COMP = 1,
- VERTEX_SGV_POINT_SIZE_COMP = 2,
+ VERTEX_SGV_RTAI_COMP = 0,
+ VERTEX_SGV_VAI_COMP = 1,
+ VERTEX_SGV_POINT_SIZE_COMP = 2,
VERTEX_POSITION_SLOT = 1,
VERTEX_POSITION_END_SLOT = 1,
VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
// SoAoSoA
struct simdvertex
{
- simdvector attrib[SWR_VTX_NUM_SLOTS];
+ simdvector attrib[SWR_VTX_NUM_SLOTS];
};
#if ENABLE_AVX512_SIMD16
struct simd16vertex
{
- simd16vector attrib[SWR_VTX_NUM_SLOTS];
+ simd16vector attrib[SWR_VTX_NUM_SLOTS];
};
#endif
-template<typename SIMD_T>
+template <typename SIMD_T>
struct SIMDVERTEX_T
{
- typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
+ typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
};
//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
struct SWR_VS_CONTEXT
{
- simdvertex* pVin; // IN: SIMD input vertex data store
- simdvertex* pVout; // OUT: SIMD output vertex data store
+ simdvertex* pVin; // IN: SIMD input vertex data store
+ simdvertex* pVout; // OUT: SIMD output vertex data store
- uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
- simdscalari VertexID; // IN: Vertex ID
- simdscalari mask; // IN: Active mask for shader
+ uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD
+ simdscalari VertexID; // IN: Vertex ID
+ simdscalari mask; // IN: Active mask for shader
// SIMD16 Frontend fields.
- uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
- simd16scalari mask16; // IN: Active mask for shader (16-wide)
- simd16scalari VertexID16; // IN: Vertex ID (16-wide)
+ uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
+ // simd16vertex output
+ simd16scalari mask16; // IN: Active mask for shader (16-wide)
+ simd16scalari VertexID16; // IN: Vertex ID (16-wide)
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+ SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
struct SWR_TESSELLATION_FACTORS
{
- float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
- float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
+ float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
+ float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
};
#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
struct ScalarPatch
{
SWR_TESSELLATION_FACTORS tessFactors;
- ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
- ScalarCPoint patchData;
+ ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
+ ScalarCPoint patchData;
};
//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
struct SWR_HS_CONTEXT
{
- simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
- simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
- simdscalari mask; // IN: Active mask for shader
- ScalarPatch* pCPout; // OUT: Output control point patch
- // SIMD-sized-array of SCALAR patches
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+ simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
+ simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call
+ simdscalari mask; // IN: Active mask for shader
+    ScalarPatch* pCPout; // OUT: Output control point patch; SIMD-sized array of SCALAR patches
+ SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
//////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
struct SWR_GS_CONTEXT
{
- simdvector* pVerts; // IN: input primitive data for SIMD prims
- uint32_t inputVertStride; // IN: input vertex stride, in attributes
- simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
- uint32_t InstanceID; // IN: input instance ID
- simdscalari mask; // IN: Active mask for shader
- uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+ simdvector* pVerts; // IN: input primitive data for SIMD prims
+ uint32_t inputVertStride; // IN: input vertex stride, in attributes
+ simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call
+ uint32_t InstanceID; // IN: input instance ID
+ simdscalari mask; // IN: Active mask for shader
+ uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
+ SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
struct PixelPositions
/////////////////////////////////////////////////////////////////////////
struct SWR_PS_CONTEXT
{
- PixelPositions vX; // IN: x location(s) of pixels
- PixelPositions vY; // IN: x location(s) of pixels
- simdscalar vZ; // INOUT: z location of pixels
- simdscalari activeMask; // OUT: mask for kill
- simdscalar inputMask; // IN: input coverage mask for all samples
- simdscalari oMask; // OUT: mask for output coverage
+ PixelPositions vX; // IN: x location(s) of pixels
+    PixelPositions vY; // IN: y location(s) of pixels
+ simdscalar vZ; // INOUT: z location of pixels
+ simdscalari activeMask; // OUT: mask for kill
+ simdscalar inputMask; // IN: input coverage mask for all samples
+ simdscalari oMask; // OUT: mask for output coverage
- PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
+ PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
PixelPositions vJ;
- PixelPositions vOneOverW; // IN: 1/w
+ PixelPositions vOneOverW; // IN: 1/w
const float* pAttribs; // IN: pointer to attribute barycentric coefficients
const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
const float* pRecipW; // IN: pointer to 1/w coord for each vertex
- const float *I; // IN: Barycentric A, B, and C coefs used to compute I
- const float *J; // IN: Barycentric A, B, and C coefs used to compute J
- float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
+ const float* I; // IN: Barycentric A, B, and C coefs used to compute I
+ const float* J; // IN: Barycentric A, B, and C coefs used to compute J
+ float recipDet; // IN: 1/Det, used when barycentric interpolating attributes
const float* pSamplePosX; // IN: array of sample positions
const float* pSamplePosY; // IN: array of sample positions
- simdvector shaded[SWR_NUM_RENDERTARGETS];
- // OUT: result color per rendertarget
+ simdvector shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget
- uint32_t frontFace; // IN: front- 1, back- 0
- uint32_t sampleIndex; // IN: sampleIndex
- uint32_t renderTargetArrayIndex; // IN: render target array index from GS
- uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
+ uint32_t frontFace; // IN: front- 1, back- 0
+ uint32_t sampleIndex; // IN: sampleIndex
+ uint32_t renderTargetArrayIndex; // IN: render target array index from GS
+ uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer
uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+ SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
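
// Editor's sketch of how the barycentric fields above combine, assuming the
// conventional setup the comments describe; the coefficient layout of I, J,
// and the attribute array here is illustrative, not SWR's exact one. I and J
// each hold the A, B, C coefficients of an edge function, recipDet normalizes
// them into barycentric weights, and the third weight is whatever remains.
static float interpolateAttrib(const float I[3], const float J[3],
                               float recipDet, float x, float y,
                               const float vtxAttrib[3]) // per-vertex values
{
    const float i = (I[0] * x + I[1] * y + I[2]) * recipDet;
    const float j = (J[0] * x + J[1] * y + J[2]) * recipDet;
    const float k = 1.0f - i - j; // weights sum to one
    return vtxAttrib[0] * i + vtxAttrib[1] * j + vtxAttrib[2] * k;
}
// Perspective-correct interpolation runs the same math on attribute/w values
// (pPerspAttribs) and on 1/w (vOneOverW), then divides at the end.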
//////////////////////////////////////////////////////////////////////////
// count into the shader. When the count reaches 0 then all thread groups in the
// dispatch call have been completed.
- uint32_t tileCounter; // The tile counter value for this thread group.
+ uint32_t tileCounter; // The tile counter value for this thread group.
// Dispatch dimensions used by shader to compute system values from the tile counter.
uint32_t dispatchDims[3];
uint8_t* pTGSM; // Thread Group Shared Memory pointer.
uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support
- uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is responsible
- // for subdividing scratch space per instance/simd
+ uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the shader, shader is
+ // responsible for subdividing scratch space per instance/simd
uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
- SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
+ SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
};
// enums
enum SWR_TILE_MODE
{
- SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
- SWR_TILE_MODE_WMAJOR, // W major tiling
- SWR_TILE_MODE_XMAJOR, // X major tiling
- SWR_TILE_MODE_YMAJOR, // Y major tiling
- SWR_TILE_SWRZ, // SWR-Z tiling
+ SWR_TILE_NONE = 0x0, // Linear mode (no tiling)
+ SWR_TILE_MODE_WMAJOR, // W major tiling
+ SWR_TILE_MODE_XMAJOR, // X major tiling
+ SWR_TILE_MODE_YMAJOR, // Y major tiling
+ SWR_TILE_SWRZ, // SWR-Z tiling
SWR_TILE_MODE_COUNT
};
enum SWR_SURFACE_TYPE
{
- SURFACE_1D = 0,
- SURFACE_2D = 1,
- SURFACE_3D = 2,
- SURFACE_CUBE = 3,
- SURFACE_BUFFER = 4,
+ SURFACE_1D = 0,
+ SURFACE_2D = 1,
+ SURFACE_3D = 2,
+ SURFACE_CUBE = 3,
+ SURFACE_BUFFER = 4,
SURFACE_STRUCTURED_BUFFER = 5,
- SURFACE_NULL = 7
+ SURFACE_NULL = 7
};
enum SWR_ZFUNCTION
//////////////////////////////////////////////////////////////////////////
struct SWR_SURFACE_STATE
{
- gfxptr_t xpBaseAddress;
- SWR_SURFACE_TYPE type; // @llvm_enum
- SWR_FORMAT format; // @llvm_enum
- uint32_t width;
- uint32_t height;
- uint32_t depth;
- uint32_t numSamples;
- uint32_t samplePattern;
- uint32_t pitch;
- uint32_t qpitch;
- uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
- uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
- float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
- uint32_t lod; // for render targets, the lod being rendered to
- uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
- SWR_TILE_MODE tileMode; // @llvm_enum
- uint32_t halign;
- uint32_t valign;
- uint32_t xOffset;
- uint32_t yOffset;
+ gfxptr_t xpBaseAddress;
+ SWR_SURFACE_TYPE type; // @llvm_enum
+ SWR_FORMAT format; // @llvm_enum
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint32_t numSamples;
+ uint32_t samplePattern;
+ uint32_t pitch;
+ uint32_t qpitch;
+ uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
+ uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
+ float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be
+ // accessed by sampler
+ uint32_t lod; // for render targets, the lod being rendered to
+ uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
+ SWR_TILE_MODE tileMode; // @llvm_enum
+ uint32_t halign;
+ uint32_t valign;
+ uint32_t xOffset;
+ uint32_t yOffset;
uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
- gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
- SWR_AUX_MODE auxMode; // @llvm_enum
+ gfxptr_t xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
+ SWR_AUX_MODE auxMode; // @llvm_enum
- bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
+ bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
};
// vertex fetch state
uint32_t index;
uint32_t pitch;
uint32_t size;
- uint32_t minVertex; // min vertex (for bounds checking)
- uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
- uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices
+ uint32_t minVertex; // min vertex (for bounds checking)
+ uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks
+ uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for
+ // partially OOB vertices
};
struct SWR_INDEX_BUFFER_STATE
gfxptr_t xpIndices;
// Format type for indices (e.g. UINT16, UINT32, etc.)
SWR_FORMAT format; // @llvm_enum
- uint32_t size;
+ uint32_t size;
};
-
//////////////////////////////////////////////////////////////////////////
/// SWR_FETCH_CONTEXT
/// @brief Input to fetch shader.
/////////////////////////////////////////////////////////////////////////
struct SWR_FETCH_CONTEXT
{
- const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
- gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
- gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
- uint32_t CurInstance; // IN: current instance
- uint32_t BaseVertex; // IN: base vertex
- uint32_t StartVertex; // IN: start vertex
- uint32_t StartInstance; // IN: start instance
- simdscalari VertexID; // OUT: vector of vertex IDs
- simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
+ const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers
+ gfxptr_t xpIndices; // IN: pointer to int32 index buffer for indexed draws
+ gfxptr_t xpLastIndex; // IN: pointer to end of index buffer, used for bounds checking
+ uint32_t CurInstance; // IN: current instance
+ uint32_t BaseVertex; // IN: base vertex
+ uint32_t StartVertex; // IN: start vertex
+ uint32_t StartInstance; // IN: start instance
+ simdscalari VertexID; // OUT: vector of vertex IDs
+ simdscalari CutMask; // OUT: vector mask of indices which have the cut index value
#if USE_SIMD16_SHADERS
-// simd16scalari VertexID; // OUT: vector of vertex IDs
-// simd16scalari CutMask; // OUT: vector mask of indices which have the cut index value
- simdscalari VertexID2; // OUT: vector of vertex IDs
- simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
+ // simd16scalari VertexID; // OUT: vector of vertex IDs
+ // simd16scalari CutMask; // OUT: vector mask of indices which have the
+ // cut index value
+ simdscalari VertexID2; // OUT: vector of vertex IDs
+ simdscalari CutMask2; // OUT: vector mask of indices which have the cut index value
#endif
};
uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
// Pipeline Stats
- uint64_t PsInvocations; // Number of Pixel Shader invocations
- uint64_t CsInvocations; // Number of Compute Shader invocations
+ uint64_t PsInvocations; // Number of Pixel Shader invocations
+ uint64_t CsInvocations; // Number of Compute Shader invocations
};
uint64_t SoNumPrimsWritten[4];
};
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_BUFFERS
-/////////////////////////////////////////////////////////////////////////
+ //////////////////////////////////////////////////////////////////////////
+ /// STREAMOUT_BUFFERS
+ /////////////////////////////////////////////////////////////////////////
#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
/////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_CONTEXT
{
- uint32_t* pPrimData;
+ uint32_t* pPrimData;
SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
// Num prims written for this stream
bool gsEnable;
// If true, geometry shader emits a single stream, with separate cut buffer.
- // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
- // to map vertices to streams
+ // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
+ // separate StreamID buffer to map vertices to streams
bool isSingleStream;
// Number of input attributes per vertex. Used by the frontend to
uint32_t inputVertStride;
// Output topology - can be point, tristrip, linestrip, or rectlist
- PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
+ PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
// Maximum number of verts that can be emitted by a single instance of the GS
uint32_t maxNumVerts;
// Total amount of memory to allocate for one instance of the shader output in bytes
uint32_t allocationSize;
- // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
+ // Offset to the start of the attributes of the input vertices, in simdvector units, as read by
+ // the GS
uint32_t vertexAttribOffset;
// Offset to the attributes as stored by the preceding shader stage.
uint32_t srcVertexAttribOffset;
- // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
- // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
+ // Size of the control data section which contains cut or streamID data, in simdscalar units.
+ // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
+ // no cuts or streamID bits.
uint32_t controlDataSize;
// Offset to the control data section, in bytes
// Offset to the start of the vertex section, in bytes
uint32_t outputVertexOffset;
- // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
- // expected to store the final vertex count in the first dword of the gs output stream.
+ // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
+ // shader is expected to store the final vertex count in the first dword of the gs output
+ // stream.
uint32_t staticVertexCount;
uint32_t pad;
};
-static_assert(sizeof(SWR_GS_STATE) == 64,
- "Adjust padding to keep size (or remove this assert)");
-
+static_assert(sizeof(SWR_GS_STATE) == 64, "Adjust padding to keep size (or remove this assert)");
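
// Editor's sketch of the staticVertexCount contract stated above: zero means
// the vertex count is dynamic and lives in the first dword of the GS output
// stream; non-zero means the count is known statically (helper name is ours):
static uint32_t gsOutputVertexCount(const SWR_GS_STATE& gsState,
                                    const uint8_t* pOutputStream)
{
    return gsState.staticVertexCount
               ? gsState.staticVertexCount
               : *reinterpret_cast<const uint32_t*>(pOutputStream);
}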
//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/////////////////////////////////////////////////////////////////////////
struct SWR_TS_STATE
{
- bool tsEnable;
+ bool tsEnable;
- SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
- SWR_TS_PARTITIONING partitioning; // @llvm_enum
- SWR_TS_DOMAIN domain; // @llvm_enum
+ SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
+ SWR_TS_PARTITIONING partitioning; // @llvm_enum
+ SWR_TS_DOMAIN domain; // @llvm_enum
- PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
+ PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
- uint32_t numHsInputAttribs;
- uint32_t numHsOutputAttribs;
- uint32_t numDsOutputAttribs;
- uint32_t dsAllocationSize;
- uint32_t dsOutVtxAttribOffset;
+ uint32_t numHsInputAttribs;
+ uint32_t numHsOutputAttribs;
+ uint32_t numDsOutputAttribs;
+ uint32_t dsAllocationSize;
+ uint32_t dsOutVtxAttribOffset;
// Offset to the start of the attributes of the input vertices, in simdvector units
- uint32_t vertexAttribOffset;
+ uint32_t vertexAttribOffset;
};
// output merger state
uint8_t writeDisableBlue : 1;
uint8_t writeDisableAlpha : 1;
};
-static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
+static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
+ "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
enum SWR_MULTISAMPLE_COUNT
{
uint32_t sampleMask;
// all RT's have the same sample count
///@todo move this to Output Merger state when we refactor
- SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
+ SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
};
struct SWR_BLEND_CONTEXT
{
- const SWR_BLEND_STATE* pBlendState;
- simdvector* src;
- simdvector* src1;
- simdvector* src0alpha;
- uint32_t sampleNum;
- simdvector* pDst;
- simdvector* result;
- simdscalari* oMask;
- simdscalari* pMask;
- uint32_t isAlphaTested;
- uint32_t isAlphaBlended;
+ const SWR_BLEND_STATE* pBlendState;
+ simdvector* src;
+ simdvector* src1;
+ simdvector* src0alpha;
+ uint32_t sampleNum;
+ simdvector* pDst;
+ simdvector* result;
+ simdscalari* oMask;
+ simdscalari* pMask;
+ uint32_t isAlphaTested;
+ uint32_t isAlphaBlended;
};
//////////////////////////////////////////////////////////////////////////
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
-typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
+typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
+typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
-
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_MULTISAMPLE_POS
{
public:
- INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
- INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
- INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
- INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
- INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
- INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
- INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
- INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
- typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
- INLINE sampleArrayT X() const { return _x; }; // @llvm_func
- INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
+ INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
+ INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
+ INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
+ INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
+ INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
+ INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
+ INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
+ INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
+ typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
+ INLINE sampleArrayT X() const { return _x; }; // @llvm_func
+ INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
- INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
- INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
+ INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
INLINE void PrecalcSampleData(int numSamples); //@llvm_func
private:
template <typename MaskT>
INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
- INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
+ INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func
// scalar sample values
uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
- float _x[SWR_MAX_NUM_MULTISAMPLES];
- float _y[SWR_MAX_NUM_MULTISAMPLES];
+ float _x[SWR_MAX_NUM_MULTISAMPLES];
+ float _y[SWR_MAX_NUM_MULTISAMPLES];
// precalc'd / vectorized samples
- __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
- __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
+ __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
- __m128i tileSampleOffsetsX;
- __m128i tileSampleOffsetsY;
+ __m128i tileSampleOffsetsX;
+ __m128i tileSampleOffsetsY;
};
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
struct SWR_RASTSTATE
{
- uint32_t cullMode : 2;
- uint32_t fillMode : 2;
- uint32_t frontWinding : 1;
- uint32_t scissorEnable : 1;
- uint32_t depthClipEnable : 1;
- uint32_t clipHalfZ : 1;
- uint32_t pointParam : 1;
- uint32_t pointSpriteEnable : 1;
- uint32_t pointSpriteTopOrigin : 1;
- uint32_t forcedSampleCount : 1;
- uint32_t pixelOffset : 1;
- uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
- uint32_t conservativeRast : 1;
+ uint32_t cullMode : 2;
+ uint32_t fillMode : 2;
+ uint32_t frontWinding : 1;
+ uint32_t scissorEnable : 1;
+ uint32_t depthClipEnable : 1;
+ uint32_t clipHalfZ : 1;
+ uint32_t pointParam : 1;
+ uint32_t pointSpriteEnable : 1;
+ uint32_t pointSpriteTopOrigin : 1;
+ uint32_t forcedSampleCount : 1;
+ uint32_t pixelOffset : 1;
+ uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
+ uint32_t conservativeRast : 1;
float pointSize;
float lineWidth;
- float depthBias;
- float slopeScaledDepthBias;
- float depthBiasClamp;
- SWR_FORMAT depthFormat; // @llvm_enum
+ float depthBias;
+ float slopeScaledDepthBias;
+ float depthBiasClamp;
+ SWR_FORMAT depthFormat; // @llvm_enum
// sample count the rasterizer is running at
- SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
- uint32_t pixelLocation; // UL or Center
- SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
- bool bIsCenterPattern; // @llvm_enum
+ SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
+ uint32_t pixelLocation; // UL or Center
+ SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct
+ bool bIsCenterPattern; // @llvm_enum
};
// backend state
struct SWR_BACKEND_STATE
{
- uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant interpolation
- uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
+ uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
+ // interpolation
+ uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be
+ // interpreted as tex coordinates
- bool swizzleEnable; // when enabled, core will parse the swizzle map when
- // setting up attributes for the backend, otherwise
- // all attributes up to numAttributes will be sent
- uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
- uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components
+ bool swizzleEnable; // when enabled, core will parse the swizzle map when
+ // setting up attributes for the backend, otherwise
+ // all attributes up to numAttributes will be sent
+ uint8_t numAttributes; // total number of attributes to send to backend (up to 32)
+    uint8_t numComponents[32]; // number of components to set up per attribute; this reduces
+                               // some calculations for unneeded components
- bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend
- bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
+ bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
+ // backend
+ bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning
// User clip/cull distance enables
uint8_t cullDistanceMask;
// and that the next fields are dword aligned.
uint8_t pad[10];
- // Offset to the start of the attributes of the input vertices, in simdvector units
+ // Offset to the start of the attributes of the input vertices, in simdvector units
uint32_t vertexAttribOffset;
// Offset to clip/cull attrib section of the vertex, in simdvector units
SWR_ATTRIB_SWIZZLE swizzleMap[32];
};
static_assert(sizeof(SWR_BACKEND_STATE) == 128,
- "Adjust padding to keep size (or remove this assert)");
+ "Adjust padding to keep size (or remove this assert)");
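// The assert above pins the struct's ABI size so that an unnoticed field
// addition fails at compile time instead of silently shifting offsets. The
// same guard pattern works for any fixed-layout state struct, e.g.:
//
//   struct MyState { uint32_t a; uint8_t pad[12]; };
//   static_assert(sizeof(MyState) == 16, "Adjust padding to keep size");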
union SWR_DEPTH_STENCIL_STATE
enum SWR_BARYCENTRICS_MASK
{
- SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
- SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
+ SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
+ SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
};
struct SWR_PS_STATE
{
// dword 0-1
- PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
+ PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
// dword 2
- uint32_t killsPixel : 1; // pixel shader can kill pixels
- uint32_t inputCoverage : 2; // ps uses input coverage
- uint32_t writesODepth : 1; // pixel shader writes to depth
- uint32_t usesSourceDepth : 1; // pixel shader reads depth
- uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
- uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
- uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with
- uint32_t usesUAV : 1; // pixel shader accesses UAV
- uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
+ uint32_t killsPixel : 1; // pixel shader can kill pixels
+ uint32_t inputCoverage : 2; // ps uses input coverage
+ uint32_t writesODepth : 1; // pixel shader writes to depth
+ uint32_t usesSourceDepth : 1; // pixel shader reads depth
+ uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel
+ uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
+ uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
+ // attributes with
+ uint32_t usesUAV : 1; // pixel shader accesses UAV
+ uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test
- uint8_t renderTargetMask; // Mask of render targets written
+ uint8_t renderTargetMask; // Mask of render targets written
};
// depth bounds state
struct SWR_DEPTH_BOUNDS_STATE
{
- bool depthBoundsTestEnable;
- float depthBoundsTestMinValue;
- float depthBoundsTestMaxValue;
+ bool depthBoundsTestEnable;
+ float depthBoundsTestMinValue;
+ float depthBoundsTestMaxValue;
};
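// A minimal sketch of how these fields would gate a fragment, assuming the
// standard depth-bounds-test semantics (pass when disabled, otherwise pass
// when min <= z <= max); illustrative only, not part of the SWR interface:
//
//   inline bool DepthInBounds(const SWR_DEPTH_BOUNDS_STATE& s, float z)
//   {
//       return !s.depthBoundsTestEnable ||
//              (z >= s.depthBoundsTestMinValue && z <= s.depthBoundsTestMaxValue);
//   }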
-
+// clang-format on
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file state.h
-*
-* @brief Definitions for API state - complex function implementation.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file state.h
+ *
+ * @brief Definitions for API state - complex function implementation.
+ *
+ ******************************************************************************/
#pragma once
#include "core/state.h"
#include "common/simdintrin.h"
-
template <typename MaskT>
INLINE __m128i SWR_MULTISAMPLE_POS::expandThenBlend4(uint32_t* min, uint32_t* max)
{
INLINE void SWR_MULTISAMPLE_POS::PrecalcSampleData(int numSamples)
{
- for(int i = 0; i < numSamples; i++)
+ for (int i = 0; i < numSamples; i++)
{
_vXi[i] = _mm_set1_epi32(_xi[i]);
_vYi[i] = _mm_set1_epi32(_yi[i]);
- _vX[i] = _simd_set1_ps(_x[i]);
- _vY[i] = _simd_set1_ps(_y[i]);
+ _vX[i] = _simd_set1_ps(_x[i]);
+ _vY[i] = _simd_set1_ps(_y[i]);
}
// precalculate the raster tile BB for the rasterizer.
- CalcTileSampleOffsets(numSamples);
+ CalcTileSampleOffsets(numSamples);
}
INLINE void SWR_MULTISAMPLE_POS::CalcTileSampleOffsets(int numSamples)
{
- auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
- auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
+ auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
+ auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
using xMask = std::integral_constant<int, 0xA>;
// BR(max), BL(min), UR(max), UL(min)
tileSampleOffsetsX = expandThenBlend4<xMask>(minXi, maxXi);
- auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
- auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
+ auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
+ auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
using yMask = std::integral_constant<int, 0xC>;
    // BR(max), BL(max), UR(min), UL(min)
tileSampleOffsetsY = expandThenBlend4<yMask>(minYi, maxYi);
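// Scalar sketch of what the two blends above produce, assuming blend-mask bit i
// selects the max operand for lane i and lanes are ordered UL, UR, BL, BR:
//
//   xMask 0xA = 0b1010 -> { minX, maxX, minX, maxX }  // left lanes min, right lanes max
//   yMask 0xC = 0b1100 -> { minY, minY, maxY, maxY }  // top lanes min, bottom lanes max
//
// Together these hold, per tile corner, the extreme sample offsets needed to
// bound the raster tile.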
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tessellator.h
-*
-* @brief Tessellator fixed function unit interface definition
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tessellator.h
+ *
+ * @brief Tessellator fixed function unit interface definition
+ *
+ ******************************************************************************/
#pragma once
/// Allocate and initialize a new tessellation context
-HANDLE SWR_API TSInitCtx(
- SWR_TS_DOMAIN tsDomain, ///< [IN] Tessellation domain (isoline, quad, triangle)
- SWR_TS_PARTITIONING tsPartitioning, ///< [IN] Tessellation partitioning algorithm
- SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology, ///< [IN] Tessellation output topology
- void* pContextMem, ///< [IN] Memory to use for the context
- size_t& memSize); ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
+HANDLE SWR_API
+ TSInitCtx(SWR_TS_DOMAIN tsDomain, ///< [IN] Tessellation domain (isoline, quad, triangle)
+ SWR_TS_PARTITIONING tsPartitioning, ///< [IN] Tessellation partitioning algorithm
+ SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology, ///< [IN] Tessellation output topology
+ void* pContextMem, ///< [IN] Memory to use for the context
+ size_t& memSize); ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
/// Destroy & de-allocate tessellation context
-void SWR_API TSDestroyCtx(
- HANDLE tsCtx); ///< [IN] Tessellation context to be destroyed
+void SWR_API TSDestroyCtx(HANDLE tsCtx); ///< [IN] Tessellation context to be destroyed
struct SWR_TS_TESSELLATED_DATA
{
uint32_t NumDomainPoints;
uint32_t* ppIndices[3];
- float* pDomainPointsU;
- float* pDomainPointsV;
+ float* pDomainPointsU;
+ float* pDomainPointsV;
// For Tri: pDomainPointsW[i] = 1.0f - pDomainPointsU[i] - pDomainPointsV[i]
};
/// Perform Tessellation
-void SWR_API TSTessellate(
- HANDLE tsCtx, ///< [IN] Tessellation Context
- const SWR_TESSELLATION_FACTORS& tsTessFactors, ///< [IN] Tessellation Factors
- SWR_TS_TESSELLATED_DATA& tsTessellatedData); ///< [OUT] Tessellated Data
-
+void SWR_API
+ TSTessellate(HANDLE tsCtx, ///< [IN] Tessellation Context
+ const SWR_TESSELLATION_FACTORS& tsTessFactors, ///< [IN] Tessellation Factors
+ SWR_TS_TESSELLATED_DATA& tsTessellatedData); ///< [OUT] Tessellated Data
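// Hypothetical usage sketch (editor's addition). The [INOUT] memSize contract
// above suggests a query-then-allocate pattern; the exact behavior when the
// supplied buffer is too small is an assumption here, not confirmed by this
// header:
//
//   size_t memSize = 0;
//   TSInitCtx(domain, partitioning, topology, nullptr, memSize); // assumed size query
//   void* pMem = malloc(memSize);
//   HANDLE tsCtx = TSInitCtx(domain, partitioning, topology, pMem, memSize);
//   SWR_TS_TESSELLATED_DATA data;
//   TSTessellate(tsCtx, tessFactors, data);
//   TSDestroyCtx(tsCtx);
//   free(pMem);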
/// @TODO - Implement OSS tessellator
-INLINE HANDLE SWR_API TSInitCtx(
- SWR_TS_DOMAIN tsDomain,
- SWR_TS_PARTITIONING tsPartitioning,
- SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,
- void* pContextMem,
- size_t& memSize)
+INLINE HANDLE SWR_API TSInitCtx(SWR_TS_DOMAIN tsDomain,
+ SWR_TS_PARTITIONING tsPartitioning,
+ SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,
+ void* pContextMem,
+ size_t& memSize)
{
SWR_NOT_IMPL;
return NULL;
}
-
INLINE void SWR_API TSDestroyCtx(HANDLE tsCtx)
{
SWR_NOT_IMPL;
}
-
-INLINE void SWR_API TSTessellate(
- HANDLE tsCtx,
- const SWR_TESSELLATION_FACTORS& tsTessFactors,
- SWR_TS_TESSELLATED_DATA& tsTessellatedData)
+INLINE void SWR_API TSTessellate(HANDLE tsCtx,
+ const SWR_TESSELLATION_FACTORS& tsTessFactors,
+ SWR_TS_TESSELLATED_DATA& tsTessellatedData)
{
SWR_NOT_IMPL;
}
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
#include <stdio.h>
#include <thread>
#include "tileset.h"
-
-
// ThreadId
struct Core
{
- uint32_t procGroup = 0;
- std::vector<uint32_t> threadIds;
+ uint32_t procGroup = 0;
+ std::vector<uint32_t> threadIds;
};
struct NumaNode
std::vector<KAFFINITY> threadMaskPerProcGroup;
- static std::mutex m;
+ static std::mutex m;
std::lock_guard<std::mutex> l(m);
DWORD bufSize = 0;
BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);
- PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem =
+ (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
SWR_ASSERT(pBufferMem);
ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
- uint32_t count = bufSize / pBufferMem->Size;
+ uint32_t count = bufSize / pBufferMem->Size;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;
for (uint32_t i = 0; i < count; ++i)
SWR_ASSERT(pBuffer->Relationship == RelationProcessorCore);
for (uint32_t g = 0; g < pBuffer->Processor.GroupCount; ++g)
{
- auto& gmask = pBuffer->Processor.GroupMask[g];
- uint32_t threadId = 0;
+ auto& gmask = pBuffer->Processor.GroupMask[g];
+ uint32_t threadId = 0;
uint32_t procGroup = gmask.Group;
Core* pCore = nullptr;
threadMaskPerProcGroup[procGroup] |= (KAFFINITY(1) << threadId);
// Find Numa Node
- uint32_t numaId = 0;
+ uint32_t numaId = 0;
PROCESSOR_NUMBER procNum = {};
- procNum.Group = WORD(procGroup);
- procNum.Number = UCHAR(threadId);
+ procNum.Group = WORD(procGroup);
+ procNum.Number = UCHAR(threadId);
ret = GetNumaProcessorNodeEx(&procNum, (PUSHORT)&numaId);
SWR_ASSERT(ret);
{
out_nodes.resize(numaId + 1);
}
- auto& numaNode = out_nodes[numaId];
+ auto& numaNode = out_nodes[numaId];
numaNode.numaId = numaId;
uint32_t coreId = 0;
if (nullptr == pCore)
{
numaNode.cores.push_back(Core());
- pCore = &numaNode.cores.back();
+ pCore = &numaNode.cores.back();
pCore->procGroup = procGroup;
}
pCore->threadIds.push_back(threadId);
free(pBufferMem);
-
-#elif defined(__linux__) || defined (__gnu_linux__)
+#elif defined(__linux__) || defined(__gnu_linux__)
// Parse /proc/cpuinfo to get full topology
std::ifstream input("/proc/cpuinfo");
- std::string line;
- char* c;
- uint32_t procId = uint32_t(-1);
- uint32_t coreId = uint32_t(-1);
- uint32_t physId = uint32_t(-1);
+ std::string line;
+ char* c;
+ uint32_t procId = uint32_t(-1);
+ uint32_t coreId = uint32_t(-1);
+ uint32_t physId = uint32_t(-1);
while (std::getline(input, line))
{
if (line.find("processor") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
- procId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ procId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.find("core id") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
- coreId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ coreId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.find("physical id") != std::string::npos)
{
auto data_start = line.find(": ") + 2;
- physId = std::strtoul(&line.c_str()[data_start], &c, 10);
+ physId = std::strtoul(&line.c_str()[data_start], &c, 10);
continue;
}
if (line.length() == 0)
{
if (physId + 1 > out_nodes.size())
out_nodes.resize(physId + 1);
- auto& numaNode = out_nodes[physId];
+ auto& numaNode = out_nodes[physId];
numaNode.numaId = physId;
if (coreId + 1 > numaNode.cores.size())
numaNode.cores.resize(coreId + 1);
- auto& core = numaNode.cores[coreId];
+ auto& core = numaNode.cores[coreId];
core.procGroup = coreId;
core.threadIds.push_back(procId);
}
}
out_numThreadsPerProcGroup = 0;
- for (auto &node : out_nodes)
+ for (auto& node : out_nodes)
{
- for (auto &core : node.cores)
+ for (auto& core : node.cores)
{
out_numThreadsPerProcGroup += core.threadIds.size();
}
#elif defined(__APPLE__)
- auto numProcessors = 0;
- auto numCores = 0;
+ auto numProcessors = 0;
+ auto numCores = 0;
auto numPhysicalIds = 0;
- int value;
+ int value;
size_t size = sizeof(value);
int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
for (auto physId = 0; physId < numPhysicalIds; ++physId)
{
- auto &numaNode = out_nodes[physId];
- auto procId = 0;
+ auto& numaNode = out_nodes[physId];
+ auto procId = 0;
numaNode.cores.resize(numCores);
{
for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, ++procId)
{
- auto &core = numaNode.cores[coreId];
+ auto& core = numaNode.cores[coreId];
core.procGroup = coreId;
core.threadIds.push_back(procId);
out_numThreadsPerProcGroup = 0;
- for (auto &node : out_nodes)
+ for (auto& node : out_nodes)
{
- for (auto &core : node.cores)
+ for (auto& core : node.cores)
{
out_numThreadsPerProcGroup += core.threadIds.size();
}
#endif
// Prune empty cores and numa nodes
- for (auto node_it = out_nodes.begin(); node_it != out_nodes.end(); )
+ for (auto node_it = out_nodes.begin(); node_it != out_nodes.end();)
{
// Erase empty cores (first)
- for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end(); )
+ for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end();)
{
if (core_it->threadIds.size() == 0)
{
}
}
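// The pruning loops above rely on the standard erase-while-iterating idiom:
// the iterator advances only when nothing is erased, because erase() already
// returns the iterator to the next element. Generic sketch of the pattern:
//
//   for (auto it = v.begin(); it != v.end();)
//   {
//       if (it->threadIds.empty())
//           it = v.erase(it); // erase() yields the element after the erased one
//       else
//           ++it;
//   }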
-void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
+void bindThread(SWR_CONTEXT* pContext,
+ uint32_t threadId,
+ uint32_t procGroupId = 0,
+ bool bindProcGroup = false)
{
// Only bind threads when MAX_WORKER_THREADS isn't set.
- if (pContext->threadInfo.SINGLE_THREADED || (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
+ if (pContext->threadInfo.SINGLE_THREADED ||
+ (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
{
return;
}
#if defined(_WIN32)
GROUP_AFFINITY affinity = {};
- affinity.Group = procGroupId;
+ affinity.Group = procGroupId;
#if !defined(_WIN64)
if (threadId >= 32)
{
// If MAX_WORKER_THREADS is set, only bind to the proc group,
    // not the individual HW thread.
- if (!bindProcGroup && !pContext->threadInfo.MAX_WORKER_THREADS)
+ if (!bindProcGroup && !pContext->threadInfo.MAX_WORKER_THREADS)
{
affinity.Mask = KAFFINITY(1) << threadId;
}
}
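// Note on the threadId >= 32 guard above: KAFFINITY is pointer-sized, so a
// 32-bit Windows build can only express 32 HW threads per processor group in
// an affinity mask, while 64-bit builds get the full 64 bits.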
INLINE
-uint32_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
+uint32_t GetEnqueuedDraw(SWR_CONTEXT* pContext)
{
return pContext->dcRing.GetHead();
}
INLINE
-DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint32_t drawId)
+DRAW_CONTEXT* GetDC(SWR_CONTEXT* pContext, uint32_t drawId)
{
- return &pContext->dcRing[(drawId-1) % pContext->MAX_DRAWS_IN_FLIGHT];
+ return &pContext->dcRing[(drawId - 1) % pContext->MAX_DRAWS_IN_FLIGHT];
}
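// Illustrative note: drawId is 1-based, so (drawId - 1) % MAX_DRAWS_IN_FLIGHT
// maps the first draw to ring slot 0 and wraps naturally. For example, with 8
// draws in flight: drawId 1 -> slot 0, drawId 8 -> slot 7, drawId 9 -> slot 0.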
INLINE
// returns true if dependency not met
INLINE
-bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
+bool CheckDependency(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
{
return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
}
-bool CheckDependencyFE(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
+bool CheckDependencyFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
{
return pDC->dependentFE && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
}
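// IDComparesLess is presumably a wrap-safe ordering on the monotonically
// increasing draw ids; a common formulation of such a comparison (a sketch,
// not necessarily the SWR definition):
//
//   inline bool IdLessThan(uint32_t a, uint32_t b)
//   {
//       return static_cast<int32_t>(a - b) < 0; // correct across uint32 wrap-around
//   }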
}
DRAW_DYNAMIC_STATE& dynState = pDC->dynState;
- OSALIGNLINE(SWR_STATS) stats{ 0 };
+ OSALIGNLINE(SWR_STATS) stats{0};
// Sum up stats across all workers before sending to client.
for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
{
stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
- stats.PsInvocations += dynState.pStats[i].PsInvocations;
- stats.CsInvocations += dynState.pStats[i].CsInvocations;
+ stats.PsInvocations += dynState.pStats[i].PsInvocations;
+ stats.CsInvocations += dynState.pStats[i].CsInvocations;
}
if (pDC->retireCallback.pfnCallbackFunc)
{
pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
- pDC->retireCallback.userData2,
- pDC->retireCallback.userData3);
+ pDC->retireCallback.userData2,
+ pDC->retireCallback.userData3);
}
}
_ReadWriteBarrier();
- pContext->dcRing.Dequeue(); // Remove from tail
+ pContext->dcRing.Dequeue(); // Remove from tail
}
return result;
return CompleteDrawContextInl(pContext, 0, pDC);
}
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued)
+INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext,
+ uint32_t workerId,
+ uint32_t& curDrawBE,
+ uint32_t& drawEnqueued)
{
// increment our current draw id to the first incomplete draw
drawEnqueued = GetEnqueuedDraw(pContext);
while (IDComparesLess(curDrawBE, drawEnqueued))
{
- DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
+ DRAW_CONTEXT* pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
        // If it's not compute and FE is not done, then break out of the loop.
- if (!pDC->doneFE && !pDC->isCompute) break;
+ if (!pDC->doneFE && !pDC->isCompute)
+ break;
- bool isWorkComplete = pDC->isCompute ?
- pDC->pDispatch->isWorkComplete() :
- pDC->pTileMgr->isWorkComplete();
+ bool isWorkComplete =
+ pDC->isCompute ? pDC->pDispatch->isWorkComplete() : pDC->pTileMgr->isWorkComplete();
if (isWorkComplete)
{
/// @brief If there is any BE work then go work on it.
/// @param pContext - pointer to SWR context.
/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
-/// has its own curDrawBE counter and this ensures that each worker processes all the
-/// draws in order.
+/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each
+///                    worker thread has its own curDrawBE counter and this ensures that
+///                    each worker processes all the draws in order.
/// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its
-/// own set and each time it fails to lock a macrotile, because its already locked,
-/// then it will add that tile to the lockedTiles set. As a worker begins to work
-/// on future draws the lockedTiles ensure that it doesn't work on tiles that may
-/// still have work pending in a previous draw. Additionally, the lockedTiles is
-/// hueristic that can steer a worker back to the same macrotile that it had been
-/// working on in a previous draw.
+///                      own set and each time it fails to lock a macrotile, because it's already
+///                      locked, then it will add that tile to the lockedTiles set. As a worker
+///                      begins to work on future draws the lockedTiles set ensures that it doesn't
+///                      work on tiles that may still have work pending in a previous draw.
+///                      Additionally, the lockedTiles set is a heuristic that can steer a worker
+///                      back to the same macrotile that it had been working on in a previous draw.
/// @returns true if worker thread should shutdown
-bool WorkOnFifoBE(
- SWR_CONTEXT *pContext,
- uint32_t workerId,
- uint32_t &curDrawBE,
- TileSet& lockedTiles,
- uint32_t numaNode,
- uint32_t numaMask)
+bool WorkOnFifoBE(SWR_CONTEXT* pContext,
+ uint32_t workerId,
+ uint32_t& curDrawBE,
+ TileSet& lockedTiles,
+ uint32_t numaNode,
+ uint32_t numaMask)
{
bool bShutdown = false;
return false;
}
- uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
+ uint32_t lastRetiredDraw =
+ pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
// Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
lockedTiles.clear();
// Try to work on each draw in order of the available draws in flight.
// 1. If we're on curDrawBE, we can work on any macrotile that is available.
- // 2. If we're trying to work on draws after curDrawBE, we are restricted to
+ // 2. If we're trying to work on draws after curDrawBE, we are restricted to
// working on those macrotiles that are known to be complete in the prior draw to
    // maintain order. The locked tiles set provides the history to ensure this.
for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
{
- DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
+ DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute) return false; // We don't look at compute work.
+ if (pDC->isCompute)
+ return false; // We don't look at compute work.
        // First wait for FE to be finished with this draw. This keeps the threading model simple,
// but if there are lots of bubbles between draws then serializing FE and BE may
// need to be revisited.
- if (!pDC->doneFE) return false;
-
+ if (!pDC->doneFE)
+ return false;
+
// If this draw is dependent on a previous draw then we need to bail.
if (CheckDependency(pContext, pDC, lastRetiredDraw))
{
}
// Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
- auto ¯oTiles = pDC->pTileMgr->getDirtyTiles();
+ auto& macroTiles = pDC->pTileMgr->getDirtyTiles();
for (auto tile : macroTiles)
{
if (tile->tryLock())
{
- BE_WORK *pWork;
+ BE_WORK* pWork;
RDTSC_BEGIN(WorkerFoundWork, pDC->drawId);
pDC->pTileMgr->markTileComplete(tileID);
- // Optimization: If the draw is complete and we're the last one to have worked on it then
- // we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete.
+ // Optimization: If the draw is complete and we're the last one to have worked on it
+ // then we can reset the locked list as we know that all previous draws before the
+ // next are guaranteed to be complete.
if ((curDrawBE == i) && (bShutdown || pDC->pTileMgr->isWorkComplete()))
{
- // We can increment the current BE and safely move to next draw since we know this draw is complete.
+ // We can increment the current BE and safely move to next draw since we know
+ // this draw is complete.
curDrawBE++;
CompleteDrawContextInl(pContext, workerId, pDC);
}
else
{
- // This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
+ // This tile is already locked. So let's add it to our locked tiles set. This way we
+ // don't try locking this one again.
lockedTiles.set(tileID);
}
}
SWR_STATS_FE& stats = pDC->dynState.statsFE;
AR_EVENT(FrontendStatsEvent(pDC->drawId,
- stats.IaVertices, stats.IaPrimitives, stats.VsInvocations, stats.HsInvocations,
- stats.DsInvocations, stats.GsInvocations, stats.GsPrimitives, stats.CInvocations, stats.CPrimitives,
- stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
- stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
- ));
- AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
+ stats.IaVertices,
+ stats.IaPrimitives,
+ stats.VsInvocations,
+ stats.HsInvocations,
+ stats.DsInvocations,
+ stats.GsInvocations,
+ stats.GsPrimitives,
+ stats.CInvocations,
+ stats.CPrimitives,
+ stats.SoPrimStorageNeeded[0],
+ stats.SoPrimStorageNeeded[1],
+ stats.SoPrimStorageNeeded[2],
+ stats.SoPrimStorageNeeded[3],
+ stats.SoNumPrimsWritten[0],
+ stats.SoNumPrimsWritten[1],
+ stats.SoNumPrimsWritten[2],
+ stats.SoNumPrimsWritten[3]));
+ AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
}
if ((pDC->dynState.SoWriteOffsetDirty[i]) &&
(pDC->pState->state.soBuffer[i].soWriteEnable))
{
- pContext->pfnUpdateSoWriteOffset(GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
+ pContext->pfnUpdateSoWriteOffset(
+ GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
}
}
}
InterlockedDecrement(&pContext->drawsOutstandingFE);
}
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
+void WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE)
{
// Try to grab the next DC from the ring
uint32_t drawEnqueued = GetEnqueuedDraw(pContext);
while (IDComparesLess(curDrawFE, drawEnqueued))
{
- uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
- DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
+ uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
+ DRAW_CONTEXT* pDC = &pContext->dcRing[dcSlot];
if (pDC->isCompute || pDC->doneFE)
{
CompleteDrawContextInl(pContext, workerId, pDC);
}
uint32_t lastRetiredFE = curDrawFE - 1;
- uint32_t curDraw = curDrawFE;
+ uint32_t curDraw = curDrawFE;
while (IDComparesLess(curDraw, drawEnqueued))
{
- uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
- DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
+ uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
+ DRAW_CONTEXT* pDC = &pContext->dcRing[dcSlot];
if (!pDC->isCompute && !pDC->FeLock)
{
/// @brief If there is any compute work then go work on it.
/// @param pContext - pointer to SWR context.
/// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
-/// has its own curDrawBE counter and this ensures that each worker processes all the
-/// draws in order.
-void WorkOnCompute(
- SWR_CONTEXT *pContext,
- uint32_t workerId,
- uint32_t& curDrawBE)
+/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each
+///                    worker thread has its own curDrawBE counter and this ensures that
+///                    each worker processes all the draws in order.
+void WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE)
{
uint32_t drawEnqueued = 0;
if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
return;
}
- uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
+ uint32_t lastRetiredDraw =
+ pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
{
- DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
- if (pDC->isCompute == false) return;
+ DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
+ if (pDC->isCompute == false)
+ return;
// check dependencies
if (CheckDependency(pContext, pDC, lastRetiredDraw))
// Is there any work remaining?
if (queue.getNumQueued() > 0)
{
- void* pSpillFillBuffer = nullptr;
- void* pScratchSpace = nullptr;
- uint32_t threadGroupId = 0;
+ void* pSpillFillBuffer = nullptr;
+ void* pScratchSpace = nullptr;
+ uint32_t threadGroupId = 0;
while (queue.getWork(threadGroupId))
{
queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
}
}
-void BindApiThread(SWR_CONTEXT *pContext, uint32_t apiThreadId)
+void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId)
{
if (nullptr == pContext)
{
{
if (pContext->threadPool.numReservedThreads)
{
- const THREAD_DATA &threadData = pContext->threadPool.pApiThreadData[0];
+ const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[0];
// Just bind to the process group used for API thread 0
bindThread(pContext, 0, threadData.procGroupId, true);
}
return;
}
- const THREAD_DATA &threadData = pContext->threadPool.pApiThreadData[apiThreadId];
+ const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[apiThreadId];
- bindThread(pContext, threadData.threadId, threadData.procGroupId, threadData.forceBindProcGroup);
+ bindThread(
+ pContext, threadData.threadId, threadData.procGroupId, threadData.forceBindProcGroup);
}
-template<bool IsFEThread, bool IsBEThread>
+template <bool IsFEThread, bool IsBEThread>
DWORD workerThreadMain(LPVOID pData)
{
- THREAD_DATA *pThreadData = (THREAD_DATA*)pData;
- SWR_CONTEXT *pContext = pThreadData->pContext;
- uint32_t threadId = pThreadData->threadId;
- uint32_t workerId = pThreadData->workerId;
+ THREAD_DATA* pThreadData = (THREAD_DATA*)pData;
+ SWR_CONTEXT* pContext = pThreadData->pContext;
+ uint32_t threadId = pThreadData->threadId;
+ uint32_t workerId = pThreadData->workerId;
bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
    // Linux pthread names are limited to 16 chars (including the terminating \0)
"w%03d-n%d-c%03d-t%d",
#endif
- workerId, pThreadData->numaId, pThreadData->coreId, pThreadData->htId);
+ workerId,
+ pThreadData->numaId,
+ pThreadData->coreId,
+ pThreadData->htId);
SetCurrentThreadName(threadName);
}
// each worker has the ability to work on any of the queued draws as long as certain
    // conditions are met. The data associated
- // with a draw is guaranteed to be active as long as a worker hasn't signaled that he
+    // with a draw is guaranteed to be active as long as a worker hasn't signaled that it
    // has moved on to the next draw when it determines there is no more work to do. The API
// thread will not increment the head of the dc ring until all workers have moved past the
// current head.
if (IsBEThread)
{
RDTSC_BEGIN(WorkerWorkOnFifoBE, 0);
- bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
+ bShutdown |=
+ WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
RDTSC_END(WorkerWorkOnFifoBE, 0);
WorkOnCompute(pContext, workerId, curDrawBE);
return 0;
}
-template<> DWORD workerThreadMain<false, false>(LPVOID) = delete;
+template <>
+DWORD workerThreadMain<false, false>(LPVOID) = delete;
template <bool IsFEThread, bool IsBEThread>
DWORD workerThreadInit(LPVOID pData)
}
#if defined(_WIN32)
- __except(EXCEPTION_CONTINUE_SEARCH)
+ __except (EXCEPTION_CONTINUE_SEARCH)
{
}
return 1;
}
-template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
+template <>
+DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
static void InitPerThreadStats(SWR_CONTEXT* pContext, uint32_t numThreads)
{
// Initialize DRAW_CONTEXT's per-thread stats
for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
{
- pContext->dcRing[dc].dynState.pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
+ pContext->dcRing[dc].dynState.pStats =
+ (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);
}
}
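// Why a per-thread stats array with a 64-byte-aligned allocation: each worker
// increments only its own SWR_STATS slot, so no atomics are needed, and the
// cache-line alignment reduces false sharing with neighboring heap data; the
// draw-retire path then sums the slots (see the loop earlier in this file).
// Standalone sketch of the pattern:
//
//   struct alignas(64) PerWorkerCount { uint64_t n; }; // one counter per cache line
//   PerWorkerCount counts[8];        // one slot per worker (hypothetical count)
//   // worker i performs counts[i].n++; a single reader later sums all slots.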
void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
{
CPUNumaNodes nodes;
- uint32_t numThreadsPerProcGroup = 0;
+ uint32_t numThreadsPerProcGroup = 0;
CalculateProcessorTopology(nodes, numThreadsPerProcGroup);
    // Assumption: for asymmetric topologies, multi-threaded cores will appear
// in the list before single-threaded cores. This appears to be true for
// Windows when the total HW threads is limited to 64.
- uint32_t numHWNodes = (uint32_t)nodes.size();
- uint32_t numHWCoresPerNode = (uint32_t)nodes[0].cores.size();
- uint32_t numHWHyperThreads = (uint32_t)nodes[0].cores[0].threadIds.size();
+ uint32_t numHWNodes = (uint32_t)nodes.size();
+ uint32_t numHWCoresPerNode = (uint32_t)nodes[0].cores.size();
+ uint32_t numHWHyperThreads = (uint32_t)nodes[0].cores[0].threadIds.size();
#if defined(_WIN32) && !defined(_WIN64)
if (!pContext->threadInfo.MAX_WORKER_THREADS)
}
}
- uint32_t numNodes = numHWNodes;
- uint32_t numCoresPerNode = numHWCoresPerNode;
- uint32_t numHyperThreads = numHWHyperThreads;
+ uint32_t numNodes = numHWNodes;
+ uint32_t numCoresPerNode = numHWCoresPerNode;
+ uint32_t numHyperThreads = numHWHyperThreads;
// Calc used threads per-core
if (numHyperThreads > pContext->threadInfo.BASE_THREAD)
}
else
{
- SWR_ASSERT(
- false,
- "Cannot use BASE_THREAD value: %d, maxThreads: %d, reverting BASE_THREAD to 0",
- pContext->threadInfo.BASE_THREAD,
- numHyperThreads);
+ SWR_ASSERT(false,
+ "Cannot use BASE_THREAD value: %d, maxThreads: %d, reverting BASE_THREAD to 0",
+ pContext->threadInfo.BASE_THREAD,
+ numHyperThreads);
pContext->threadInfo.BASE_THREAD = 0;
}
}
else
{
- SWR_ASSERT(
- false,
- "Cannot use BASE_CORE value: %d, maxCores: %d, reverting BASE_CORE to 0",
- pContext->threadInfo.BASE_CORE,
- numCoresPerNode);
+ SWR_ASSERT(false,
+ "Cannot use BASE_CORE value: %d, maxCores: %d, reverting BASE_CORE to 0",
+ pContext->threadInfo.BASE_CORE,
+ numCoresPerNode);
pContext->threadInfo.BASE_CORE = 0;
}
SWR_REL_ASSERT(numThreads <= numHWThreads);
uint32_t& numAPIReservedThreads = pContext->apiThreadInfo.numAPIReservedThreads;
- uint32_t& numAPIThreadsPerCore = pContext->apiThreadInfo.numAPIThreadsPerCore;
- uint32_t numRemovedThreads = 0;
+ uint32_t& numAPIThreadsPerCore = pContext->apiThreadInfo.numAPIThreadsPerCore;
+ uint32_t numRemovedThreads = 0;
if (pContext->threadInfo.SINGLE_THREADED)
{
- numAPIReservedThreads = 0;
- numThreads = 1;
+ numAPIReservedThreads = 0;
+ numThreads = 1;
pContext->NumWorkerThreads = 1;
- pContext->NumFEThreads = 1;
- pContext->NumBEThreads = 1;
- pPool->numThreads = 0;
+ pContext->NumFEThreads = 1;
+ pContext->NumBEThreads = 1;
+ pPool->numThreads = 0;
}
else if (pContext->threadInfo.MAX_WORKER_THREADS)
{
numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, numHWThreads);
pContext->threadInfo.BASE_NUMA_NODE = 0;
- pContext->threadInfo.BASE_CORE = 0;
- pContext->threadInfo.BASE_THREAD = 0;
- numAPIReservedThreads = 0;
+ pContext->threadInfo.BASE_CORE = 0;
+ pContext->threadInfo.BASE_THREAD = 0;
+ numAPIReservedThreads = 0;
}
else
{
if (numAPIThreadsPerCore == 2 && numHyperThreads == 1)
{
// Adjust removed threads to make logic below work
- numRemovedThreads = std::max(1U, (numRemovedThreads + numAPIThreadsPerCore - 1) / 2);
+ numRemovedThreads =
+ std::max(1U, (numRemovedThreads + numAPIThreadsPerCore - 1) / 2);
}
numThreads -= numRemovedThreads;
if (pContext->threadInfo.SINGLE_THREADED)
{
numAPIReservedThreads = 0;
- numThreads = 1;
+ numThreads = 1;
}
if (numAPIReservedThreads)
}
pPool->numReservedThreads = numAPIReservedThreads;
- pPool->numThreads = numThreads;
+ pPool->numThreads = numThreads;
pContext->NumWorkerThreads = pPool->numThreads;
pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
pPool->pWorkerPrivateDataArray = nullptr;
if (pContext->workerPrivateState.perWorkerPrivateStateSize)
{
- size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
+ size_t perWorkerSize =
+ AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
size_t totalSize = perWorkerSize * pPool->numThreads;
if (totalSize)
{
if (pContext->threadInfo.MAX_WORKER_THREADS)
{
- bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
+ bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
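        // (a + b - 1) / b is integer ceil-division, e.g. 65 threads with 64
        // per group -> (65 + 63) / 64 = 2 proc groups.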
// When MAX_WORKER_THREADS is set we don't bother to bind to specific HW threads
// But Windows will still require binding to specific process groups
for (uint32_t workerId = 0; workerId < numThreads; ++workerId)
{
- pPool->pThreadData[workerId].workerId = workerId;
- pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
- pPool->pThreadData[workerId].threadId = 0;
- pPool->pThreadData[workerId].numaId = 0;
- pPool->pThreadData[workerId].coreId = 0;
- pPool->pThreadData[workerId].htId = 0;
- pPool->pThreadData[workerId].pContext = pContext;
+ pPool->pThreadData[workerId].workerId = workerId;
+ pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
+ pPool->pThreadData[workerId].threadId = 0;
+ pPool->pThreadData[workerId].numaId = 0;
+ pPool->pThreadData[workerId].coreId = 0;
+ pPool->pThreadData[workerId].htId = 0;
+ pPool->pThreadData[workerId].pContext = pContext;
pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
pContext->NumBEThreads++;
pPool->numaMask = 0;
}
- uint32_t workerId = 0;
+ uint32_t workerId = 0;
uint32_t numReservedThreads = numAPIReservedThreads;
for (uint32_t n = 0; n < numNodes; ++n)
{
{
break;
}
- auto& node = nodes[n + pContext->threadInfo.BASE_NUMA_NODE];
+ auto& node = nodes[n + pContext->threadInfo.BASE_NUMA_NODE];
uint32_t numCores = numCoresPerNode;
for (uint32_t c = 0; c < numCores; ++c)
{
--numRemovedThreads;
SWR_REL_ASSERT(numReservedThreads);
--numReservedThreads;
- pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
+ pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
- pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t];
- pPool->pApiThreadData[numReservedThreads].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pApiThreadData[numReservedThreads].coreId = c + pContext->threadInfo.BASE_CORE;
- pPool->pApiThreadData[numReservedThreads].htId = t + pContext->threadInfo.BASE_THREAD;
- pPool->pApiThreadData[numReservedThreads].pContext = pContext;
+ pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t];
+ pPool->pApiThreadData[numReservedThreads].numaId =
+ useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+ pPool->pApiThreadData[numReservedThreads].coreId =
+ c + pContext->threadInfo.BASE_CORE;
+ pPool->pApiThreadData[numReservedThreads].htId =
+ t + pContext->threadInfo.BASE_THREAD;
+ pPool->pApiThreadData[numReservedThreads].pContext = pContext;
pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
-
if (numAPIThreadsPerCore > numHyperThreads && numReservedThreads)
{
--numReservedThreads;
- pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
+ pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
- pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t + 1];
- pPool->pApiThreadData[numReservedThreads].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pApiThreadData[numReservedThreads].coreId = c + pContext->threadInfo.BASE_CORE;
- pPool->pApiThreadData[numReservedThreads].htId = t + pContext->threadInfo.BASE_THREAD;
- pPool->pApiThreadData[numReservedThreads].pContext = pContext;
+ pPool->pApiThreadData[numReservedThreads].threadId =
+ core.threadIds[t + 1];
+ pPool->pApiThreadData[numReservedThreads].numaId =
+ useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+ pPool->pApiThreadData[numReservedThreads].coreId =
+ c + pContext->threadInfo.BASE_CORE;
+ pPool->pApiThreadData[numReservedThreads].htId =
+ t + pContext->threadInfo.BASE_THREAD;
+ pPool->pApiThreadData[numReservedThreads].pContext = pContext;
pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
}
SWR_ASSERT(workerId < numThreads);
- pPool->pThreadData[workerId].workerId = workerId;
+ pPool->pThreadData[workerId].workerId = workerId;
pPool->pThreadData[workerId].procGroupId = core.procGroup;
- pPool->pThreadData[workerId].threadId = core.threadIds[t + pContext->threadInfo.BASE_THREAD];
- pPool->pThreadData[workerId].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
- pPool->pThreadData[workerId].coreId = c + pContext->threadInfo.BASE_CORE;
- pPool->pThreadData[workerId].htId = t + pContext->threadInfo.BASE_THREAD;
+ pPool->pThreadData[workerId].threadId =
+ core.threadIds[t + pContext->threadInfo.BASE_THREAD];
+ pPool->pThreadData[workerId].numaId =
+ useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+ pPool->pThreadData[workerId].coreId = c + pContext->threadInfo.BASE_CORE;
+ pPool->pThreadData[workerId].htId = t + pContext->threadInfo.BASE_THREAD;
pPool->pThreadData[workerId].pContext = pContext;
pPool->pThreadData[workerId].forceBindProcGroup = false;
for (uint32_t workerId = 0; workerId < pContext->NumWorkerThreads; ++workerId)
{
- pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+ pPool->pThreads[workerId] =
+ new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
}
}
/// @brief Destroys thread pool.
/// @param pContext - pointer to context
/// @param pPool - pointer to thread pool object.
-void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
+void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
{
// Wait for all threads to finish
SwrWaitForIdle(pContext);
// Detach from thread. Cannot join() due to possibility (in Windows) of code
    // in some DllMain (DLL_THREAD_DETACH case) blocking the thread until after this returns.
pPool->pThreads[t]->detach();
- delete(pPool->pThreads[t]);
+ delete (pPool->pThreads[t]);
}
if (pContext->workerPrivateState.pfnFinishWorkerData)
{
- pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t);
+ pContext->workerPrivateState.pfnFinishWorkerData(
+ pPool->pThreadData[t].pWorkerPrivateData, t);
}
}
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file threads.h
-*
-* @brief Definitions for SWR threading model.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file threads.h
+ *
+ * @brief Definitions for SWR threading model.
+ *
+ ******************************************************************************/
#pragma once
#include "knobs.h"
struct THREAD_DATA
{
- void* pWorkerPrivateData;// Pointer to per-worker private data
- uint32_t procGroupId; // Will always be 0 for non-Windows OS
- uint32_t threadId; // within the procGroup for Windows
- uint32_t numaId; // NUMA node id
- uint32_t coreId; // Core id
- uint32_t htId; // Hyperthread id
- uint32_t workerId;
- SWR_CONTEXT *pContext;
- bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
+ void* pWorkerPrivateData; // Pointer to per-worker private data
+ uint32_t procGroupId; // Will always be 0 for non-Windows OS
+ uint32_t threadId; // within the procGroup for Windows
+ uint32_t numaId; // NUMA node id
+ uint32_t coreId; // Core id
+ uint32_t htId; // Hyperthread id
+ uint32_t workerId;
+ SWR_CONTEXT* pContext;
+ bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
};
-
struct THREAD_POOL
{
- THREAD_PTR* pThreads;
- uint32_t numThreads;
- uint32_t numaMask;
- THREAD_DATA *pThreadData;
- void* pWorkerPrivateDataArray; // All memory for worker private data
- uint32_t numReservedThreads; // Number of threads reserved for API use
- THREAD_DATA *pApiThreadData;
+ THREAD_PTR* pThreads;
+ uint32_t numThreads;
+ uint32_t numaMask;
+ THREAD_DATA* pThreadData;
+ void* pWorkerPrivateDataArray; // All memory for worker private data
+ uint32_t numReservedThreads; // Number of threads reserved for API use
+ THREAD_DATA* pApiThreadData;
};
struct TileSet;
-void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
+void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
-void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
+void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
// Expose FE and BE worker functions to the API thread if single threaded
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE);
-bool WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
-void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE);
+void WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE);
+bool WorkOnFifoBE(SWR_CONTEXT* pContext,
+ uint32_t workerId,
+ uint32_t& curDrawBE,
+ TileSet& usedTiles,
+ uint32_t numaNode,
+ uint32_t numaMask);
+void WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE);
int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);
-void BindApiThread(SWR_CONTEXT *pContext, uint32_t apiThreadId);
+void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId);
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tilemgr.cpp
-*
-* @brief Implementation for Macro Tile Manager which provides the facilities
-* for threads to work on an macro tile.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tilemgr.cpp
+ *
+ * @brief Implementation for Macro Tile Manager which provides the facilities
+ * for threads to work on a macro tile.
+ *
+ ******************************************************************************/
#include <unordered_map>
#include "fifo.hpp"
#include "core/multisample.h"
#include "rdtsc_core.h"
-MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
-{
-}
+MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena) {}
-void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
+void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK* pWork)
{
    // Should not enqueue more than what we have backing for in the hot tile manager.
SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
- if ((x & ~(KNOB_NUM_HOT_TILES_X-1)) | (y & ~(KNOB_NUM_HOT_TILES_Y-1)))
+ if ((x & ~(KNOB_NUM_HOT_TILES_X - 1)) | (y & ~(KNOB_NUM_HOT_TILES_Y - 1)))
{
return;
}
mTiles.resize((16 + id) * 2);
}
- MacroTileQueue *pTile = mTiles[id];
+ MacroTileQueue* pTile = mTiles[id];
if (!pTile)
{
pTile = mTiles[id] = new MacroTileQueue();
void MacroTileMgr::markTileComplete(uint32_t id)
{
SWR_ASSERT(mTiles.size() > id);
- MacroTileQueue &tile = *mTiles[id];
- uint32_t numTiles = tile.mWorkItemsFE;
+ MacroTileQueue& tile = *mTiles[id];
+ uint32_t numTiles = tile.mWorkItemsFE;
InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
_ReadWriteBarrier();
tile.mWorkItemsBE = 0;
}
-HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
- uint32_t renderTargetArrayIndex)
+HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ HANDLE hWorkerPrivateData,
+ uint32_t macroID,
+ SWR_RENDERTARGET_ATTACHMENT attachment,
+ bool create,
+ uint32_t numSamples,
+ uint32_t renderTargetArrayIndex)
{
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
- HotTileSet &tile = mHotTiles[x][y];
- HOTTILE& hotTile = tile.Attachment[attachment];
+ HotTileSet& tile = mHotTiles[x][y];
+ HOTTILE& hotTile = tile.Attachment[attachment];
if (hotTile.pBuffer == NULL)
{
if (create)
{
- uint32_t size = numSamples * mHotTileSize[attachment];
+ uint32_t size = numSamples * mHotTileSize[attachment];
uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
- hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
- hotTile.state = HOTTILE_INVALID;
- hotTile.numSamples = numSamples;
+ hotTile.pBuffer =
+ (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
+ hotTile.state = HOTTILE_INVALID;
+ hotTile.numSamples = numSamples;
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
}
else
// free the old tile and create a new one with enough space to hold all samples
if (numSamples > hotTile.numSamples)
{
- // tile should be either uninitialized or resolved if we're deleting and switching to a
+ // tile should be either uninitialized or resolved if we're deleting and switching to a
// new sample count
- SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
- (hotTile.state == HOTTILE_RESOLVED) ||
- (hotTile.state == HOTTILE_CLEAR));
+ SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
+ (hotTile.state == HOTTILE_CLEAR));
FreeHotTileMem(hotTile.pBuffer);
- uint32_t size = numSamples * mHotTileSize[attachment];
+ uint32_t size = numSamples * mHotTileSize[attachment];
uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
- hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
- hotTile.state = HOTTILE_INVALID;
+ hotTile.pBuffer =
+ (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
+ hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
}
- // if requested render target array index isn't currently loaded, need to store out the current hottile
- // and load the requested array slice
+ // if requested render target array index isn't currently loaded, need to store out the
+ // current hottile and load the requested array slice
if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
{
SWR_FORMAT format;
case SWR_ATTACHMENT_COLOR4:
case SWR_ATTACHMENT_COLOR5:
case SWR_ATTACHMENT_COLOR6:
- case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
- case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
- case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
- default: SWR_INVALID("Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
+ case SWR_ATTACHMENT_COLOR7:
+ format = KNOB_COLOR_HOT_TILE_FORMAT;
+ break;
+ case SWR_ATTACHMENT_DEPTH:
+ format = KNOB_DEPTH_HOT_TILE_FORMAT;
+ break;
+ case SWR_ATTACHMENT_STENCIL:
+ format = KNOB_STENCIL_HOT_TILE_FORMAT;
+ break;
+ default:
+ SWR_INVALID("Unknown attachment: %d", attachment);
+ format = KNOB_COLOR_HOT_TILE_FORMAT;
+ break;
}
if (hotTile.state == HOTTILE_CLEAR)
if (hotTile.state == HOTTILE_DIRTY)
{
- pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
- x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
+ pContext->pfnStoreTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ format,
+ attachment,
+ x * KNOB_MACROTILE_X_DIM,
+ y * KNOB_MACROTILE_Y_DIM,
+ hotTile.renderTargetArrayIndex,
+ hotTile.pBuffer);
}
- pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
- x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ format,
+ attachment,
+ x * KNOB_MACROTILE_X_DIM,
+ y * KNOB_MACROTILE_Y_DIM,
+ renderTargetArrayIndex,
+ hotTile.pBuffer);
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
- hotTile.state = HOTTILE_DIRTY;
+ hotTile.state = HOTTILE_DIRTY;
}
}
return &tile.Attachment[attachment];
}
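// Illustrative sketch (hypothetical helper, not part of the change above):
// the (x ^ y) & numaMask expression used when allocating hot-tile memory
// spreads tiles across NUMA nodes in a checkerboard pattern, so adjacent
// macrotiles land on different nodes when numaMask == numNumaNodes - 1 for
// a power-of-two node count.
static uint32_t SelectTileNumaNode(uint32_t x, uint32_t y, uint32_t numaMask)
{
    // e.g. two nodes (numaMask == 1): (0,0)->0, (1,0)->1, (0,1)->1, (1,1)->0
    return (x ^ y) & numaMask;
}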
-HOTTILE* HotTileMgr::GetHotTileNoLoad(
- SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
- SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
+HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t macroID,
+ SWR_RENDERTARGET_ATTACHMENT attachment,
+ bool create,
+ uint32_t numSamples)
{
uint32_t x, y;
MacroTileMgr::getTileIndices(macroID, x, y);
SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
- HotTileSet &tile = mHotTiles[x][y];
- HOTTILE& hotTile = tile.Attachment[attachment];
+ HotTileSet& tile = mHotTiles[x][y];
+ HOTTILE& hotTile = tile.Attachment[attachment];
if (hotTile.pBuffer == NULL)
{
if (create)
{
- uint32_t size = numSamples * mHotTileSize[attachment];
- hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, 64);
- hotTile.state = HOTTILE_INVALID;
- hotTile.numSamples = numSamples;
+ uint32_t size = numSamples * mHotTileSize[attachment];
+ hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, 64);
+ hotTile.state = HOTTILE_INVALID;
+ hotTile.numSamples = numSamples;
hotTile.renderTargetArrayIndex = 0;
}
else
}
#if USE_8x2_TILE_BACKEND
-void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearColorHotTile(
+ const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
{
// Load clear color into SIMD register...
- float *pClearData = (float *)(pHotTile->clearData);
- simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
- simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
- simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
- simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
+ float* pClearData = (float*)(pHotTile->clearData);
+ simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
+ simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
+ simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
+ simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
- float *pfBuf = (float *)pHotTile->pBuffer;
+ float* pfBuf = (float*)pHotTile->pBuffer;
uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
{
_simd16_store_ps(pfBuf, valR);
pfBuf += KNOB_SIMD16_WIDTH;
}
}
-void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearDepthHotTile(
+ const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
{
    // Load clear depth value into SIMD register...
- float *pClearData = (float *)(pHotTile->clearData);
- simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
+ float* pClearData = (float*)(pHotTile->clearData);
+ simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
- float *pfBuf = (float *)pHotTile->pBuffer;
+ float* pfBuf = (float*)pHotTile->pBuffer;
uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
{
_simd16_store_ps(pfBuf, valZ);
pfBuf += KNOB_SIMD16_WIDTH;
{
// convert from F32 to U8.
uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
- //broadcast 32x into __m256i...
+ // broadcast 32x into __m256i...
simd16scalari valS = _simd16_set1_epi8(clearVal);
- simd16scalari *pBuf = (simd16scalari *)pHotTile->pBuffer;
- uint32_t numSamples = pHotTile->numSamples;
+ simd16scalari* pBuf = (simd16scalari*)pHotTile->pBuffer;
+ uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
// We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
{
_simd16_store_si(pBuf, valS);
pBuf += 1;
}
#else
-void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearColorHotTile(
+ const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
{
// Load clear color into SIMD register...
- float *pClearData = (float*)(pHotTile->clearData);
- simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
- simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
- simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
- simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
+ float* pClearData = (float*)(pHotTile->clearData);
+ simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
+ simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
+ simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
+ simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
- float *pfBuf = (float*)pHotTile->pBuffer;
+ float* pfBuf = (float*)pHotTile->pBuffer;
uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si +=
+ SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) // SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
{
_simd_store_ps(pfBuf, valR);
pfBuf += KNOB_SIMD_WIDTH;
}
}
-void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearDepthHotTile(
+ const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
{
    // Load clear depth value into SIMD register...
- float *pClearData = (float*)(pHotTile->clearData);
- simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
+ float* pClearData = (float*)(pHotTile->clearData);
+ simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
- float *pfBuf = (float*)pHotTile->pBuffer;
+ float* pfBuf = (float*)pHotTile->pBuffer;
uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
{
_simd_store_ps(pfBuf, valZ);
pfBuf += KNOB_SIMD_WIDTH;
{
// convert from F32 to U8.
uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
- //broadcast 32x into __m256i...
+ // broadcast 32x into __m256i...
simdscalari valS = _simd_set1_epi8(clearVal);
- simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
- uint32_t numSamples = pHotTile->numSamples;
+ simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
+ uint32_t numSamples = pHotTile->numSamples;
for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
{
for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
{
// We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
- for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
+ for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+ si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
{
_simd_store_si(pBuf, valS);
pBuf += 1;
/// to avoid unnecessary setup every triangle
/// @todo support deferred clear
/// @param pCreateInfo - pointer to creation info.
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
+void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroID)
{
- const API_STATE& state = GetApiState(pDC);
+ const API_STATE& state = GetApiState(pDC);
HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
uint32_t x, y;
uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
// check RT if enabled
- unsigned long rtSlot = 0;
- uint32_t colorHottileEnableMask = state.colorHottileEnable;
+ unsigned long rtSlot = 0;
+ uint32_t colorHottileEnableMask = state.colorHottileEnable;
while (_BitScanForward(&rtSlot, colorHottileEnableMask))
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
+ HOTTILE* pHotTile =
+ GetHotTile(pContext,
+ pDC,
+ hWorkerPrivateData,
+ macroID,
+ (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+ true,
+ numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ KNOB_COLOR_HOT_TILE_FORMAT,
+ (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+ x,
+ y,
+ pHotTile->renderTargetArrayIndex,
+ pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
// check depth if enabled
if (state.depthHottileEnable)
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
+ HOTTILE* pHotTile = GetHotTile(
+ pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ KNOB_DEPTH_HOT_TILE_FORMAT,
+ SWR_ATTACHMENT_DEPTH,
+ x,
+ y,
+ pHotTile->renderTargetArrayIndex,
+ pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
// check stencil if enabled
if (state.stencilHottileEnable)
{
- HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
+ HOTTILE* pHotTile = GetHotTile(
+ pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
if (pHotTile->state == HOTTILE_INVALID)
{
RDTSC_BEGIN(BELoadTiles, pDC->drawId);
// invalid hottile before draw requires a load from surface before we can draw to it
- pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+ pContext->pfnLoadTile(GetPrivateState(pDC),
+ hWorkerPrivateData,
+ KNOB_STENCIL_HOT_TILE_FORMAT,
+ SWR_ATTACHMENT_STENCIL,
+ x,
+ y,
+ pHotTile->renderTargetArrayIndex,
+ pHotTile->pBuffer);
pHotTile->state = HOTTILE_DIRTY;
RDTSC_END(BELoadTiles, 0);
}
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tilemgr.h
-*
-* @brief Definitions for Macro Tile Manager which provides the facilities
-* for threads to work on an macro tile.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tilemgr.h
+ *
+ * @brief Definitions for Macro Tile Manager which provides the facilities
+ * for threads to work on a macro tile.
+ *
+ ******************************************************************************/
#pragma once
#include <set>
//////////////////////////////////////////////////////////////////////////
struct MacroTileQueue
{
- MacroTileQueue() { }
+ MacroTileQueue() {}
~MacroTileQueue() { destroy(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Returns number of work items queued for this tile.
- uint32_t getNumQueued()
- {
- return mFifo.getNumQueued();
- }
+ uint32_t getNumQueued() { return mFifo.getNumQueued(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Attempt to lock the work fifo. If already locked then return false.
- bool tryLock()
- {
- return mFifo.tryLock();
- }
+ bool tryLock() { return mFifo.tryLock(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Clear fifo and unlock it.
//////////////////////////////////////////////////////////////////////////
/// @brief Peek at work sitting at the front of the fifo.
- BE_WORK* peek()
- {
- return mFifo.peek();
- }
+ BE_WORK* peek() { return mFifo.peek(); }
template <typename ArenaT>
bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
//////////////////////////////////////////////////////////////////////////
/// @brief Move to next work item
- void dequeue()
- {
- mFifo.dequeue_noinc();
- }
+ void dequeue() { mFifo.dequeue_noinc(); }
//////////////////////////////////////////////////////////////////////////
/// @brief Destroy fifo
- void destroy()
- {
- mFifo.destroy();
- }
+ void destroy() { mFifo.destroy(); }
///@todo This will all be private.
uint32_t mWorkItemsFE = 0;
uint32_t mWorkItemsBE = 0;
- uint32_t mId = 0;
+ uint32_t mId = 0;
private:
QUEUE<BE_WORK> mFifo;
MacroTileMgr(CachingArena& arena);
~MacroTileMgr()
{
- for (auto *pTile : mTiles)
+ for (auto* pTile : mTiles)
{
delete pTile;
}
}
INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
- void markTileComplete(uint32_t id);
+ void markTileComplete(uint32_t id);
- INLINE bool isWorkComplete()
- {
- return mWorkItemsProduced == mWorkItemsConsumed;
- }
+ INLINE bool isWorkComplete() { return mWorkItemsProduced == mWorkItemsConsumed; }
- void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
+ void enqueue(uint32_t x, uint32_t y, BE_WORK* pWork);
- static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
+ static INLINE void getTileIndices(uint32_t tileID, uint32_t& x, uint32_t& y)
{
// Morton / Z order of tiles
x = pext_u32(tileID, 0x55555555);
}
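    // Hedged sketch (hypothetical inverse, assuming a pdep_u32 counterpart to
    // the pext_u32 used above): re-interleaving the x and y bits reconstructs
    // the Morton-ordered tile id that getTileIndices decodes.
    static INLINE uint32_t getTileId(uint32_t x, uint32_t y)
    {
        // x occupies the even bits, y the odd bits; e.g. x=2, y=3 -> 0b1110 == 14
        return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA);
    }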
private:
- CachingArena& mArena;
+ CachingArena& mArena;
std::vector<MacroTileQueue*> mTiles;
// Any tile that has work queued to it is a dirty tile.
std::vector<MacroTileQueue*> mDirtyTiles;
- OSALIGNLINE(long) mWorkItemsProduced { 0 };
- OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 };
+ OSALIGNLINE(long) mWorkItemsProduced{0};
+ OSALIGNLINE(volatile long) mWorkItemsConsumed{0};
};
-typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
+typedef void (*PFN_DISPATCH)(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t threadGroupId,
+ void*& pSpillFillBuffer,
+ void*& pScratchSpace);
//////////////////////////////////////////////////////////////////////////
/// DispatchQueue - work queue for dispatch
{
// The available and outstanding counts start with total tasks.
// At the start there are N tasks available and outstanding.
- // When both the available and outstanding counts have reached 0 then all work has completed.
- // When a worker starts on a threadgroup then it decrements the available count.
+ // When both the available and outstanding counts have reached 0 then all work has
+ // completed. When a worker starts on a threadgroup then it decrements the available count.
// When a worker completes a threadgroup then it decrements the outstanding count.
- mTasksAvailable = totalTasks;
+ mTasksAvailable = totalTasks;
mTasksOutstanding = totalTasks;
- mpTaskData = pTaskData;
+ mpTaskData = pTaskData;
mPfnDispatch = pfnDispatch;
}
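    // Minimal sketch of the counting protocol described above (standalone and
    // hypothetical; the real queue uses OS interlocked intrinsics rather than
    // std::atomic):
    //
    //     std::atomic<long> available{totalTasks};
    //     std::atomic<long> outstanding{totalTasks};
    //     long task;
    //     while ((task = available.fetch_sub(1) - 1) >= 0) // claim a threadgroup
    //     {
    //         Execute(task);
    //         outstanding.fetch_sub(1);                    // report completion
    //     }
    //     // all work is complete once available <= 0 && outstanding <= 0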
//////////////////////////////////////////////////////////////////////////
/// @brief Returns number of tasks available for this dispatch.
- uint32_t getNumQueued()
- {
- return (mTasksAvailable > 0) ? mTasksAvailable : 0;
- }
+ uint32_t getNumQueued() { return (mTasksAvailable > 0) ? mTasksAvailable : 0; }
//////////////////////////////////////////////////////////////////////////
/// @brief Atomically decrement the work available count. If the result
//////////////////////////////////////////////////////////////////////////
/// @brief Work is complete once both the available/outstanding counts have reached 0.
- bool isWorkComplete()
- {
- return ((mTasksAvailable <= 0) &&
- (mTasksOutstanding <= 0));
- }
+ bool isWorkComplete() { return ((mTasksAvailable <= 0) && (mTasksOutstanding <= 0)); }
//////////////////////////////////////////////////////////////////////////
/// @brief Return pointer to task data.
- const void* GetTasksData()
- {
- return mpTaskData;
- }
+ const void* GetTasksData() { return mpTaskData; }
//////////////////////////////////////////////////////////////////////////
/// @brief Dispatches a unit of work
- void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
+ void dispatch(DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t threadGroupId,
+ void*& pSpillFillBuffer,
+ void*& pScratchSpace)
{
SWR_ASSERT(mPfnDispatch != nullptr);
mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
}
- void* mpTaskData{ nullptr }; // The API thread will set this up and the callback task function will interpet this.
- PFN_DISPATCH mPfnDispatch{ nullptr }; // Function to call per dispatch
+ void* mpTaskData{nullptr}; // The API thread will set this up and the callback task function
+ // will interpret this.
+ PFN_DISPATCH mPfnDispatch{nullptr}; // Function to call per dispatch
- OSALIGNLINE(volatile long) mTasksAvailable{ 0 };
- OSALIGNLINE(volatile long) mTasksOutstanding{ 0 };
+ OSALIGNLINE(volatile long) mTasksAvailable{0};
+ OSALIGNLINE(volatile long) mTasksOutstanding{0};
};
-
enum HOTTILE_STATE
{
- HOTTILE_INVALID, // tile is in unitialized state and should be loaded with surface contents before rendering
- HOTTILE_CLEAR, // tile should be cleared
- HOTTILE_DIRTY, // tile has been rendered to
- HOTTILE_RESOLVED, // tile has been stored to memory
+ HOTTILE_INVALID, // tile is in an uninitialized state and should be loaded with surface contents
+ // before rendering
+ HOTTILE_CLEAR, // tile should be cleared
+ HOTTILE_DIRTY, // tile has been rendered to
+ HOTTILE_RESOLVED, // tile has been stored to memory
};
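// State lifecycle as used in tilemgr.cpp (a reading aid; the transitions are
// not encoded as a function anywhere in the source):
//
//     HOTTILE_INVALID  --pfnLoadTile-->   HOTTILE_DIRTY     // load surface before first draw
//     HOTTILE_CLEAR    --clear applied--> HOTTILE_DIRTY     // deferred clear, then rendered to
//     HOTTILE_DIRTY    --pfnStoreTile-->  HOTTILE_RESOLVED  // written back to the surface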
struct HOTTILE
{
- uint8_t *pBuffer;
+ uint8_t* pBuffer;
HOTTILE_STATE state;
- DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
+ DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for
+ // alignment?
uint32_t numSamples;
- uint32_t renderTargetArrayIndex; // current render target array index loaded
+ uint32_t renderTargetArrayIndex; // current render target array index loaded
};
union HotTileSet
// cache hottile size
for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
{
- mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
+ mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+ FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
}
- mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
- mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
+ mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+ FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
+ mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+ FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
}
~HotTileMgr()
}
}
- void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
-
- HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
- uint32_t renderTargetArrayIndex = 0);
-
- HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
+ void InitializeHotTiles(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t workerId,
+ uint32_t macroID);
+
+ HOTTILE* GetHotTile(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ HANDLE hWorkerData,
+ uint32_t macroID,
+ SWR_RENDERTARGET_ATTACHMENT attachment,
+ bool create,
+ uint32_t numSamples = 1,
+ uint32_t renderTargetArrayIndex = 0);
+
+ HOTTILE* GetHotTileNoLoad(SWR_CONTEXT* pContext,
+ DRAW_CONTEXT* pDC,
+ uint32_t macroID,
+ SWR_RENDERTARGET_ATTACHMENT attachment,
+ bool create,
+ uint32_t numSamples = 1);
static void ClearColorHotTile(const HOTTILE* pHotTile);
static void ClearDepthHotTile(const HOTTILE* pHotTile);
private:
HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
- uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
+ uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
{
void* p = nullptr;
#if defined(_WIN32)
HANDLE hProcess = GetCurrentProcess();
- p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
+ p = VirtualAllocExNuma(
+ hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
#else
p = AlignedMalloc(size, align);
#endif
}
}
};
-
/****************************************************************************
-* Copyright (C) 2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tileset.h
-*
-* @brief Custom bitset class for managing locked tiles
-*
-******************************************************************************/
+ * Copyright (C) 2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tileset.h
+ *
+ * @brief Custom bitset class for managing locked tiles
+ *
+ ******************************************************************************/
#pragma once
struct TileSet
private:
static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
- static const size_t BITS_OFFSET = BITS_PER_WORD - 1;
+ static const size_t BITS_OFFSET = BITS_PER_WORD - 1;
- size_t m_size = 0;
- size_t m_maxSet = 0;
- size_t* m_bits = nullptr;
+ size_t m_size = 0;
+ size_t m_maxSet = 0;
+ size_t* m_bits = nullptr;
- INLINE size_t& _get_word(size_t idx)
- {
- return m_bits[idx / BITS_PER_WORD];
- }
+ INLINE size_t& _get_word(size_t idx) { return m_bits[idx / BITS_PER_WORD]; }
void _grow(size_t idx)
{
return;
}
- size_t new_size = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
- size_t num_words = new_size / BITS_PER_WORD;
- size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);
- size_t copy_words = 0;
+ size_t new_size = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
+ size_t num_words = new_size / BITS_PER_WORD;
+ size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);
+ size_t copy_words = 0;
if (m_bits)
{
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file utils.h
-*
-* @brief Utilities used by SWR core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file utils.h
+ *
+ * @brief Utilities used by SWR core.
+ *
+ ******************************************************************************/
#pragma once
#include <string.h>
};
#endif
-template<typename SIMD_T>
+template <typename SIMD_T>
struct SIMDBBOX_T
{
- typename SIMD_T::Integer ymin;
- typename SIMD_T::Integer ymax;
- typename SIMD_T::Integer xmin;
- typename SIMD_T::Integer xmax;
+ typename SIMD_T::Integer ymin;
+ typename SIMD_T::Integer ymax;
+ typename SIMD_T::Integer xmin;
+ typename SIMD_T::Integer xmax;
};
// helper function to unroll loops
-template<int Begin, int End, int Step = 1>
-struct UnrollerL {
- template<typename Lambda>
- INLINE static void step(Lambda& func) {
+template <int Begin, int End, int Step = 1>
+struct UnrollerL
+{
+ template <typename Lambda>
+ INLINE static void step(Lambda& func)
+ {
func(Begin);
UnrollerL<Begin + Step, End, Step>::step(func);
}
};
-template<int End, int Step>
-struct UnrollerL<End, End, Step> {
- template<typename Lambda>
- static void step(Lambda& func) {
+template <int End, int Step>
+struct UnrollerL<End, End, Step>
+{
+ template <typename Lambda>
+ static void step(Lambda& func)
+ {
}
};
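// Usage sketch (hypothetical call site): the recursion expands at compile time
// into func(0); func(1); func(2); func(3). Note that step() takes the lambda by
// non-const reference, so it must be a named object rather than a temporary.
//     float sum = 0.0f;
//     auto body = [&](int i) { sum += data[i]; };
//     UnrollerL<0, 4>::step(body);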
// helper function to unroll loops, with mask to skip specific iterations
-template<int Begin, int End, int Step = 1, int Mask = 0x7f>
-struct UnrollerLMask {
- template<typename Lambda>
- INLINE static void step(Lambda& func) {
- if(Mask & (1 << Begin))
+template <int Begin, int End, int Step = 1, int Mask = 0x7f>
+struct UnrollerLMask
+{
+ template <typename Lambda>
+ INLINE static void step(Lambda& func)
+ {
+ if (Mask & (1 << Begin))
{
func(Begin);
}
}
};
-template<int End, int Step, int Mask>
-struct UnrollerLMask<End, End, Step, Mask> {
- template<typename Lambda>
- static void step(Lambda& func) {
+template <int End, int Step, int Mask>
+struct UnrollerLMask<End, End, Step, Mask>
+{
+ template <typename Lambda>
+ static void step(Lambda& func)
+ {
}
};
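// Usage sketch (hypothetical call site): same expansion as UnrollerL, but an
// iteration only runs if its bit is set in Mask -- with Mask = 0x5 (0b0101)
// only i = 0 and i = 2 invoke the lambda.
//     auto body = [&](int i) { process(i); };
//     UnrollerLMask<0, 4, 1, 0x5>::step(body);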
// general CRC compute
INLINE
-uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
+uint32_t ComputeCRC(uint32_t crc, const void* pData, uint32_t size)
{
#if defined(_WIN64) || defined(__x86_64__)
- uint32_t sizeInQwords = size / sizeof(uint64_t);
- uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
- uint64_t* pDataWords = (uint64_t*)pData;
+ uint32_t sizeInQwords = size / sizeof(uint64_t);
+ uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
+ uint64_t* pDataWords = (uint64_t*)pData;
for (uint32_t i = 0; i < sizeInQwords; ++i)
{
crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
}
#else
- uint32_t sizeInDwords = size / sizeof(uint32_t);
- uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
- uint32_t* pDataWords = (uint32_t*)pData;
+ uint32_t sizeInDwords = size / sizeof(uint32_t);
+ uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
+ uint32_t* pDataWords = (uint32_t*)pData;
for (uint32_t i = 0; i < sizeInDwords; ++i)
{
crc = _mm_crc32_u32(crc, *pDataWords++);
/// Check specified bit within a data word
//////////////////////////////////////////////////////////////////////////
template <typename T>
-INLINE
-static bool CheckBit(T word, uint32_t bit)
+INLINE static bool CheckBit(T word, uint32_t bit)
{
return 0 != (word & (T(1) << bit));
}
/// Add byte offset to any-type pointer
//////////////////////////////////////////////////////////////////////////
template <typename T>
-INLINE
-static T* PtrAdd(T* p, intptr_t offset)
+INLINE static T* PtrAdd(T* p, intptr_t offset)
{
intptr_t intp = reinterpret_cast<intptr_t>(p);
return reinterpret_cast<T*>(intp + offset);
/// Is a power-of-2?
//////////////////////////////////////////////////////////////////////////
template <typename T>
-INLINE
-static bool IsPow2(T value)
+INLINE static bool IsPow2(T value)
{
return value == (value & (T(0) - value));
}
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1 AlignDownPow2(T1 value, T2 alignment)
+INLINE static T1 AlignDownPow2(T1 value, T2 alignment)
{
SWR_ASSERT(IsPow2(alignment));
return value & ~T1(alignment - 1);
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1 AlignUpPow2(T1 value, T2 alignment)
+INLINE static T1 AlignUpPow2(T1 value, T2 alignment)
{
return AlignDownPow2(value + T1(alignment - 1), alignment);
}
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1* AlignUpPow2(T1* value, T2 alignment)
+INLINE static T1* AlignUpPow2(T1* value, T2 alignment)
{
return reinterpret_cast<T1*>(
AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
/// Align down to specified alignment
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1 AlignDown(T1 value, T2 alignment)
+INLINE static T1 AlignDown(T1 value, T2 alignment)
{
- if (IsPow2(alignment)) { return AlignDownPow2(value, alignment); }
+ if (IsPow2(alignment))
+ {
+ return AlignDownPow2(value, alignment);
+ }
return value - T1(value % alignment);
}
/// Align down to specified alignment
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1* AlignDown(T1* value, T2 alignment)
+INLINE static T1* AlignDown(T1* value, T2 alignment)
{
return (T1*)AlignDown(uintptr_t(value), alignment);
}
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1 AlignUp(T1 value, T2 alignment)
+INLINE static T1 AlignUp(T1 value, T2 alignment)
{
return AlignDown(value + T1(alignment - 1), alignment);
}
/// Note: IsPow2(alignment) MUST be true
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
-INLINE
-static T1* AlignUp(T1* value, T2 alignment)
+INLINE static T1* AlignUp(T1* value, T2 alignment)
{
return AlignDown(PtrAdd(value, alignment - 1), alignment);
}
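// Worked example (hypothetical values): with a power-of-two alignment the
// helpers reduce to simple mask arithmetic.
//     AlignDownPow2(13, 8) == 8    // 13 & ~7
//     AlignUpPow2(13, 8)   == 16   // (13 + 7) & ~7
//     AlignUp(13, 8)       == 16   // IsPow2(8), so dispatches to the pow2 path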
//////////////////////////////////////////////////////////////////////////
-/// Helper structure used to access an array of elements that don't
+/// Helper structure used to access an array of elements that don't
/// correspond to a typical word size.
//////////////////////////////////////////////////////////////////////////
-template<typename T, size_t BitsPerElementT, size_t ArrayLenT>
+template <typename T, size_t BitsPerElementT, size_t ArrayLenT>
class BitsArray
{
private:
- static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
+ static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
- static const size_t NUM_WORDS = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
- static const size_t ELEMENT_MASK = (size_t(1) << BitsPerElementT) - 1;
+ static const size_t NUM_WORDS = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
+ static const size_t ELEMENT_MASK = (size_t(1) << BitsPerElementT) - 1;
static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
- "Element size must an integral fraction of pointer size");
+ "Element size must an integral fraction of pointer size");
- size_t m_words[NUM_WORDS] = {};
+ size_t m_words[NUM_WORDS] = {};
public:
-
- T operator[] (size_t elementIndex) const
+ T operator[](size_t elementIndex) const
{
size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
}
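// Worked example (hypothetical parameters): with BitsPerElementT = 4 and a
// 64-bit size_t, ELEMENTS_PER_WORD == 16, so element 18 lives in m_words[1];
// operator[] shifts right by (18 % 16) * 4 == 8 bits and masks with
// ELEMENT_MASK == 0xF.
//     BitsArray<uint8_t, 4, 32> arr;  // 32 four-bit elements in two words
//     uint8_t v = arr[18];            // reads bits [11:8] of m_words[1]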
if (TMax > TMin)
{
- return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(RangedArg<T, TMin, (T)(int(TMax)-1)>{iArg.val});
+ return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
+ RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val});
}
- SWR_ASSUME(false); return nullptr;
+ SWR_ASSUME(false);
+ return nullptr;
}
template <typename T, T TVal>
static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg)
{
if (iArg.val == TMax)
{
- return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(remainingArgs...);
+ return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(
+ remainingArgs...);
}
if (TMax > TMin)
{
- return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
+ return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
+ RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
}
- SWR_ASSUME(false); return nullptr;
+ SWR_ASSUME(false);
+ return nullptr;
}
template <typename T, T TVal, typename... TArgsT>
static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg, TArgsT... remainingArgs)
{
SWR_ASSERT(iArg.val == TVal);
- return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(remainingArgs...);
+ return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(
+ remainingArgs...);
}
};
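// Usage sketch (hypothetical terminator type): TemplateArgUnroller turns a
// runtime value into a compile-time template argument by testing each value in
// the range and instantiating the matching specialization.
//     struct Term
//     {
//         typedef void (*FuncType)();
//         template <typename... ArgsT>
//         static FuncType GetFunc() { return &SomeKernel<ArgsT::value...>; }
//     };
//     // selects SomeKernel<n> for a runtime n in [0, 4]:
//     auto pfn = TemplateArgUnroller<Term>::GetFunc(RangedArg<int, 0, 4>{n});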
std::string output;
#if defined(_WIN32)
DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
- if (!valueSize) return output;
+ if (!valueSize)
+ return output;
output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
#else
- char *env = getenv(variableName.c_str());
- output = env ? env : "";
+ char* env = getenv(variableName.c_str());
+ output = env ? env : "";
#endif
return output;
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file JitManager.cpp
-*
-* @brief Implementation if the Jit Manager.
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file JitManager.cpp
+ *
+ * @brief Implementation of the Jit Manager.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "JitManager.h"
//////////////////////////////////////////////////////////////////////////
/// @brief Constructor for JitManager.
/// @param simdWidth - SIMD width to be used in generated program.
-JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
- : mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch)
+JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
+ mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
+ mArch(arch)
{
InitializeNativeTarget();
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetDisassembler();
- TargetOptions tOpts;
+ TargetOptions tOpts;
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
- tOpts.NoInfsFPMath = false;
- tOpts.NoNaNsFPMath = false;
+ tOpts.NoInfsFPMath = false;
+ tOpts.NoNaNsFPMath = false;
tOpts.UnsafeFPMath = false;
- //tOpts.PrintMachineCode = true;
+ // tOpts.PrintMachineCode = true;
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
StringRef hostCPUName;
// force JIT to use the same CPU arch as the rest of swr
- if(mArch.AVX512F())
+ if (mArch.AVX512F())
{
#if USE_SIMD16_SHADERS
- if(mArch.AVX512ER())
+ if (mArch.AVX512ER())
{
hostCPUName = StringRef("knl");
}
mVWidth = 8;
}
}
- else if(mArch.AVX2())
+ else if (mArch.AVX2())
{
hostCPUName = StringRef("core-avx2");
if (mVWidth == 0)
mVWidth = 8;
}
}
- else if(mArch.AVX())
+ else if (mArch.AVX())
{
if (mArch.F16C())
{
mpCurrentModule->setTargetTriple(sys::getProcessTriple());
mpExec = EngineBuilder(std::move(newModule))
- .setTargetOptions(tOpts)
- .setOptLevel(optLevel)
- .setMCPU(hostCPUName)
- .create();
+ .setTargetOptions(tOpts)
+ .setOptLevel(optLevel)
+ .setMCPU(hostCPUName)
+ .create();
if (KNOB_JIT_ENABLE_CACHE)
{
#else
// typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
- std::vector<Type*> fsArgs;
+ std::vector<Type *> fsArgs;
// llvm5 is picky and does not take a void * type
fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
}
-DIType* JitManager::CreateDebugStructType(StructType* pType, const std::string& name, DIFile* pFile, uint32_t lineNum,
- const std::vector<std::pair<std::string, uint32_t>>& members)
+DIType *
+JitManager::CreateDebugStructType(StructType * pType,
+ const std::string & name,
+ DIFile * pFile,
+ uint32_t lineNum,
+ const std::vector<std::pair<std::string, uint32_t>> &members)
{
- DIBuilder builder(*mpCurrentModule);
- SmallVector<Metadata*, 8> ElemTypes;
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pType);
- uint32_t alignment = DL.getABITypeAlignment(pType);
- DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
-
- DICompositeType* pDIStructTy = builder.createStructType(pFile, name, pFile, lineNum, size, alignment,
- flags, nullptr, builder.getOrCreateArray(ElemTypes));
+ DIBuilder builder(*mpCurrentModule);
+ SmallVector<Metadata *, 8> ElemTypes;
+ DataLayout DL = DataLayout(mpCurrentModule);
+ uint32_t size = DL.getTypeAllocSizeInBits(pType);
+ uint32_t alignment = DL.getABITypeAlignment(pType);
+ DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
+
+ DICompositeType *pDIStructTy = builder.createStructType(pFile,
+ name,
+ pFile,
+ lineNum,
+ size,
+ alignment,
+ flags,
+ nullptr,
+ builder.getOrCreateArray(ElemTypes));
// Register mapping now to break loops (in case struct contains itself or pointers to itself)
mDebugStructMap[pType] = pDIStructTy;
uint32_t idx = 0;
- for (auto& elem : pType->elements())
+ for (auto &elem : pType->elements())
{
- std::string name = members[idx].first;
- uint32_t lineNum = members[idx].second;
- size = DL.getTypeAllocSizeInBits(elem);
- alignment = DL.getABITypeAlignment(elem);
- uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
- llvm::DIType* pDebugTy = GetDebugType(elem);
- ElemTypes.push_back(builder.createMemberType(pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
+ std::string name = members[idx].first;
+ uint32_t lineNum = members[idx].second;
+ size = DL.getTypeAllocSizeInBits(elem);
+ alignment = DL.getABITypeAlignment(elem);
+ uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
+ llvm::DIType *pDebugTy = GetDebugType(elem);
+ ElemTypes.push_back(builder.createMemberType(
+ pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
idx++;
}
return pDIStructTy;
}
-DIType* JitManager::GetDebugArrayType(Type* pTy)
+DIType *JitManager::GetDebugArrayType(Type *pTy)
{
- DIBuilder builder(*mpCurrentModule);
- DataLayout DL = DataLayout(mpCurrentModule);
- ArrayType* pArrayTy = cast<ArrayType>(pTy);
- uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
- uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
+ DIBuilder builder(*mpCurrentModule);
+ DataLayout DL = DataLayout(mpCurrentModule);
+ ArrayType *pArrayTy = cast<ArrayType>(pTy);
+ uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
+ uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
- SmallVector<Metadata*, 8> Elems;
+ SmallVector<Metadata *, 8> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
- return builder.createArrayType(size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
+ return builder.createArrayType(
+ size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
}
// Create a DIType from llvm Type
-DIType* JitManager::GetDebugType(Type* pTy)
+DIType *JitManager::GetDebugType(Type *pTy)
{
- DIBuilder builder(*mpCurrentModule);
+ DIBuilder builder(*mpCurrentModule);
Type::TypeID id = pTy->getTypeID();
switch (id)
{
- case Type::VoidTyID: return builder.createUnspecifiedType("void"); break;
- case Type::HalfTyID: return builder.createBasicType("float16", 16, dwarf::DW_ATE_float); break;
- case Type::FloatTyID: return builder.createBasicType("float", 32, dwarf::DW_ATE_float); break;
- case Type::DoubleTyID: return builder.createBasicType("double", 64, dwarf::DW_ATE_float); break;
- case Type::IntegerTyID: return GetDebugIntegerType(pTy); break;
- case Type::StructTyID: return GetDebugStructType(pTy); break;
- case Type::ArrayTyID: return GetDebugArrayType(pTy); break;
- case Type::PointerTyID: return builder.createPointerType(GetDebugType(pTy->getPointerElementType()), 64, 64); break;
- case Type::VectorTyID: return GetDebugVectorType(pTy); break;
- case Type::FunctionTyID: return GetDebugFunctionType(pTy); break;
- default: SWR_ASSERT(false, "Unimplemented llvm type");
+ case Type::VoidTyID:
+ return builder.createUnspecifiedType("void");
+ break;
+ case Type::HalfTyID:
+ return builder.createBasicType("float16", 16, dwarf::DW_ATE_float);
+ break;
+ case Type::FloatTyID:
+ return builder.createBasicType("float", 32, dwarf::DW_ATE_float);
+ break;
+ case Type::DoubleTyID:
+ return builder.createBasicType("double", 64, dwarf::DW_ATE_float);
+ break;
+ case Type::IntegerTyID:
+ return GetDebugIntegerType(pTy);
+ break;
+ case Type::StructTyID:
+ return GetDebugStructType(pTy);
+ break;
+ case Type::ArrayTyID:
+ return GetDebugArrayType(pTy);
+ break;
+ case Type::PointerTyID:
+ return builder.createPointerType(GetDebugType(pTy->getPointerElementType()), 64, 64);
+ break;
+ case Type::VectorTyID:
+ return GetDebugVectorType(pTy);
+ break;
+ case Type::FunctionTyID:
+ return GetDebugFunctionType(pTy);
+ break;
+ default:
+ SWR_ASSERT(false, "Unimplemented llvm type");
}
return nullptr;
}
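// Note that the pointer case above hardcodes a 64-bit pointer size and
// alignment, so this debug-info path effectively assumes a 64-bit target;
// the jit cache records the same distinction via sizeof(void *) in its
// platform key.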
// Create a DISubroutineType from an llvm FunctionType
-DIType* JitManager::GetDebugFunctionType(Type* pTy)
+DIType *JitManager::GetDebugFunctionType(Type *pTy)
{
- SmallVector<Metadata*, 8> ElemTypes;
- FunctionType* pFuncTy = cast<FunctionType>(pTy);
- DIBuilder builder(*mpCurrentModule);
+ SmallVector<Metadata *, 8> ElemTypes;
+ FunctionType * pFuncTy = cast<FunctionType>(pTy);
+ DIBuilder builder(*mpCurrentModule);
// Add result type
ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
// Add arguments
- for (auto& param : pFuncTy->params())
+    for (auto &param : pFuncTy->params())
{
ElemTypes.push_back(GetDebugType(param));
}
return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
}
-DIType* JitManager::GetDebugIntegerType(Type* pTy)
+DIType *JitManager::GetDebugIntegerType(Type *pTy)
{
- DIBuilder builder(*mpCurrentModule);
- IntegerType* pIntTy = cast<IntegerType>(pTy);
+ DIBuilder builder(*mpCurrentModule);
+ IntegerType *pIntTy = cast<IntegerType>(pTy);
switch (pIntTy->getBitWidth())
{
- case 1: return builder.createBasicType("int1", 1, dwarf::DW_ATE_unsigned); break;
- case 8: return builder.createBasicType("int8", 8, dwarf::DW_ATE_signed); break;
- case 16: return builder.createBasicType("int16", 16, dwarf::DW_ATE_signed); break;
- case 32: return builder.createBasicType("int", 32, dwarf::DW_ATE_signed); break;
- case 64: return builder.createBasicType("int64", 64, dwarf::DW_ATE_signed); break;
- case 128: return builder.createBasicType("int128", 128, dwarf::DW_ATE_signed); break;
- default: SWR_ASSERT(false, "Unimplemented integer bit width");
+ case 1:
+ return builder.createBasicType("int1", 1, dwarf::DW_ATE_unsigned);
+ break;
+ case 8:
+ return builder.createBasicType("int8", 8, dwarf::DW_ATE_signed);
+ break;
+ case 16:
+ return builder.createBasicType("int16", 16, dwarf::DW_ATE_signed);
+ break;
+ case 32:
+ return builder.createBasicType("int", 32, dwarf::DW_ATE_signed);
+ break;
+ case 64:
+ return builder.createBasicType("int64", 64, dwarf::DW_ATE_signed);
+ break;
+ case 128:
+ return builder.createBasicType("int128", 128, dwarf::DW_ATE_signed);
+ break;
+ default:
+ SWR_ASSERT(false, "Unimplemented integer bit width");
}
return nullptr;
}
-DIType* JitManager::GetDebugVectorType(Type* pTy)
+DIType *JitManager::GetDebugVectorType(Type *pTy)
{
- DIBuilder builder(*mpCurrentModule);
- VectorType* pVecTy = cast<VectorType>(pTy);
- DataLayout DL = DataLayout(mpCurrentModule);
- uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
- uint32_t alignment = DL.getABITypeAlignment(pVecTy);
- SmallVector<Metadata*, 1> Elems;
+ DIBuilder builder(*mpCurrentModule);
+ VectorType * pVecTy = cast<VectorType>(pTy);
+ DataLayout DL = DataLayout(mpCurrentModule);
+ uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
+ uint32_t alignment = DL.getABITypeAlignment(pVecTy);
+ SmallVector<Metadata *, 1> Elems;
Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
- return builder.createVectorType(size, alignment, GetDebugType(pVecTy->getVectorElementType()), builder.getOrCreateArray(Elems));
-
+ return builder.createVectorType(size,
+ alignment,
+ GetDebugType(pVecTy->getVectorElementType()),
+ builder.getOrCreateArray(Elems));
}
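// Worked example, following directly from the code above: for a simd8
// <8 x float> vector, getTypeAllocSizeInBits() yields size == 256 and the
// single subrange describes indices [0, 8) over the "float" element DIType.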
//////////////////////////////////////////////////////////////////////////
/// @brief Dump function x86 assembly to file.
/// @note This should only be called after the module has been jitted to x86 and the
/// module will not be further accessed.
-void JitManager::DumpAsm(Function* pFunction, const char* fileName)
+void JitManager::DumpAsm(Function *pFunction, const char *fileName)
{
if (KNOB_DUMP_SHADER_IR)
{
-
#if defined(_WIN32)
DWORD pid = GetCurrentProcessId();
- char procname[MAX_PATH];
+ char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
+ const char * pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
CreateDirectoryPath(outDir.str().c_str());
#endif
std::error_code EC;
- Module* pModule = pFunction->getParent();
- const char *funcName = pFunction->getName().data();
- char fName[256];
+ Module * pModule = pFunction->getParent();
+ const char * funcName = pFunction->getName().data();
+ char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
#else
raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
- legacy::PassManager* pMPasses = new legacy::PassManager();
- auto* pTarget = mpExec->getTargetMachine();
+ legacy::PassManager *pMPasses = new legacy::PassManager();
+ auto * pTarget = mpExec->getTargetMachine();
pTarget->Options.MCOptions.AsmVerbose = true;
pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
pMPasses->run(*pModule);
{
#if defined(_WIN32)
DWORD pid = GetCurrentProcessId();
- char procname[MAX_PATH];
+ char procname[MAX_PATH];
GetModuleFileNameA(NULL, procname, MAX_PATH);
- const char* pBaseName = strrchr(procname, '\\');
+ const char * pBaseName = strrchr(procname, '\\');
std::stringstream outDir;
outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
CreateDirectoryPath(outDir.str().c_str());
std::string outDir = GetOutputDir();
std::error_code EC;
- const char *funcName = M->getName().data();
- char fName[256];
+ const char * funcName = M->getName().data();
+ char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
#else
std::string outDir = GetOutputDir();
std::error_code EC;
- const char *funcName = f->getName().data();
- char fName[256];
+ const char * funcName = f->getName().data();
+ char fName[256];
#if defined(_WIN32)
sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
#else
fd.flush();
raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
- WriteGraph(fd_cfg, (const Function*)f);
+ WriteGraph(fd_cfg, (const Function *)f);
fd_cfg.flush();
}
}
-extern "C"
-{
- bool g_DllActive = true;
+extern "C" {
+bool g_DllActive = true;
- //////////////////////////////////////////////////////////////////////////
- /// @brief Create JIT context.
- /// @param simdWidth - SIMD width to be used in generated program.
- HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
- {
- return new JitManager(targetSimdWidth, arch, core);
- }
+//////////////////////////////////////////////////////////////////////////
+/// @brief Create JIT context.
+/// @param simdWidth - SIMD width to be used in generated program.
+HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core)
+{
+ return new JitManager(targetSimdWidth, arch, core);
+}
- //////////////////////////////////////////////////////////////////////////
- /// @brief Destroy JIT context.
- void JITCALL JitDestroyContext(HANDLE hJitContext)
+//////////////////////////////////////////////////////////////////////////
+/// @brief Destroy JIT context.
+void JITCALL JitDestroyContext(HANDLE hJitContext)
+{
+ if (g_DllActive)
{
- if (g_DllActive)
- {
- delete reinterpret_cast<JitManager*>(hJitContext);
- }
+ delete reinterpret_cast<JitManager *>(hJitContext);
}
}
+}
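// Typical lifecycle of the exported C API above, as a hedged sketch (the
// simd width, arch, and core strings are illustrative values only):
//
//     HANDLE hJit = JitCreateContext(8, "AVX2", "SKL");
//     // ... SetupNewModule(), build IR, jit and run shaders ...
//     JitDestroyContext(hJit);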
//////////////////////////////////////////////////////////////////////////
/// JitCache
//////////////////////////////////////////////////////////////////////////
struct JitCacheFileHeader
{
- void Init(
- uint32_t llCRC,
- uint32_t objCRC,
- const std::string& moduleID,
- const std::string& cpu,
- uint32_t optLevel,
- uint64_t objSize)
+ void Init(uint32_t llCRC,
+ uint32_t objCRC,
+ const std::string &moduleID,
+ const std::string &cpu,
+ uint32_t optLevel,
+ uint64_t objSize)
{
m_objSize = objSize;
- m_llCRC = llCRC;
- m_objCRC = objCRC;
+ m_llCRC = llCRC;
+ m_objCRC = objCRC;
strncpy(m_ModuleID, moduleID.c_str(), JC_STR_MAX_LEN - 1);
m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
strncpy(m_Cpu, cpu.c_str(), JC_STR_MAX_LEN - 1);
m_Cpu[JC_STR_MAX_LEN - 1] = 0;
- m_optLevel = optLevel;
+ m_optLevel = optLevel;
}
- bool IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
+ bool
+ IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel)
{
- if ((m_MagicNumber != JC_MAGIC_NUMBER) ||
- (m_llCRC != llCRC) ||
- (m_platformKey != JC_PLATFORM_KEY) ||
- (m_optLevel != optLevel))
+ if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
+ (m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
{
return false;
}
uint64_t GetObjectCRC() const { return m_objCRC; }
private:
- static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4;
- static const size_t JC_STR_MAX_LEN = 32;
- static const uint32_t JC_PLATFORM_KEY =
- (LLVM_VERSION_MAJOR << 24) |
- (LLVM_VERSION_MINOR << 16) |
- (LLVM_VERSION_PATCH << 8) |
- ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
-
- uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
- uint64_t m_objSize = 0;
- uint32_t m_llCRC = 0;
- uint32_t m_platformKey = JC_PLATFORM_KEY;
- uint32_t m_objCRC = 0;
- uint32_t m_optLevel = 0;
- char m_ModuleID[JC_STR_MAX_LEN] = {};
- char m_Cpu[JC_STR_MAX_LEN] = {};
+ static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4;
+ static const size_t JC_STR_MAX_LEN = 32;
+ static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
+ (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
+ ((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0);
+
+ uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
+ uint64_t m_objSize = 0;
+ uint32_t m_llCRC = 0;
+ uint32_t m_platformKey = JC_PLATFORM_KEY;
+ uint32_t m_objCRC = 0;
+ uint32_t m_optLevel = 0;
+ char m_ModuleID[JC_STR_MAX_LEN] = {};
+ char m_Cpu[JC_STR_MAX_LEN] = {};
};
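// JC_PLATFORM_KEY packs one byte per LLVM version component plus a
// pointer-size flag in the low byte. Illustrative decode (assuming LLVM
// 6.0.1 on a 64-bit build): (6 << 24) | (0 << 16) | (1 << 8) | 1 yields
// 0x06000101, so cached objects built against a different LLVM version or
// pointer size fail IsValid() and are recompiled.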
-static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
+static inline uint32_t ComputeModuleCRC(const llvm::Module *M)
{
- std::string bitcodeBuffer;
+ std::string bitcodeBuffer;
raw_string_ostream bitcodeStream(bitcodeBuffer);
#if LLVM_VERSION_MAJOR >= 7
#else
llvm::WriteBitcodeToFile(M, bitcodeStream);
#endif
- //M->print(bitcodeStream, nullptr, false);
+ // M->print(bitcodeStream, nullptr, false);
bitcodeStream.flush();
JitCache::JitCache()
{
#if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
- if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0) {
+ if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
+ {
char *homedir;
- if (!(homedir = getenv("HOME"))) {
+ if (!(homedir = getenv("HOME")))
+ {
homedir = getpwuid(getuid())->pw_dir;
}
mCacheDir = homedir;
mCacheDir += (KNOB_JIT_CACHE_DIR.c_str() + 1);
- } else
+ }
+ else
#endif
{
mCacheDir = KNOB_JIT_CACHE_DIR;
}
}
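// Example of the tilde expansion above (the knob value is illustrative):
// with KNOB_JIT_CACHE_DIR == "~/.swr/cache" and HOME == "/home/user",
// mCacheDir becomes "/home/user/.swr/cache". When HOME is unset, the
// passwd entry's pw_dir supplies the home directory instead.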
-int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
+int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr)
{
return ExecCmd(CmdLine, "", pStdOut, pStdErr);
}
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
{
- const std::string& moduleID = M->getModuleIdentifier();
+ const std::string &moduleID = M->getModuleIdentifier();
if (!moduleID.length())
{
return;
objPath += JIT_OBJ_EXT;
{
- std::error_code err;
+ std::error_code err;
llvm::raw_fd_ostream fileObj(objPath.c_str(), err, llvm::sys::fs::F_None);
fileObj << Obj.getBuffer();
fileObj.flush();
{
- std::error_code err;
+ std::error_code err;
llvm::raw_fd_ostream fileObj(filePath.c_str(), err, llvm::sys::fs::F_None);
uint32_t objcrc = ComputeCRC(0, Obj.getBufferStart(), Obj.getBufferSize());
header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
- fileObj.write((const char*)&header, sizeof(header));
+ fileObj.write((const char *)&header, sizeof(header));
fileObj.flush();
}
}
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
/// object which corresponds with Module M, or 0 if an object is not
/// available.
-std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
+std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
{
- const std::string& moduleID = M->getModuleIdentifier();
- mCurrentModuleCRC = ComputeModuleCRC(M);
+ const std::string &moduleID = M->getModuleIdentifier();
+ mCurrentModuleCRC = ComputeModuleCRC(M);
if (!moduleID.length())
{
llvm::SmallString<MAX_PATH> objFilePath = filePath;
objFilePath += JIT_OBJ_EXT;
- FILE* fpObjIn = nullptr;
- FILE* fpIn = fopen(filePath.c_str(), "rb");
+ FILE *fpObjIn = nullptr;
+ FILE *fpIn = fopen(filePath.c_str(), "rb");
if (!fpIn)
{
return nullptr;
#else
pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
#endif
- if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
+ if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
{
pBuf = nullptr;
break;
break;
}
- }
- while (0);
+ } while (0);
fclose(fpIn);
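    // The do { ... } while (0) above is a single-pass scope used as an error
    // funnel: each failed validation (missing object file, stale header,
    // short read) breaks out to the shared fclose() cleanup rather than
    // returning early past it.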
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file JitManager.h
-*
-* @brief JitManager contains the LLVM data structures used for JIT generation
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file JitManager.h
+ *
+ * @brief JitManager contains the LLVM data structures used for JIT generation
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "jit_pch.hpp"
/// JitInstructionSet
/// @brief Subclass of InstructionSet that allows users to override
/// the reporting of support for certain ISA features. This allows capping
-/// the jitted code to a certain feature level, e.g. jit AVX level code on
+/// the jitted code to a certain feature level, e.g. jit AVX level code on
/// a platform that supports AVX2.
//////////////////////////////////////////////////////////////////////////
class JitInstructionSet : public InstructionSet
{
std::transform(isaRequest.begin(), isaRequest.end(), isaRequest.begin(), ::tolower);
- if(isaRequest == "avx")
+ if (isaRequest == "avx")
{
- bForceAVX = true;
- bForceAVX2 = false;
+ bForceAVX = true;
+ bForceAVX2 = false;
bForceAVX512 = false;
}
- else if(isaRequest == "avx2")
+ else if (isaRequest == "avx2")
{
- bForceAVX = false;
- bForceAVX2 = true;
+ bForceAVX = false;
+ bForceAVX2 = true;
bForceAVX512 = false;
}
- else if(isaRequest == "avx512")
+ else if (isaRequest == "avx512")
{
- bForceAVX = false;
- bForceAVX2 = false;
+ bForceAVX = false;
+ bForceAVX2 = false;
bForceAVX512 = true;
}
};
bool BMI2(void) { return bForceAVX ? 0 : InstructionSet::BMI2(); }
private:
- bool bForceAVX = false;
- bool bForceAVX2 = false;
- bool bForceAVX512 = false;
+ bool bForceAVX = false;
+ bool bForceAVX2 = false;
+ bool bForceAVX512 = false;
std::string isaRequest;
};
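// Usage sketch for the capping above (assuming the constructor accepts the
// requested isa string, as the isaRequest member suggests): constructing
// JitInstructionSet("avx") makes feature queries report at most AVX. The
// BMI2() override is capped the same way because BMI2 arrived alongside
// AVX2, so an AVX-capped jit must not emit it.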
-
-
struct JitLLVMContext : llvm::LLVMContext
{
};
-
//////////////////////////////////////////////////////////////////////////
/// JitCache
//////////////////////////////////////////////////////////////////////////
JitCache();
virtual ~JitCache() {}
- void Init(
- JitManager* pJitMgr,
- const llvm::StringRef& cpu,
- llvm::CodeGenOpt::Level level)
+ void Init(JitManager* pJitMgr, const llvm::StringRef& cpu, llvm::CodeGenOpt::Level level)
{
- mCpu = cpu.str();
- mpJitMgr = pJitMgr;
+ mCpu = cpu.str();
+ mpJitMgr = pJitMgr;
mOptLevel = level;
}
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
- void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;
+ void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) override;
/// Returns a pointer to a newly allocated MemoryBuffer that contains the
/// object which corresponds with Module M, or 0 if an object is not
std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M) override;
private:
- std::string mCpu;
+ std::string mCpu;
llvm::SmallString<MAX_PATH> mCacheDir;
- uint32_t mCurrentModuleCRC = 0;
- JitManager* mpJitMgr = nullptr;
- llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None;
+ uint32_t mCurrentModuleCRC = 0;
+ JitManager* mpJitMgr = nullptr;
+ llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None;
};
//////////////////////////////////////////////////////////////////////////
JitManager(uint32_t w, const char* arch, const char* core);
~JitManager(){};
- JitLLVMContext mContext; ///< LLVM compiler
- llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder
- llvm::ExecutionEngine* mpExec;
- JitCache mCache;
+ JitLLVMContext mContext; ///< LLVM compiler
+ llvm::IRBuilder<> mBuilder; ///< LLVM IR Builder
+ llvm::ExecutionEngine* mpExec;
+ JitCache mCache;
// Need to be rebuilt after a JIT and before building new IR
- llvm::Module* mpCurrentModule;
- bool mIsModuleFinalized;
- uint32_t mJitNumber;
+ llvm::Module* mpCurrentModule;
+ bool mIsModuleFinalized;
+ uint32_t mJitNumber;
- uint32_t mVWidth;
+ uint32_t mVWidth;
- bool mUsingAVX512 = false;
+ bool mUsingAVX512 = false;
// fetch shader types
- llvm::FunctionType* mFetchShaderTy;
+ llvm::FunctionType* mFetchShaderTy;
- JitInstructionSet mArch;
+ JitInstructionSet mArch;
// Debugging support
std::unordered_map<llvm::StructType*, llvm::DIType*> mDebugStructMap;
void SetupNewModule();
- void DumpAsm(llvm::Function* pFunction, const char* fileName);
- static void DumpToFile(llvm::Function *f, const char *fileName);
- static void DumpToFile(llvm::Module *M, const char *fileName);
+ void DumpAsm(llvm::Function* pFunction, const char* fileName);
+ static void DumpToFile(llvm::Function* f, const char* fileName);
+ static void DumpToFile(llvm::Module* M, const char* fileName);
static std::string GetOutputDir();
// Debugging support methods
return mDebugStructMap[pStructTy];
}
- llvm::DIType* CreateDebugStructType(llvm::StructType* pType, const std::string& name, llvm::DIFile* pFile, uint32_t lineNum,
- const std::vector<std::pair<std::string, uint32_t>>& members);
+ llvm::DIType*
+ CreateDebugStructType(llvm::StructType* pType,
+ const std::string& name,
+ llvm::DIFile* pFile,
+ uint32_t lineNum,
+ const std::vector<std::pair<std::string, uint32_t>>& members);
};
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend_jit.cpp
-*
-* @brief Implementation of the blend jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend_jit.cpp
+ *
+ * @brief Implementation of the blend jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "jit_api.h"
{
BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){};
- template<bool Color, bool Alpha>
- void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
+ template <bool Color, bool Alpha>
+ void GenerateBlendFactor(SWR_BLEND_FACTOR factor,
+ Value* constColor[4],
+ Value* src[4],
+ Value* src1[4],
+ Value* dst[4],
+ Value* result[4])
{
Value* out[4];
break;
case BLENDFACTOR_SRC_ALPHA_SATURATE:
out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
- out[3] = VIMMED1(1.0f);
+ out[3] = VIMMED1(1.0f);
break;
case BLENDFACTOR_CONST_COLOR:
out[0] = constColor[0];
void Clamp(SWR_FORMAT format, Value* src[4])
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- SWR_TYPE type = info.type[0];
+ SWR_TYPE type = info.type[0];
switch (type)
{
src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
break;
- case SWR_TYPE_UNKNOWN: SWR_INVALID("Unsupport format type: %d", type);
+ case SWR_TYPE_UNKNOWN:
+        SWR_INVALID("Unsupported format type: %d", type);
}
}
{
const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- bool valid[] = { false, false, false, false };
+ bool valid[] = {false, false, false, false};
for (uint32_t c = 0; c < info.numComps; ++c)
{
valid[info.swizzle[c]] = true;
{
if (info.type[c] == SWR_TYPE_UNUSED)
{
- src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
+ src[info.swizzle[c]] =
+ BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
}
}
}
if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED)
{
uint32_t swizComp = info.swizzle[c];
- float factor = (float)((1 << info.bpc[c]) - 1);
+ float factor = (float)((1 << info.bpc[c]) - 1);
switch (info.type[c])
{
case SWR_TYPE_UNORM:
src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f));
src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO));
- src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor));
+ src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f / factor));
break;
- default: SWR_INVALID("Unsupported format type: %d", info.type[c]);
+ default:
+ SWR_INVALID("Unsupported format type: %d", info.type[c]);
}
}
}
}
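    // Worked example of the UNORM quantization above (bpc and input values
    // are illustrative): with info.bpc[c] == 5, factor == 31. An input of
    // 0.5f maps to 0.5 * 31 + 0.5 == 16.0, truncates to 16.0 under
    // _MM_FROUND_TO_ZERO, and rescales to 16 / 31, roughly 0.516, which is
    // the value a 5-bit render target can actually reproduce.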
- template<bool Color, bool Alpha>
- void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
+ template <bool Color, bool Alpha>
+ void BlendFunc(SWR_BLEND_OP blendOp,
+ Value* src[4],
+ Value* srcFactor[4],
+ Value* dst[4],
+ Value* dstFactor[4],
+ Value* result[4])
{
Value* out[4];
Value* srcBlend[4];
void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
{
// Op: (s == PS output, d = RT contents)
- switch(logicOp)
+ switch (logicOp)
{
case LOGICOP_CLEAR:
result[0] = VIMMED1(0);
}
}
- void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
+ void
+ AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
{
// load uint32_t reference
- Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
-
+ Value* pRef = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_alphaTestReference}));
+
// load alpha
- Value* pAlpha = LOAD(ppAlpha, { 0, 0 });
+ Value* pAlpha = LOAD(ppAlpha, {0, 0});
Value* pTest = nullptr;
if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
{
// convert float alpha to unorm8
Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
- pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
+ pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
// compare
switch (state.alphaTestFunction)
{
- case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
- case ZFUNC_NEVER: pTest = VIMMED1(false); break;
- case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break;
- case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break;
- case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break;
- case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break;
- case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break;
- case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break;
+ case ZFUNC_ALWAYS:
+ pTest = VIMMED1(true);
+ break;
+ case ZFUNC_NEVER:
+ pTest = VIMMED1(false);
+ break;
+ case ZFUNC_LT:
+ pTest = ICMP_ULT(pAlphaU8, pRef);
+ break;
+ case ZFUNC_EQ:
+ pTest = ICMP_EQ(pAlphaU8, pRef);
+ break;
+ case ZFUNC_LE:
+ pTest = ICMP_ULE(pAlphaU8, pRef);
+ break;
+ case ZFUNC_GT:
+ pTest = ICMP_UGT(pAlphaU8, pRef);
+ break;
+ case ZFUNC_NE:
+ pTest = ICMP_NE(pAlphaU8, pRef);
+ break;
+ case ZFUNC_GE:
+ pTest = ICMP_UGE(pAlphaU8, pRef);
+ break;
default:
SWR_INVALID("Invalid alpha test function");
break;
// compare
switch (state.alphaTestFunction)
{
- case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
- case ZFUNC_NEVER: pTest = VIMMED1(false); break;
- case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break;
- case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break;
- case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break;
- case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break;
- case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break;
- case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break;
+ case ZFUNC_ALWAYS:
+ pTest = VIMMED1(true);
+ break;
+ case ZFUNC_NEVER:
+ pTest = VIMMED1(false);
+ break;
+ case ZFUNC_LT:
+ pTest = FCMP_OLT(pAlpha, pRef);
+ break;
+ case ZFUNC_EQ:
+ pTest = FCMP_OEQ(pAlpha, pRef);
+ break;
+ case ZFUNC_LE:
+ pTest = FCMP_OLE(pAlpha, pRef);
+ break;
+ case ZFUNC_GT:
+ pTest = FCMP_OGT(pAlpha, pRef);
+ break;
+ case ZFUNC_NE:
+ pTest = FCMP_ONE(pAlpha, pRef);
+ break;
+ case ZFUNC_GE:
+ pTest = FCMP_OGE(pAlpha, pRef);
+ break;
default:
SWR_INVALID("Invalid alpha test function");
break;
Function* Create(const BLEND_COMPILE_STATE& state)
{
- std::stringstream fnName("BLND_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+ std::stringstream fnName("BLND_",
+ std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &state, sizeof(state));
// blend function signature
- //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
+ // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
std::vector<Type*> args{
PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT*
};
- //std::vector<Type*> args{
+ // std::vector<Type*> args{
// PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0), // SWR_BLEND_CONTEXT*
//};
- FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
- Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+ FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
+ Function* blendFunc = Function::Create(
+ fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());
BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
IRB()->SetInsertPoint(entry);
// arguments
- auto argitr = blendFunc->arg_begin();
+ auto argitr = blendFunc->arg_begin();
Value* pBlendContext = &*argitr++;
pBlendContext->setName("pBlendContext");
- Value* pBlendState = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pBlendState });
+ Value* pBlendState = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pBlendState});
pBlendState->setName("pBlendState");
- Value* pSrc = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src });
+ Value* pSrc = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src});
pSrc->setName("src");
- Value* pSrc1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src1 });
+ Value* pSrc1 = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src1});
pSrc1->setName("src1");
- Value* pSrc0Alpha = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src0alpha });
+ Value* pSrc0Alpha = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src0alpha});
pSrc0Alpha->setName("src0alpha");
- Value* sampleNum = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_sampleNum });
+ Value* sampleNum = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_sampleNum});
sampleNum->setName("sampleNum");
- Value* pDst = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pDst });
+ Value* pDst = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pDst});
pDst->setName("pDst");
- Value* pResult = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_result });
+ Value* pResult = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_result});
pResult->setName("result");
- Value* ppoMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_oMask });
+ Value* ppoMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_oMask});
ppoMask->setName("ppoMask");
- Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
+ Value* ppMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pMask});
ppMask->setName("pMask");
- static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+ static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+ "Unsupported hot tile format");
Value* dst[4];
Value* constantColor[4];
Value* src[4];
for (uint32_t i = 0; i < 4; ++i)
{
// load hot tile
- dst[i] = LOAD(pDst, { 0, i });
+ dst[i] = LOAD(pDst, {0, i});
// load constant color
- constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i }));
-
+ constantColor[i] = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_constantColor, i}));
+
// load src
- src[i] = LOAD(pSrc, { 0, i });
+ src[i] = LOAD(pSrc, {0, i});
// load src1
- src1[i] = LOAD(pSrc1, { 0, i });
+ src1[i] = LOAD(pSrc1, {0, i});
}
Value* currentSampleMask = VIMMED1(-1);
if (state.desc.alphaToCoverageEnable)
{
- Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
- uint32_t bits = (1 << state.desc.numSamples) - 1;
- currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
- currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty);
+ Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
+ uint32_t bits = (1 << state.desc.numSamples) - 1;
+ currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
+ currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty);
}
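        // Worked example of the alpha-to-coverage math above (alpha values
        // are illustrative): with numSamples == 4, bits == 0xF. Src alpha
        // 1.0 yields mask 15 (all samples), alpha 0.0 yields 0, and alpha
        // 0.5 yields (int)(0.5 * 15 + 0.5) == 8, a simple scaled mask
        // rather than a dithered coverage pattern.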
// alpha test
if (state.desc.alphaTestEnable)
{
// Gather for archrast stats
- STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+ STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
}
else
{
// Gather for archrast stats
- STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+ STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
}
// color blend
if (state.blendState.blendEnable)
{
// Gather for archrast stats
- STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+ STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
// clamp sources
Clamp(state.format, src);
Value* dstFactor[4];
if (state.desc.independentAlphaBlendEnable)
{
- GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);
-
- GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
- GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);
-
- BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
- BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
+ GenerateBlendFactor<true, false>(
+ state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
+ GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor,
+ constantColor,
+ src,
+ src1,
+ dst,
+ srcFactor);
+
+ GenerateBlendFactor<true, false>(
+ state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
+ GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor,
+ constantColor,
+ src,
+ src1,
+ dst,
+ dstFactor);
+
+ BlendFunc<true, false>(
+ state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+ BlendFunc<false, true>(
+ state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
}
else
{
- GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
- GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
+ GenerateBlendFactor<true, true>(
+ state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
+ GenerateBlendFactor<true, true>(
+ state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
- BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+ BlendFunc<true, true>(
+ state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
}
// store results out
for (uint32_t i = 0; i < 4; ++i)
{
- STORE(result[i], pResult, { 0, i });
+ STORE(result[i], pResult, {0, i});
}
}
else
{
// Gather for archrast stats
- STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+ STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
}
-
- if(state.blendState.logicOpEnable)
+
+ if (state.blendState.logicOpEnable)
{
const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
- Value* vMask[4];
- float scale[4];
+ Value* vMask[4];
+ float scale[4];
if (!state.blendState.blendEnable)
{
Clamp(state.format, dst);
}
- for(uint32_t i = 0; i < 4; i++)
+ for (uint32_t i = 0; i < 4; i++)
{
if (info.type[i] == SWR_TYPE_UNUSED)
{
dst[i] = BITCAST(dst[i], mSimdInt32Ty);
break;
case SWR_TYPE_SNORM:
- src[i] = FP_TO_SI(
- FMUL(src[i], VIMMED1(scale[i])),
- mSimdInt32Ty);
- dst[i] = FP_TO_SI(
- FMUL(dst[i], VIMMED1(scale[i])),
- mSimdInt32Ty);
+ src[i] = FP_TO_SI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
+ dst[i] = FP_TO_SI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
break;
case SWR_TYPE_UNORM:
- src[i] = FP_TO_UI(
- FMUL(src[i], VIMMED1(scale[i])),
- mSimdInt32Ty);
- dst[i] = FP_TO_UI(
- FMUL(dst[i], VIMMED1(scale[i])),
- mSimdInt32Ty);
+ src[i] = FP_TO_UI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
+ dst[i] = FP_TO_UI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
break;
}
}
LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);
// store results out
- for(uint32_t i = 0; i < 4; ++i)
+ for (uint32_t i = 0; i < 4; ++i)
{
if (info.type[i] == SWR_TYPE_UNUSED)
{
case SWR_TYPE_SNORM:
result[i] = SHL(result[i], C(32 - info.bpc[i]));
result[i] = ASHR(result[i], C(32 - info.bpc[i]));
- result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty),
- VIMMED1(1.0f / scale[i]));
+ result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
break;
case SWR_TYPE_UNORM:
- result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty),
- VIMMED1(1.0f / scale[i]));
+ result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
break;
}
}
}
- if(state.desc.oMaskEnable)
+ if (state.desc.oMaskEnable)
{
assert(!(state.desc.alphaToCoverageEnable));
// load current mask
- Value* oMask = LOAD(ppoMask);
+ Value* oMask = LOAD(ppoMask);
currentSampleMask = AND(oMask, currentSampleMask);
}
- if(state.desc.sampleMaskEnable)
+ if (state.desc.sampleMaskEnable)
{
- Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask});
+ Value* sampleMask = LOAD(pBlendState, {0, SWR_BLEND_STATE_sampleMask});
currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask);
}
- if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
- state.desc.oMaskEnable)
+ if (state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
+ state.desc.oMaskEnable)
{
// load coverage mask and mask off any lanes with no samples
- Value* pMask = LOAD(ppMask);
+ Value* pMask = LOAD(ppMask);
Value* sampleMasked = SHL(C(1), sampleNum);
- currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked));
+ currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked));
currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty);
Value* outputMask = AND(pMask, currentSampleMask);
// store new mask
/// @return PFN_FETCH_FUNC - pointer to fetch code
PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
{
- const llvm::Function *func = (const llvm::Function*)hFunc;
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
- PFN_BLEND_JIT_FUNC pfnBlend;
+ const llvm::Function* func = (const llvm::Function*)hFunc;
+ JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+ PFN_BLEND_JIT_FUNC pfnBlend;
pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
- // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+    // MCJIT finalizes modules the first time you JIT code from them. Once finalized, you cannot
+ // add new IR to the module
pJitMgr->mIsModuleFinalized = true;
return pfnBlend;
/// @brief JIT compiles blend shader
/// @param hJitMgr - JitManager handle
/// @param state - blend state to build function from
-extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
+extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr,
+ const BLEND_COMPILE_STATE& state)
{
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
pJitMgr->SetupNewModule();
BlendJit theJit(pJitMgr);
- HANDLE hFunc = theJit.Create(state);
+ HANDLE hFunc = theJit.Create(state);
return JitBlendFunc(hJitMgr, hFunc);
}
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend_jit.h
-*
-* @brief Definition of the blend jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend_jit.h
+ *
+ * @brief Definition of the blend jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "common/formats.h"
struct RENDER_TARGET_BLEND_COMPILE_STATE
{
- bool blendEnable;
- bool logicOpEnable;
+ bool blendEnable;
+ bool logicOpEnable;
SWR_BLEND_FACTOR sourceAlphaBlendFactor;
SWR_BLEND_FACTOR destAlphaBlendFactor;
SWR_BLEND_FACTOR sourceBlendFactor;
SWR_BLEND_FACTOR destBlendFactor;
- SWR_BLEND_OP colorBlendFunc;
- SWR_BLEND_OP alphaBlendFunc;
- SWR_LOGIC_OP logicOpFunc;
+ SWR_BLEND_OP colorBlendFunc;
+ SWR_BLEND_OP alphaBlendFunc;
+ SWR_LOGIC_OP logicOpFunc;
};
enum ALPHA_TEST_FORMAT
{
struct
{
- uint32_t alphaTestEnable: 1;
- uint32_t independentAlphaBlendEnable: 1;
- uint32_t alphaToCoverageEnable: 1;
- uint32_t oMaskEnable:1;
- uint32_t inputCoverageEnable:1;
- uint32_t sampleMaskEnable:1;
- uint32_t numSamples:5;
- uint32_t _reserved : 21;
+ uint32_t alphaTestEnable : 1;
+ uint32_t independentAlphaBlendEnable : 1;
+ uint32_t alphaToCoverageEnable : 1;
+ uint32_t oMaskEnable : 1;
+ uint32_t inputCoverageEnable : 1;
+ uint32_t sampleMaskEnable : 1;
+ uint32_t numSamples : 5;
+ uint32_t _reserved : 21;
};
uint32_t bits;
};
//////////////////////////////////////////////////////////////////////////
struct BLEND_COMPILE_STATE
{
- SWR_FORMAT format; // format of render target being blended
+ SWR_FORMAT format; // format of render target being blended
RENDER_TARGET_BLEND_COMPILE_STATE blendState;
- BLEND_DESC desc;
+ BLEND_DESC desc;
- SWR_ZFUNCTION alphaTestFunction;
+ SWR_ZFUNCTION alphaTestFunction;
ALPHA_TEST_FORMAT alphaTestFormat;
bool operator==(const BLEND_COMPILE_STATE& other) const
{
if (!desc.alphaTestEnable)
{
- alphaTestFormat = (ALPHA_TEST_FORMAT)0;
+ alphaTestFormat = (ALPHA_TEST_FORMAT)0;
alphaTestFunction = (SWR_ZFUNCTION)0;
}
if (!blendState.blendEnable)
{
blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.sourceBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.colorBlendFunc = (SWR_BLEND_OP)0;
- blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
+ blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
+ blendState.sourceBlendFactor = (SWR_BLEND_FACTOR)0;
+ blendState.destBlendFactor = (SWR_BLEND_FACTOR)0;
+ blendState.colorBlendFunc = (SWR_BLEND_OP)0;
+ blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
}
if (!blendState.logicOpEnable)
if (!desc.independentAlphaBlendEnable)
{
blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
- blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
+ blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
+ blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
}
}
};
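// The zeroing above canonicalizes fields that cannot affect the compiled
// function. This matters because Create() keys the jitted blend function
// (and its module identifier) on ComputeCRC(0, &state, sizeof(state)), so
// two states differing only in dead fields produce the same function name
// and CRC.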
//////////////////////////////////////////////////////////////////////////
/// @brief Constructor for Builder.
/// @param pJitMgr - JitManager which contains modules, function passes, etc.
- Builder::Builder(JitManager *pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr)
+ Builder::Builder(JitManager* pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr)
{
mVWidth = pJitMgr->mVWidth;
mVWidth16 = 16;
mSimd32Int8Ty = VectorType::get(mInt8Ty, 32);
- if (sizeof(uint32_t *) == 4)
+ if (sizeof(uint32_t*) == 4)
{
mIntPtrTy = mInt32Ty;
mSimdIntPtrTy = mSimdInt32Ty;
}
else
{
- SWR_ASSERT(sizeof(uint32_t *) == 8);
+ SWR_ASSERT(sizeof(uint32_t*) == 8);
mIntPtrTy = mInt64Ty;
mSimdIntPtrTy = mSimdInt64Ty;
}
/// @brief Mark this alloca as temporary to avoid hoisting later on
- void Builder::SetTempAlloca(Value *inst)
+ void Builder::SetTempAlloca(Value* inst)
{
- AllocaInst *pAlloca = dyn_cast<AllocaInst>(inst);
+ AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
- MDNode *N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca"));
+ MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca"));
pAlloca->setMetadata("is_temp_alloca", N);
}
- bool Builder::IsTempAlloca(Value *inst)
+ bool Builder::IsTempAlloca(Value* inst)
{
- AllocaInst *pAlloca = dyn_cast<AllocaInst>(inst);
+ AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
return (pAlloca->getMetadata("is_temp_alloca") != nullptr);
}
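    // SetTempAlloca and IsTempAlloca round-trip through LLVM instruction
    // metadata. Hedged sketch (the ALLOCA helper is hypothetical, standing
    // in for whatever creates the alloca):
    //
    //     Value* pTmp = ALLOCA(mSimdFP32Ty); // hypothetical helper
    //     SetTempAlloca(pTmp);               // tags !is_temp_alloca
    //     // ... a later hoisting pass skips pTmp when IsTempAlloca() is true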
// Returns true if able to find a call instruction to mark
- bool Builder::SetNamedMetaDataOnCallInstr(Instruction *inst, StringRef mdName)
+ bool Builder::SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
{
- CallInst *pCallInstr = dyn_cast<CallInst>(inst);
+ CallInst* pCallInstr = dyn_cast<CallInst>(inst);
if (pCallInstr)
{
- MDNode *N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, mdName));
+ MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, mdName));
pCallInstr->setMetadata(mdName, N);
return true;
}
else
{
// Follow use def chain back up
- for (Use &u : inst->operands())
+ for (Use& u : inst->operands())
{
- Instruction *srcInst = dyn_cast<Instruction>(u.get());
+ Instruction* srcInst = dyn_cast<Instruction>(u.get());
if (srcInst)
{
if (SetNamedMetaDataOnCallInstr(srcInst, mdName))
return false;
}
- bool Builder::HasNamedMetaDataOnCallInstr(Instruction *inst,
- StringRef mdName)
+ bool Builder::HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
{
- CallInst *pCallInstr = dyn_cast<CallInst>(inst);
+ CallInst* pCallInstr = dyn_cast<CallInst>(inst);
if (!pCallInstr)
{
//////////////////////////////////////////////////////////////////////////
/// @brief Packetizes the type. Assumes SOA conversion.
- Type *Builder::GetVectorType(Type *pType)
+ Type* Builder::GetVectorType(Type* pType)
{
if (pType->isVectorTy())
{
if (pType->isArrayTy())
{
uint32_t arraySize = pType->getArrayNumElements();
- Type * pArrayType = pType->getArrayElementType();
- Type * pVecArrayType = GetVectorType(pArrayType);
- Type * pVecType = ArrayType::get(pVecArrayType, arraySize);
+ Type* pArrayType = pType->getArrayElementType();
+ Type* pVecArrayType = GetVectorType(pArrayType);
+ Type* pVecType = ArrayType::get(pVecArrayType, arraySize);
return pVecType;
}
// {float,int} should packetize to {<8 x float>, <8 x int>}
if (pType->isAggregateType())
{
- uint32_t numElems = pType->getStructNumElements();
- SmallVector<Type *, 8> vecTypes;
+ uint32_t numElems = pType->getStructNumElements();
+ SmallVector<Type*, 8> vecTypes;
for (uint32_t i = 0; i < numElems; ++i)
{
- Type *pElemType = pType->getStructElementType(i);
- Type *pVecElemType = GetVectorType(pElemType);
+ Type* pElemType = pType->getStructElementType(i);
+ Type* pVecElemType = GetVectorType(pElemType);
vecTypes.push_back(pVecElemType);
}
- Type *pVecType = StructType::get(JM()->mContext, vecTypes);
+ Type* pVecType = StructType::get(JM()->mContext, vecTypes);
return pVecType;
}
}
// <ty> should packetize to <8 x <ty>>
- Type *vecType = VectorType::get(pType, JM()->mVWidth);
+ Type* vecType = VectorType::get(pType, JM()->mVWidth);
return vecType;
}
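    // Further examples of the SOA packetization above (widths assume the
    // default mVWidth == 8): [4 x float] becomes [4 x <8 x float>], and the
    // nested aggregate {float, [2 x int]} becomes {<8 x float>, [2 x <8 x int>]}.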
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder.h
-*
-* @brief Includes all the builder related functionality
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder.h
+ *
+ * @brief Includes all the builder related functionality
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "JitManager.h"
///@todo Move this to better place
enum SHADER_STATS_COUNTER_TYPE
{
- STATS_INST_EXECUTED = 0,
- STATS_SAMPLE_EXECUTED = 1,
- STATS_SAMPLE_L_EXECUTED = 2,
- STATS_SAMPLE_B_EXECUTED = 3,
- STATS_SAMPLE_C_EXECUTED = 4,
- STATS_SAMPLE_C_LZ_EXECUTED = 5,
- STATS_SAMPLE_C_D_EXECUTED = 6,
- STATS_LOD_EXECUTED = 7,
- STATS_GATHER4_EXECUTED = 8,
- STATS_GATHER4_C_EXECUTED = 9,
- STATS_GATHER4_C_PO_EXECUTED = 10,
+ STATS_INST_EXECUTED = 0,
+ STATS_SAMPLE_EXECUTED = 1,
+ STATS_SAMPLE_L_EXECUTED = 2,
+ STATS_SAMPLE_B_EXECUTED = 3,
+ STATS_SAMPLE_C_EXECUTED = 4,
+ STATS_SAMPLE_C_LZ_EXECUTED = 5,
+ STATS_SAMPLE_C_D_EXECUTED = 6,
+ STATS_LOD_EXECUTED = 7,
+ STATS_GATHER4_EXECUTED = 8,
+ STATS_GATHER4_C_EXECUTED = 9,
+ STATS_GATHER4_C_PO_EXECUTED = 10,
STATS_GATHER4_C_PO_C_EXECUTED = 11,
- STATS_LOAD_RAW_UAV = 12,
- STATS_LOAD_RAW_RESOURCE = 13,
- STATS_STORE_RAW_UAV = 14,
- STATS_STORE_TGSM = 15,
- STATS_DISCARD = 16,
- STATS_BARRIER = 17,
+ STATS_LOAD_RAW_UAV = 12,
+ STATS_LOAD_RAW_RESOURCE = 13,
+ STATS_STORE_RAW_UAV = 14,
+ STATS_STORE_TGSM = 15,
+ STATS_DISCARD = 16,
+ STATS_BARRIER = 17,
};
using namespace llvm;
struct Builder
{
- Builder(JitManager *pJitMgr);
+ Builder(JitManager* pJitMgr);
virtual ~Builder() {}
- IRBuilder<> *IRB() { return mpIRBuilder; };
- JitManager *JM() { return mpJitMgr; }
+ IRBuilder<>* IRB() { return mpIRBuilder; };
+ JitManager* JM() { return mpJitMgr; }
- JitManager *mpJitMgr;
- IRBuilder<> *mpIRBuilder;
+ JitManager* mpJitMgr;
+ IRBuilder<>* mpIRBuilder;
- uint32_t mVWidth; // vector width target simd
- uint32_t mVWidth16; // vector width simd16
+ uint32_t mVWidth; // vector width target simd
+ uint32_t mVWidth16; // vector width simd16
// Built in types: scalar
- Type* mVoidTy;
- Type* mInt1Ty;
- Type* mInt8Ty;
- Type* mInt16Ty;
- Type* mInt32Ty;
- Type* mInt64Ty;
- Type* mIntPtrTy;
- Type* mFP16Ty;
- Type* mFP32Ty;
- Type* mFP32PtrTy;
- Type* mDoubleTy;
- Type* mInt8PtrTy;
- Type* mInt16PtrTy;
- Type* mInt32PtrTy;
-
- Type* mSimd4FP64Ty;
+ Type* mVoidTy;
+ Type* mInt1Ty;
+ Type* mInt8Ty;
+ Type* mInt16Ty;
+ Type* mInt32Ty;
+ Type* mInt64Ty;
+ Type* mIntPtrTy;
+ Type* mFP16Ty;
+ Type* mFP32Ty;
+ Type* mFP32PtrTy;
+ Type* mDoubleTy;
+ Type* mInt8PtrTy;
+ Type* mInt16PtrTy;
+ Type* mInt32PtrTy;
+
+ Type* mSimd4FP64Ty;
// Built in types: target SIMD
- Type* mSimdFP16Ty;
- Type* mSimdFP32Ty;
- Type* mSimdInt1Ty;
- Type* mSimdInt16Ty;
- Type* mSimdInt32Ty;
- Type* mSimdInt64Ty;
- Type* mSimdIntPtrTy;
- Type* mSimdVectorTy;
- Type* mSimdVectorTRTy;
- Type* mSimdVectorIntTy;
+ Type* mSimdFP16Ty;
+ Type* mSimdFP32Ty;
+ Type* mSimdInt1Ty;
+ Type* mSimdInt16Ty;
+ Type* mSimdInt32Ty;
+ Type* mSimdInt64Ty;
+ Type* mSimdIntPtrTy;
+ Type* mSimdVectorTy;
+ Type* mSimdVectorTRTy;
+ Type* mSimdVectorIntTy;
// Built in types: simd16
- Type* mSimd16FP16Ty;
- Type* mSimd16FP32Ty;
- Type* mSimd16Int1Ty;
- Type* mSimd16Int16Ty;
- Type* mSimd16Int32Ty;
- Type* mSimd16Int64Ty;
- Type* mSimd16IntPtrTy;
- Type* mSimd16VectorTy;
- Type* mSimd16VectorTRTy;
-
- Type* mSimd32Int8Ty;
-
- void SetTargetWidth(uint32_t width);
- void SetTempAlloca(Value* inst);
- bool IsTempAlloca(Value* inst);
- bool SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
- bool HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
+ Type* mSimd16FP16Ty;
+ Type* mSimd16FP32Ty;
+ Type* mSimd16Int1Ty;
+ Type* mSimd16Int16Ty;
+ Type* mSimd16Int32Ty;
+ Type* mSimd16Int64Ty;
+ Type* mSimd16IntPtrTy;
+ Type* mSimd16VectorTy;
+ Type* mSimd16VectorTRTy;
+
+ Type* mSimd32Int8Ty;
+
+ void SetTargetWidth(uint32_t width);
+ void SetTempAlloca(Value* inst);
+ bool IsTempAlloca(Value* inst);
+ bool SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
+ bool HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
Type* GetVectorType(Type* pType);
- void SetMetadata(StringRef s, uint32_t val)
+ void SetMetadata(StringRef s, uint32_t val)
{
- llvm::NamedMDNode *metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s);
- Constant* cval = mpIRBuilder->getInt32(val);
- llvm::MDNode *mdNode = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(), llvm::ConstantAsMetadata::get(cval));
+ llvm::NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s);
+ Constant* cval = mpIRBuilder->getInt32(val);
+ llvm::MDNode* mdNode = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(),
+ llvm::ConstantAsMetadata::get(cval));
if (metaData->getNumOperands())
{
metaData->setOperand(0, mdNode);
NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getNamedMetadata(s);
if (metaData)
{
- MDNode* mdNode = metaData->getOperand(0);
- Metadata* val = mdNode->getOperand(0);
+ MDNode* mdNode = metaData->getOperand(0);
+ Metadata* val = mdNode->getOperand(0);
return mdconst::dyn_extract<ConstantInt>(val)->getZExtValue();
}
else
#include "builder_mem.h"
protected:
-
- void SetPrivateContext(Value* pPrivateContext)
- {
- mpPrivateContext = pPrivateContext;
+ void SetPrivateContext(Value* pPrivateContext)
+ {
+ mpPrivateContext = pPrivateContext;
NotifyPrivateContextSet();
}
- virtual void NotifyPrivateContextSet() {}
+ virtual void NotifyPrivateContextSet() {}
inline Value* GetPrivateContext() { return mpPrivateContext; }
- private:
+ private:
Value* mpPrivateContext;
-
};
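// The SetMetadata / metadata-read pair above round-trips a uint32_t through a
// named module-level metadata node. A minimal standalone sketch of that round
// trip (hypothetical helper names, not part of this patch):
//
//   void SetModuleMD(llvm::Module& M, llvm::StringRef name, uint32_t val)
//   {
//       llvm::LLVMContext& ctx = M.getContext();
//       llvm::MDNode* node = llvm::MDNode::get(
//           ctx, llvm::ConstantAsMetadata::get(
//                    llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), val)));
//       llvm::NamedMDNode* named = M.getOrInsertNamedMetadata(name);
//       named->getNumOperands() ? named->setOperand(0, node) : named->addOperand(node);
//   }
//
//   uint32_t GetModuleMD(llvm::Module& M, llvm::StringRef name, uint32_t dflt)
//   {
//       llvm::NamedMDNode* named = M.getNamedMetadata(name);
//       if (!named || named->getNumOperands() == 0)
//           return dflt;
//       llvm::Metadata* md = named->getOperand(0)->getOperand(0);
//       return llvm::mdconst::dyn_extract<llvm::ConstantInt>(md)->getZExtValue();
//   }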
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_gfx_mem.cpp
-*
-* @brief Definition of the gfx mem builder
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_gfx_mem.cpp
+ *
+ * @brief Definition of the gfx mem builder
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"
{
using namespace llvm;
- BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) :
- Builder(pJitMgr)
+ BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr)
{
- mpTranslationFuncTy = nullptr;
+ mpTranslationFuncTy = nullptr;
mpfnTranslateGfxAddress = nullptr;
- mpParamSimDC = nullptr;
+ mpParamSimDC = nullptr;
}
{
}
- void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+ void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage)
{
- SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t.");
+ SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
+ "Internal memory should not be gfxptr_t.");
}
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value* BuilderGfxMem::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask,
- uint8_t scale, JIT_MEM_CLIENT usage)
- {
- // address may be coming in as 64bit int now so get the pointer
+ Value *BuilderGfxMem::GATHERPS(Value * vSrc,
+ Value * pBase,
+ Value * vIndices,
+ Value * vMask,
+ uint8_t scale,
+ JIT_MEM_CLIENT usage)
+ {
+ // The address may come in as a 64-bit integer, so convert it to a pointer first
if (pBase->getType() == mInt64Ty)
{
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
- Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
+ Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
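// Scalar reference semantics for the gather above (a sketch; the helper name,
// float element type, and explicit lane count are illustrative, not part of
// this patch). Masked-off lanes keep the passthrough values from vSrc:
//
//   void GatherPS_Ref(float* dst, const float* vSrc, const uint8_t* pBase,
//                     const int32_t* vIndices, const bool* vMask,
//                     uint8_t scale, uint32_t lanes)
//   {
//       for (uint32_t i = 0; i < lanes; ++i)
//           dst[i] = vMask[i]
//               ? *reinterpret_cast<const float*>(pBase + int64_t(vIndices[i]) * scale)
//               : vSrc[i];
//   }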
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value* BuilderGfxMem::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask,
- uint8_t scale, JIT_MEM_CLIENT usage)
+ Value *BuilderGfxMem::GATHERDD(Value * vSrc,
+ Value * pBase,
+ Value * vIndices,
+ Value * vMask,
+ uint8_t scale,
+ JIT_MEM_CLIENT usage)
{
// The address may come in as a 64-bit integer, so convert it to a pointer first
pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
}
- Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
+ Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
return vGather;
}
- Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
+ Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset)
{
return ADD(base, offset);
}
- Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
+ Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, Idx, nullptr, Name);
}
- Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
+ Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ty, Ptr, Idx, Name);
}
- Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
+ Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, indexList);
}
- Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
+ Value *
+ BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
{
Ptr = TranslationHelper(Ptr, Ty);
return Builder::GEP(Ptr, indexList);
}
- Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
+ Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
{
SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
"Access of GFX pointers must have non-null type specified.");
return Ptr;
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst *BuilderGfxMem::LOAD(
+ Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::LOAD(Ptr, isVolatile, Name);
}
- LoadInst* BuilderGfxMem::LOAD(Value* BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst *BuilderGfxMem::LOAD(Value * BasePtr,
+ const std::initializer_list<uint32_t> &offset,
+ const llvm::Twine & name,
+ Type * Ty,
+ JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(BasePtr, usage);
if (BasePtr->getType() == mInt64Ty)
{
SWR_ASSERT(Ty);
- BasePtr = INT_TO_PTR(BasePtr, Ty, name);
+ BasePtr = INT_TO_PTR(BasePtr, Ty, name);
bNeedTranslation = true;
}
- std::vector<Value*> valIndices;
+ std::vector<Value *> valIndices;
for (auto i : offset)
{
valIndices.push_back(C(i));
return LOAD(BasePtr, name, Ty, usage);
}
- CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, Value* Mask, Value* PassThru, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ CallInst *BuilderGfxMem::MASKED_LOAD(Value * Ptr,
+ unsigned Align,
+ Value * Mask,
+ Value * PassThru,
+ const Twine & Name,
+ Type * Ty,
+ JIT_MEM_CLIENT usage)
{
AssertGFXMemoryParams(Ptr, usage);
return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
}
- Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress, Type* PtrTy, const Twine &Name, JIT_MEM_CLIENT /* usage */)
+ Value *BuilderGfxMem::TranslateGfxAddress(Value * xpGfxAddress,
+ Type * PtrTy,
+ const Twine &Name,
+ JIT_MEM_CLIENT /* usage */)
{
if (PtrTy == nullptr)
{
}
return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
}
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_gfx_mem.h
-*
-* @brief Definition of the builder to support different translation types for gfx memory access
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_gfx_mem.h
+ *
+ * @brief Definition of the builder to support different translation types for gfx memory access
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "builder.h"
class BuilderGfxMem : public Builder
{
public:
- BuilderGfxMem(JitManager* pJitMgr);
+ BuilderGfxMem(JitManager *pJitMgr);
virtual ~BuilderGfxMem() {}
virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
- virtual Value *GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
- virtual Value *GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
-
- virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
- virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
- virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
- virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-
- Value* TranslateGfxAddress(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual Value *
+ GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr);
+ virtual Value *
+ GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
+
+ virtual LoadInst *LOAD(Value * Ptr,
+ const char * Name,
+ Type * Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst *LOAD(Value * Ptr,
+ const Twine & Name = "",
+ Type * Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst *LOAD(Value * Ptr,
+ bool isVolatile,
+ const Twine & Name = "",
+ Type * Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual LoadInst *LOAD(Value * BasePtr,
+ const std::initializer_list<uint32_t> &offset,
+ const llvm::Twine & Name = "",
+ Type * Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+ virtual CallInst *MASKED_LOAD(Value * Ptr,
+ unsigned Align,
+ Value * Mask,
+ Value * PassThru = nullptr,
+ const Twine & Name = "",
+ Type * Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+ virtual Value *GATHERPS(Value * src,
+ Value * pBase,
+ Value * indices,
+ Value * mask,
+ uint8_t scale = 1,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+ virtual Value *GATHERDD(Value * src,
+ Value * pBase,
+ Value * indices,
+ Value * mask,
+ uint8_t scale = 1,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+
+ Value *TranslateGfxAddress(Value * xpGfxAddress,
+ Type * PtrTy = nullptr,
+ const Twine & Name = "",
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
template <typename T>
- Value* TranslateGfxAddress(Value* xpGfxBaseAddress, const std::initializer_list<T> &offset, Type* PtrTy = nullptr, const Twine &Name = "", JIT_MEM_CLIENT usage = GFX_MEM_CLIENT_SHADER)
+ Value *TranslateGfxAddress(Value * xpGfxBaseAddress,
+ const std::initializer_list<T> &offset,
+ Type * PtrTy = nullptr,
+ const Twine & Name = "",
+ JIT_MEM_CLIENT usage = GFX_MEM_CLIENT_SHADER)
{
AssertGFXMemoryParams(xpGfxBaseAddress, usage);
SWR_ASSERT(xpGfxBaseAddress->getType()->isPointerTy() == false);
PtrTy = mInt8PtrTy;
}
- Value* ptr = INT_TO_PTR(xpGfxBaseAddress, PtrTy);
- ptr = GEP(ptr, offset);
+ Value *ptr = INT_TO_PTR(xpGfxBaseAddress, PtrTy);
+ ptr = GEP(ptr, offset);
return TranslateGfxAddress(PTR_TO_INT(ptr, mInt64Ty), PtrTy, Name, usage);
}
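// Illustrative use of the helper above (hypothetical values; vbTy stands in
// for whatever type the caller is addressing):
//
//   Value* xpVB = ...; // raw 64-bit gfx address (i64)
//   Value* pVB  = TranslateGfxAddress(xpVB, {0u}, PointerType::get(vbTy, 0));
//   // pVB is now a typed pointer suitable for the LOAD/GEP overloads above.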
protected:
+ void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage);
- void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
-
virtual void NotifyPrivateContextSet();
- virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+ virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset);
- Value* TranslationHelper(Value *Ptr, Type *Ty);
+ Value *TranslationHelper(Value *Ptr, Type *Ty);
- FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
- Value* GetTranslationFunction() { return mpfnTranslateGfxAddress; }
- Value* GetParamSimDC() { return mpParamSimDC; }
+ FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; }
+ Value * GetTranslationFunction() { return mpfnTranslateGfxAddress; }
+ Value * GetParamSimDC() { return mpParamSimDC; }
private:
-
- FunctionType* mpTranslationFuncTy;
- Value* mpfnTranslateGfxAddress;
- Value* mpParamSimDC;
+ FunctionType *mpTranslationFuncTy;
+ Value * mpfnTranslateGfxAddress;
+ Value * mpParamSimDC;
};
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_math.h
-*
-* @brief math/alu builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_math.h
+ *
+ * @brief math/alu builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
Value* VLOG2PS(Value* src);
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.cpp
-*
-* @brief Implementation for miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.cpp
+ *
+ * @brief Implementation for miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"
{
void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
{
- SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
+ SWR_ASSERT(
+ ptr->getType() != mInt64Ty,
+ "Address appears to be GFX access. Requires translation through BuilderGfxMem.");
}
- Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
+ Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
{
return IRB()->CreateGEP(Ptr, Idx, Name);
}
- Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+ Value* Builder::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
{
return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
+ Value* Builder::GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
- Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
+ Value* Builder::GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
{
std::vector<Value*> indices;
for (auto i : indexList)
return GEPA(ptr, indices);
}
- Value *Builder::GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ Value* Builder::GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
{
return IRB()->CreateGEP(Ptr, IdxList, Name);
}
- Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+ Value* Builder::GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
{
return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
}
- Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
+ Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
{
std::vector<Value*> indices;
for (auto i : indexList)
return IN_BOUNDS_GEP(ptr, indices);
}
- Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
+ Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList)
{
std::vector<Value*> indices;
for (auto i : indexList)
return IN_BOUNDS_GEP(ptr, indices);
}
- LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, Name);
}
- LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ty, Ptr, Name);
}
- LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst*
+ Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(Ptr, usage);
return IRB()->CreateLoad(Ptr, isVolatile, Name);
}
- LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
+ LoadInst* Builder::LOAD(Value* basePtr,
+ const std::initializer_list<uint32_t>& indices,
+ const llvm::Twine& name,
+ Type* Ty,
+ JIT_MEM_CLIENT usage)
{
std::vector<Value*> valIndices;
for (auto i : indices)
return Builder::LOAD(GEPA(basePtr, valIndices), name);
}
- LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
+ LoadInst* Builder::LOADV(Value* basePtr,
+ const std::initializer_list<Value*>& indices,
+ const llvm::Twine& name)
{
std::vector<Value*> valIndices;
for (auto i : indices)
return LOAD(GEPA(basePtr, valIndices), name);
}
- StoreInst *Builder::STORE(Value *val, Value *basePtr, const std::initializer_list<uint32_t> &indices)
+ StoreInst*
+ Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
{
std::vector<Value*> valIndices;
for (auto i : indices)
return STORE(val, GEPA(basePtr, valIndices));
}
- StoreInst *Builder::STOREV(Value *val, Value *basePtr, const std::initializer_list<Value*> &indices)
+ StoreInst*
+ Builder::STOREV(Value* val, Value* basePtr, const std::initializer_list<Value*>& indices)
{
std::vector<Value*> valIndices;
for (auto i : indices)
return STORE(val, GEPA(basePtr, valIndices));
}
- Value* Builder::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset)
+ Value* Builder::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
{
return GEP(base, offset);
}
- Value* Builder::MEM_ADD(Value* i32Incr, Value* basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
+ Value* Builder::MEM_ADD(Value* i32Incr,
+ Value* basePtr,
+ const std::initializer_list<uint32_t>& indices,
+ const llvm::Twine& name)
{
- Value* i32Value = LOAD(GEP(basePtr, indices), name);
+ Value* i32Value = LOAD(GEP(basePtr, indices), name);
Value* i32Result = ADD(i32Value, i32Incr);
return STORE(i32Result, GEP(basePtr, indices));
}
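// MEM_ADD emits a read-modify-write on the addressed 32-bit member; its scalar
// equivalent is simply (sketch):
//
//   int32_t* p = /* &basePtr[indices...] */;
//   *p += i32Incr;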
//////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
+ /// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
/// @param vSrc - SIMD wide value that will be loaded if mask is invalid
/// @param pBase - Int8* base VB address pointer value
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
+ Value* Builder::GATHERPS(Value* vSrc,
+ Value* pBase,
+ Value* vIndices,
+ Value* vMask,
+ uint8_t scale,
+ JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
}
//////////////////////////////////////////////////////////////////////////
- /// @brief Generate a masked gather operation in LLVM IR. If not
+ /// @brief Generate a masked gather operation in LLVM IR. If not
/// supported on the underlying platform, emulate it with loads
/// @param vSrc - SIMD wide value that will be loaded if mask is invalid
/// @param pBase - Int8* base VB address pointer value
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
+ Value* Builder::GATHERDD(Value* vSrc,
+ Value* pBase,
+ Value* vIndices,
+ Value* vMask,
+ uint8_t scale,
+ JIT_MEM_CLIENT usage)
{
AssertMemoryUsageParams(pBase, usage);
/// @param vIndices - SIMD wide value of VB byte offsets
/// @param vMask - SIMD wide mask that controls whether to access memory or the src values
/// @param scale - value to scale indices by
- Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
+ Value*
+ Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
{
return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
}
return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru);
}
- void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+ void Builder::Gather4(const SWR_FORMAT format,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* mask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage)
{
- const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+ const SWR_FORMAT_INFO& info = GetFormatInfo(format);
if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
{
GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
}
}
- void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+ void Builder::GATHER4PS(const SWR_FORMAT_INFO& info,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* vMask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
// offset base to the next components (zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+ vGatherResult[1] =
+ GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+ // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
//
}
else
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
+ vGatherComponents[swizzleIndex] = GATHERPS(
+ vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
}
}
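// For 32bpc float formats the loop above issues one gather per enabled
// component, bumping the base by 4 bytes between components. Scalar sketch
// (names and lane count illustrative, not part of this patch); masked-off
// lanes keep the default values already seeded into vGatherComponents:
//
//   for (uint32_t c = 0; c < info.numComps; ++c)
//       for (uint32_t i = 0; i < lanes; ++i)
//           if (mask[i])
//               out[info.swizzle[c]][i] =
//                   *reinterpret_cast<const float*>(base + byteOffsets[i] + 4 * c);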
- void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+ void Builder::GATHER4DD(const SWR_FORMAT_INFO& info,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* vMask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage)
{
switch (info.bpp / info.numComps)
{
case 8:
{
Value* vGatherMaskedVal = VIMMED1((int32_t)0);
- Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+ Value* vGatherResult =
+ GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of an 8x32bit integer gather for 8bit components
// 256i - 0 1 2 3 4 5 6 7
- // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
+ // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
Shuffle8bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
}
// offset base to the next components (zw) in the vertex to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
- vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+ vGatherResult[1] =
+ GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
// e.g. result of second 8x32bit integer gather for 16bit components
// 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+ // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
//
}
else
// Shuffle gathered components into place, each row is a component
Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
-
}
break;
case 32:
uint32_t swizzleIndex = info.swizzle[i];
// Gather a SIMD of components
- vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
+ vGatherComponents[swizzleIndex] = GATHERDD(
+ vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
// offset base to the next component to gather
pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
}
}
- void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
+ void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
+ Value* vGatherInput[2],
+ Value* vGatherOutput[4],
+ bool bPackedOutput)
{
// cast types
Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
- // input could either be float or int vector; do shuffle work in int
+ // input could either be float or int vector; do shuffle work in int
vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
vGatherInput[1] = BITCAST(vGatherInput[1], mSimdInt32Ty);
if (bPackedOutput)
{
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
-
- // shuffle mask
- Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
- 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+ mVWidth / 4); // vwidth is units of 32 bits
+
+ // shuffle mask
+ Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+ Value* vShufResult =
+ BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
// after pshufb: group components together in each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
- Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ Value* vi128XY =
+ BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
// after PERMD: move and pack xy components into each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
Value* vi128ZW = nullptr;
if (info.numComps > 2)
{
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
- vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ Value* vShufResult =
+ BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
+ vi128ZW =
+ BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
}
for (uint32_t i = 0; i < 4; i++)
// if x or y, use vi128XY permute result, else use vi128ZW
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
- // extract packed component 128 bit lanes
+ // extract packed component 128 bit lanes
vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
}
-
}
else
{
// pshufb masks for each component
Value* vConstMask[2];
// x/z shuffle mask
- vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+ vConstMask[0] = C<char>({
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ });
// y/w shuffle mask
- vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
- 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
-
+ vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
// shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
// apply defaults
// if x or y, use vi128XY permute result, else use vi128ZW
uint32_t selectedGather = (i < 2) ? 0 : 1;
- vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
+ vGatherOutput[swizzleIndex] =
+ BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty),
+ vConstMask[selectedMask]),
+ vGatherTy);
// after pshufb mask for x channel; z uses the same shuffle from the second gather
// 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
+ // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
}
}
}
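// Net effect of Shuffle16bpcGather4 in the unpacked path (sketch; names
// illustrative): de-interleave two gathers of packed 16-bit pairs
// (xyxy... and zwzw...) into one zero-extended vector per component.
//
//   for (uint32_t i = 0; i < lanes; ++i)
//   {
//       out[info.swizzle[0]][i] = in0[i] & 0xFFFF; // x from low halves
//       out[info.swizzle[1]][i] = in0[i] >> 16;    // y from high halves
//       out[info.swizzle[2]][i] = in1[i] & 0xFFFF; // z
//       out[info.swizzle[3]][i] = in1[i] >> 16;    // w
//   }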
- void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
+ void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
+ Value* vGatherInput,
+ Value* vGatherOutput[],
+ bool bPackedOutput)
{
// cast types
Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
if (bPackedOutput)
{
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
- // shuffle mask
- Value* vConstMask = C<char>({ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
- 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 });
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+ mVWidth / 4); // vwidth is units of 32 bits
+ // shuffle mask
+ Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
+ Value* vShufResult =
+ BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
// after pshufb: group components together in each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
- Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
+ Value* vi128XY =
+ BITCAST(VPERMD(vShufResult, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty);
// after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
Value* vi128ZW = nullptr;
if (info.numComps > 2)
{
- vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
+ vi128ZW =
+ BITCAST(VPERMD(vShufResult, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty);
}
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+ // sign extend all enabled components. If vVertexElements is full, output to the
+ // current simdvertex
for (uint32_t i = 0; i < 4; i++)
{
uint32_t swizzleIndex = info.swizzle[i];
}
}
// else zero extend
- else {
+ else
+ {
// shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
// apply defaults
for (uint32_t i = 0; i < 4; ++i)
vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
}
- for (uint32_t i = 0; i < info.numComps; i++) {
+ for (uint32_t i = 0; i < info.numComps; i++)
+ {
uint32_t swizzleIndex = info.swizzle[i];
// pshufb masks for each component
{
case 0:
// x shuffle mask
- vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
+ vConstMask =
+ C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
+ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
break;
case 1:
// y shuffle mask
- vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
+ vConstMask =
+ C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
+ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
break;
case 2:
// z shuffle mask
- vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
+ vConstMask =
+ C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
+ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
break;
case 3:
// w shuffle mask
- vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
+ vConstMask =
+ C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
+ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
break;
default:
vConstMask = nullptr;
break;
}
- vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
+ vGatherOutput[swizzleIndex] =
+ BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
// after pshufb for x channel
// 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
+ // x000 x000 x000 x000 x000 x000 x000 x000
}
}
}
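// Net effect of Shuffle8bpcGather4 in the zero-extend path (sketch; names
// illustrative): each gathered 32-bit lane packs one byte per component
// (x y z w), split out into per-component vectors.
//
//   for (uint32_t i = 0; i < lanes; ++i)
//       for (uint32_t c = 0; c < info.numComps; ++c)
//           out[info.swizzle[c]][i] = (in[i] >> (8 * c)) & 0xFF;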
//////////////////////////////////////////////////////////////////////////
/// @brief emulates a scatter operation.
- /// @param pDst - pointer to destination
+ /// @param pDst - pointer to destination
/// @param vSrc - vector of src data to scatter
/// @param vOffsets - vector of byte offsets from pDst
/// @param vMask - mask of valid lanes
- void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask)
+ void Builder::SCATTERPS(
+ Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
{
+ AssertMemoryUsageParams(pDst, usage);
+
/* Scatter algorithm
while(Index = BitScanForward(mask))
*/
BasicBlock* pCurBB = IRB()->GetInsertBlock();
- Function* pFunc = pCurBB->getParent();
- Type* pSrcTy = vSrc->getType()->getVectorElementType();
+ Function* pFunc = pCurBB->getParent();
+ Type* pSrcTy = vSrc->getType()->getVectorElementType();
// Store vectors on stack
if (pScatterStackSrc == nullptr)
{
// Save off stack allocations and reuse per scatter. Significantly reduces stack
// requirements for shaders with a lot of scatters.
- pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
+ pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
pScatterStackOffsets = CreateEntryAlloca(pFunc, mSimdInt32Ty);
}
- Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
+ Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
Value* pOffsetsArrayPtr = pScatterStackOffsets;
STORE(vSrc, pSrcArrayPtr);
STORE(vOffsets, pOffsetsArrayPtr);
// Cast to pointers for random access
- pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
+ pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
Value* pMask = VMOVMSK(vMask);
// Add loop basic block contents
IRB()->SetInsertPoint(pLoop);
PHINode* pIndexPhi = PHI(mInt32Ty, 2);
- PHINode* pMaskPhi = PHI(mInt32Ty, 2);
+ PHINode* pMaskPhi = PHI(mInt32Ty, 2);
pIndexPhi->addIncoming(pIndex, pCurBB);
pMaskPhi->addIncoming(pMask, pCurBB);
// Extract elements for this index
- Value* pSrcElem = LOADV(pSrcArrayPtr, { pIndexPhi });
- Value* pOffsetElem = LOADV(pOffsetsArrayPtr, { pIndexPhi });
+ Value* pSrcElem = LOADV(pSrcArrayPtr, {pIndexPhi});
+ Value* pOffsetElem = LOADV(pOffsetsArrayPtr, {pIndexPhi});
// GEP to this offset in dst
Value* pCurDst = GEP(pDst, pOffsetElem, mInt8PtrTy);
- pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
+ pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
STORE(pSrcElem, pCurDst);
// Update the mask
// Move builder to beginning of post loop
IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
}
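// Scalar reference for the scatter emulation above (sketch; the emitted IR
// walks only set mask bits via a BitScanForward loop rather than all lanes):
//
//   void ScatterPS_Ref(uint8_t* pDst, const float* vSrc,
//                      const uint32_t* vOffsets, uint32_t mask, uint32_t lanes)
//   {
//       for (uint32_t i = 0; i < lanes; ++i)
//           if (mask & (1u << i))
//               *reinterpret_cast<float*>(pDst + vOffsets[i]) = vSrc[i];
//   }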
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.h
-*
-* @brief miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.h
+ *
+ * @brief miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
public:
-
typedef enum _JIT_MEM_CLIENT
{
MEM_CLIENT_INTERNAL,
} JIT_MEM_CLIENT;
protected:
-
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
-void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
+void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
public:
-
-virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
-virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
-virtual Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
-virtual Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
-
-Value *GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
-Value *GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
-
-Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
-Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
-
-virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
+virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
+virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
+virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
+virtual Value*
+GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
+
+Value* GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
+Value* GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
+
+Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList);
+Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
+
+virtual LoadInst*
+ LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value* Ptr,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst*
+ LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value* Ptr,
+ bool isVolatile,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value* BasePtr,
+ const std::initializer_list<uint32_t>& offset,
+ const llvm::Twine& Name = "",
+ Type* Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_LOAD(Value* Ptr,
+ unsigned Align,
+ Value* Mask,
+ Value* PassThru = nullptr,
+ const Twine& Name = "",
+ Type* Ty = nullptr,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
{
return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
}
-LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
-StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
-StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*> &offset);
-
-Value* MEM_ADD(Value* i32Incr, Value* basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name = "");
-
-void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
- Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-
-Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
-
-void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
-
-void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
-void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Value* vGatherOutput[], bool bPackedOutput);
+LoadInst*
+ LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
+StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
+StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
+
+Value* MEM_ADD(Value* i32Incr,
+ Value* basePtr,
+ const std::initializer_list<uint32_t>& indices,
+ const llvm::Twine& name = "");
+
+void Gather4(const SWR_FORMAT format,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* mask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+virtual Value* GATHERPS(Value* src,
+ Value* pBase,
+ Value* indices,
+ Value* mask,
+ uint8_t scale = 1,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void GATHER4PS(const SWR_FORMAT_INFO& info,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* mask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+virtual Value* GATHERDD(Value* src,
+ Value* pBase,
+ Value* indices,
+ Value* mask,
+ uint8_t scale = 1,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void GATHER4DD(const SWR_FORMAT_INFO& info,
+ Value* pSrcBase,
+ Value* byteOffsets,
+ Value* mask,
+ Value* vGatherComponents[],
+ bool bPackedOutput,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
+
+Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
+
+virtual void SCATTERPS(Value* pDst,
+ Value* vSrc,
+ Value* vOffsets,
+ Value* vMask,
+ JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
+ Value* vGatherInput,
+ Value* vGatherOutput[],
+ bool bPackedOutput);
+void Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
+ Value* vGatherInput[],
+ Value* vGatherOutput[],
+ bool bPackedOutput);
// Static stack allocations for scatter operations
-Value* pScatterStackSrc{ nullptr };
-Value* pScatterStackOffsets{ nullptr };
+Value* pScatterStackSrc{nullptr};
+Value* pScatterStackOffsets{nullptr};
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.cpp
-*
-* @brief Implementation for miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.cpp
+ *
+ * @brief Implementation for miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"
// Extract the sign, exponent, and mantissa
uint32_t uf = *(uint32_t*)&val;
- sign = (uf & 0x80000000) >> 31;
- exp = (uf & 0x7F800000) >> 23;
- mant = uf & 0x007FFFFF;
+ sign = (uf & 0x80000000) >> 31;
+ exp = (uf & 0x7F800000) >> 23;
+ mant = uf & 0x007FFFFF;
// Check for out of range
if (std::isnan(val))
{
- exp = 0x1F;
+ exp = 0x1F;
mant = 0x200;
- sign = 1; // set the sign bit for NANs
+ sign = 1; // set the sign bit for NaNs
}
else if (std::isinf(val))
{
- exp = 0x1f;
+ exp = 0x1f;
mant = 0x0;
}
else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
{
- exp = 0x1E;
+ exp = 0x1E;
mant = 0x3FF;
}
else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
mant |= 0x00800000;
for (; exp <= 0x70; mant >>= 1, exp++)
;
- exp = 0;
+ exp = 0;
mant = mant >> 13;
}
else if (exp < 0x66) // Too small to represent -> Zero
{
- exp = 0;
+ exp = 0;
mant = 0;
}
else
// Saves bits that will be shifted off for rounding
roundBits = mant & 0x1FFFu;
// convert exponent and mantissa to 16 bit format
- exp = exp - 0x70;
+ exp = exp - 0x70;
mant = mant >> 13;
// Essentially RTZ, but round up if off by only 1 lsb
{
uint32_t sign = (val & 0x8000) << 16;
uint32_t mant = (val & 0x3ff) << 13;
- uint32_t exp = (val >> 10) & 0x1f;
+ uint32_t exp = (val >> 10) & 0x1f;
if ((exp == 0) && (mant != 0)) // Adjust exponent and mantissa for denormals
{
mant <<= 1;
}
mant &= (0x3ff << 13);
}
- exp = ((exp - 15 + 127) & 0xff) << 23;
+ exp = ((exp - 15 + 127) & 0xff) << 23;
result = sign | exp | mant;
}
return *(float*)&result;
}
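// Worked example of the binary16 layout used above (1 sign / 5 exponent /
// 10 mantissa bits, exponent rebias 127 -> 15, i.e. subtract 0x70). A
// standalone sketch, independent of the JIT; it covers the normal-range
// path only:
#include <cstdint>
#include <cstring>
#include <cstdio>

int main()
{
    float    f = 1.5f; // bits 0x3FC00000: sign 0, exp 127, mantissa 0x400000
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits)); // well-defined type pun

    uint32_t sign = bits >> 31;
    uint32_t exp  = (bits >> 23) & 0xFF;
    uint32_t mant = bits & 0x007FFFFF;

    // Rebias the exponent and drop the low 13 mantissa bits.
    uint16_t half = (uint16_t)((sign << 15) | ((exp - 0x70) << 10) | (mant >> 13));
    std::printf("0x%04X\n", half); // prints 0x3E00, which is 1.5 in binary16
    return 0;
}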
- Constant *Builder::C(bool i)
- {
- return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0));
- }
+ Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); }
- Constant *Builder::C(char i)
- {
- return ConstantInt::get(IRB()->getInt8Ty(), i);
- }
+ Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
- Constant *Builder::C(uint8_t i)
- {
- return ConstantInt::get(IRB()->getInt8Ty(), i);
- }
+ Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
- Constant *Builder::C(int i)
- {
- return ConstantInt::get(IRB()->getInt32Ty(), i);
- }
+ Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
- Constant *Builder::C(int64_t i)
- {
- return ConstantInt::get(IRB()->getInt64Ty(), i);
- }
+ Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
- Constant *Builder::C(uint16_t i)
- {
- return ConstantInt::get(mInt16Ty,i);
- }
+ Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); }
- Constant *Builder::C(uint32_t i)
- {
- return ConstantInt::get(IRB()->getInt32Ty(), i);
- }
+ Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
- Constant *Builder::C(uint64_t i)
- {
- return ConstantInt::get(IRB()->getInt64Ty(), i);
- }
+ Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
- Constant *Builder::C(float i)
- {
- return ConstantFP::get(IRB()->getFloatTy(), i);
- }
+ Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); }
- Constant *Builder::PRED(bool pred)
+ Constant* Builder::PRED(bool pred)
{
return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
}
- Value *Builder::VIMMED1(int i)
+ Value* Builder::VIMMED1(int i)
{
return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
- Value *Builder::VIMMED1_16(int i)
+ Value* Builder::VIMMED1_16(int i)
{
return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
}
- Value *Builder::VIMMED1(uint32_t i)
+ Value* Builder::VIMMED1(uint32_t i)
{
return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
- Value *Builder::VIMMED1_16(uint32_t i)
+ Value* Builder::VIMMED1_16(uint32_t i)
{
return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
}
- Value *Builder::VIMMED1(float i)
+ Value* Builder::VIMMED1(float i)
{
return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
}
- Value *Builder::VIMMED1_16(float i)
+ Value* Builder::VIMMED1_16(float i)
{
return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
}
- Value *Builder::VIMMED1(bool i)
+ Value* Builder::VIMMED1(bool i)
{
return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
}
- Value *Builder::VIMMED1_16(bool i)
+ Value* Builder::VIMMED1_16(bool i)
{
return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
}
- Value *Builder::VUNDEF_IPTR()
- {
- return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth));
- }
+ Value* Builder::VUNDEF_IPTR() { return UndefValue::get(VectorType::get(mInt32PtrTy, mVWidth)); }
- Value *Builder::VUNDEF(Type* t)
- {
- return UndefValue::get(VectorType::get(t, mVWidth));
- }
+ Value* Builder::VUNDEF(Type* t) { return UndefValue::get(VectorType::get(t, mVWidth)); }
- Value *Builder::VUNDEF_I()
- {
- return UndefValue::get(VectorType::get(mInt32Ty, mVWidth));
- }
+ Value* Builder::VUNDEF_I() { return UndefValue::get(VectorType::get(mInt32Ty, mVWidth)); }
- Value *Builder::VUNDEF_I_16()
- {
- return UndefValue::get(VectorType::get(mInt32Ty, mVWidth16));
- }
+ Value* Builder::VUNDEF_I_16() { return UndefValue::get(VectorType::get(mInt32Ty, mVWidth16)); }
- Value *Builder::VUNDEF_F()
- {
- return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
- }
+ Value* Builder::VUNDEF_F() { return UndefValue::get(VectorType::get(mFP32Ty, mVWidth)); }
- Value *Builder::VUNDEF_F_16()
- {
- return UndefValue::get(VectorType::get(mFP32Ty, mVWidth16));
- }
+ Value* Builder::VUNDEF_F_16() { return UndefValue::get(VectorType::get(mFP32Ty, mVWidth16)); }
- Value *Builder::VUNDEF(Type *ty, uint32_t size)
+ Value* Builder::VUNDEF(Type* ty, uint32_t size)
{
return UndefValue::get(VectorType::get(ty, size));
}
- Value *Builder::VBROADCAST(Value *src, const llvm::Twine& name)
+ Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name)
{
// check if src is already a vector
if (src->getType()->isVectorTy())
return VECTOR_SPLAT(mVWidth, src, name);
}
- Value *Builder::VBROADCAST_16(Value *src)
+ Value* Builder::VBROADCAST_16(Value* src)
{
// check if src is already a vector
if (src->getType()->isVectorTy())
uint32_t Builder::IMMED(Value* v)
{
SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt *pValConst = cast<ConstantInt>(v);
+ ConstantInt* pValConst = cast<ConstantInt>(v);
return pValConst->getZExtValue();
}
int32_t Builder::S_IMMED(Value* v)
{
SWR_ASSERT(isa<ConstantInt>(v));
- ConstantInt *pValConst = cast<ConstantInt>(v);
+ ConstantInt* pValConst = cast<ConstantInt>(v);
return pValConst->getSExtValue();
}
- CallInst *Builder::CALL(Value *Callee, const std::initializer_list<Value*> &argsList, const llvm::Twine& name)
+ CallInst* Builder::CALL(Value* Callee,
+ const std::initializer_list<Value*>& argsList,
+ const llvm::Twine& name)
{
std::vector<Value*> args;
for (auto arg : argsList)
return CALLA(Callee, args, name);
}
- CallInst *Builder::CALL(Value *Callee, Value* arg)
+ CallInst* Builder::CALL(Value* Callee, Value* arg)
{
std::vector<Value*> args;
args.push_back(arg);
return CALLA(Callee, args);
}
- CallInst *Builder::CALL2(Value *Callee, Value* arg1, Value* arg2)
+ CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2)
{
std::vector<Value*> args;
args.push_back(arg1);
return CALLA(Callee, args);
}
- CallInst *Builder::CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3)
+ CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3)
{
std::vector<Value*> args;
args.push_back(arg1);
return CALLA(Callee, args);
}
- Value *Builder::VRCP(Value *va, const llvm::Twine& name)
+ Value* Builder::VRCP(Value* va, const llvm::Twine& name)
{
- return FDIV(VIMMED1(1.0f), va, name); // 1 / a
+ return FDIV(VIMMED1(1.0f), va, name); // 1 / a
}
- Value *Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY)
+ Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY)
{
Value* vOut = FMADDPS(vA, vX, vC);
- vOut = FMADDPS(vB, vY, vOut);
+ vOut = FMADDPS(vB, vY, vOut);
return vOut;
}
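// VPLANEPS evaluates the interpolation plane a*x + b*y + c one lane at a
// time via two fused multiply-adds. Scalar equivalent (illustrative sketch;
// plane_eval_ref is not part of this codebase):
static float plane_eval_ref(float a, float b, float c, float x, float y)
{
    float out = a * x + c; // FMADDPS(vA, vX, vC)
    return b * y + out;    // FMADDPS(vB, vY, vOut)
}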
/// result from a GEP, printing out the pointer to memory
/// @param printStr - constant string to print, which includes format specifiers
/// @param printArgs - initializer list of Value*'s to print to std out
- CallInst *Builder::PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs)
+ CallInst* Builder::PRINT(const std::string& printStr,
+ const std::initializer_list<Value*>& printArgs)
{
// push the arguments to CallPrint into a vector
std::vector<Value*> printCallArgs;
printCallArgs.resize(1);
// search through the format string for special processing
- size_t pos = 0;
+ size_t pos = 0;
std::string tempStr(printStr);
- pos = tempStr.find('%', pos);
+ pos = tempStr.find('%', pos);
auto v = printArgs.begin();
while ((pos != std::string::npos) && (v != printArgs.end()))
{
- Value* pArg = *v;
- Type* pType = pArg->getType();
+ Value* pArg = *v;
+ Type* pType = pArg->getType();
if (pType->isVectorTy())
{
if (toupper(tempStr[pos + 1]) == 'X')
{
- tempStr[pos] = '0';
+ tempStr[pos] = '0';
tempStr[pos + 1] = 'x';
tempStr.insert(pos + 2, "%08X ");
pos += 7;
{
tempStr.insert(pos, std::string("%f "));
pos += 3;
- printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
+ printCallArgs.push_back(
+ FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
}
- printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
+ printCallArgs.push_back(
+ FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
}
else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
{
{
tempStr.insert(pos, std::string("%d "));
pos += 3;
- printCallArgs.push_back(S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+ printCallArgs.push_back(
+ S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
}
- printCallArgs.push_back(S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+ printCallArgs.push_back(
+ S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
}
else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy()))
{
{
tempStr.insert(pos, std::string("%d "));
pos += 3;
- printCallArgs.push_back(Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+ printCallArgs.push_back(
+ Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
}
- printCallArgs.push_back(Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+ printCallArgs.push_back(
+ Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
}
}
else
}
// create global variable constant string
- Constant *constString = ConstantDataArray::getString(JM()->mContext,tempStr,true);
- GlobalVariable *gvPtr = new GlobalVariable(constString->getType(),true,GlobalValue::InternalLinkage,constString,"printStr");
+ Constant* constString = ConstantDataArray::getString(JM()->mContext, tempStr, true);
+ GlobalVariable* gvPtr = new GlobalVariable(
+ constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr");
JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);
// get a pointer to the first character in the constant string array
- std::vector<Constant*> geplist{C(0),C(0)};
- Constant *strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr,geplist,false);
+ std::vector<Constant*> geplist{C(0), C(0)};
+ Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false);
// insert the pointer to the format string in the argument vector
printCallArgs[0] = strGEP;
// get pointer to CallPrint function and insert decl into the module if needed
std::vector<Type*> args;
- args.push_back(PointerType::get(mInt8Ty,0));
- FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext),args,true);
- Function *callPrintFn = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
+ args.push_back(PointerType::get(mInt8Ty, 0));
+ FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true);
+ Function* callPrintFn =
+ cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
// if we haven't yet added the symbol to the symbol table
- if((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
+ if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
{
- sys::DynamicLibrary::AddSymbol("CallPrint", (void *)&CallPrint);
+ sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint);
}
// insert a call to CallPrint
- return CALLA(callPrintFn,printCallArgs);
+ return CALLA(callPrintFn, printCallArgs);
}
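// Usage sketch (names illustrative): scalar arguments map straight onto
// printf-style specifiers, while each vector argument is expanded into one
// specifier and one extracted lane per SIMD element by the scanner above.
//
//     PRINT("x = %f mask = %x\n", {pXVec, pMaskVec});
//
// The rewritten format string and flattened argument list are then passed
// to the host-side CallPrint function.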
//////////////////////////////////////////////////////////////////////////
/// @brief Wrapper around PRINT with initializer list.
- CallInst* Builder::PRINT(const std::string &printStr)
- {
- return PRINT(printStr, {});
- }
+ CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); }
- Value *Builder::EXTRACT_16(Value *x, uint32_t imm)
+ Value* Builder::EXTRACT_16(Value* x, uint32_t imm)
{
if (imm == 0)
{
- return VSHUFFLE(x, UndefValue::get(x->getType()), { 0, 1, 2, 3, 4, 5, 6, 7 });
+ return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
}
else
{
- return VSHUFFLE(x, UndefValue::get(x->getType()), { 8, 9, 10, 11, 12, 13, 14, 15 });
+ return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
}
}
- Value *Builder::JOIN_16(Value *a, Value *b)
+ Value* Builder::JOIN_16(Value* a, Value* b)
{
- return VSHUFFLE(a, b, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+ return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
}
//////////////////////////////////////////////////////////////////////////
/// @brief convert x86 <N x float> mask to llvm <N x i1> mask
- Value *Builder::MASK(Value *vmask)
+ Value* Builder::MASK(Value* vmask)
{
- Value *src = BITCAST(vmask, mSimdInt32Ty);
+ Value* src = BITCAST(vmask, mSimdInt32Ty);
return ICMP_SLT(src, VIMMED1(0));
}
- Value *Builder::MASK_16(Value *vmask)
+ Value* Builder::MASK_16(Value* vmask)
{
- Value *src = BITCAST(vmask, mSimd16Int32Ty);
+ Value* src = BITCAST(vmask, mSimd16Int32Ty);
return ICMP_SLT(src, VIMMED1_16(0));
}
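// The x86 convention keeps a float mask's per-lane predicate in the sign
// bit, so a signed "lane < 0" compare after a bitcast recovers an i1 per
// lane. Scalar sketch of the same test (illustrative only):
static bool x86_mask_lane_ref(uint32_t laneBits)
{
    return (int32_t)laneBits < 0; // true iff bit 31 (the sign bit) is set
}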
//////////////////////////////////////////////////////////////////////////
/// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
- Value *Builder::VMASK(Value *mask)
- {
- return S_EXT(mask, mSimdInt32Ty);
- }
+ Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); }
- Value *Builder::VMASK_16(Value *mask)
- {
- return S_EXT(mask, mSimd16Int32Ty);
- }
+ Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); }
/// @brief Convert <Nxi1> llvm mask to integer
- Value *Builder::VMOVMSK(Value* mask)
+ Value* Builder::VMOVMSK(Value* mask)
{
SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
uint32_t numLanes = mask->getType()->getVectorNumElements();
- Value* i32Result;
+ Value* i32Result;
if (numLanes == 8)
{
i32Result = BITCAST(mask, mInt8Ty);
}
//////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation in LLVM IR. If not
+ /// @brief Generate a VPSHUFB operation in LLVM IR. If not
/// supported on the underlying platform, emulate it
/// @param a - 256bit SIMD(32x8bit) of 8bit integer values
/// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
- /// Byte masks in lower 128 lane of b selects 8 bit values from lower
- /// 128bits of a, and vice versa for the upper lanes. If the mask
+ /// Byte masks in the lower 128-bit lane of b select 8-bit values from the lower
+ /// 128 bits of a, and vice versa for the upper lanes. If the mask
/// value is negative, '0' is inserted.
- Value *Builder::PSHUFB(Value* a, Value* b)
+ Value* Builder::PSHUFB(Value* a, Value* b)
{
Value* res;
// use avx2 pshufb instruction if available
- if(JM()->mArch.AVX2())
+ if (JM()->mArch.AVX2())
{
res = VPSHUFB(a, b);
}
// insert an 8 bit value from the high and low lanes of a per loop iteration
numElms /= 2;
- for(uint32_t i = 0; i < numElms; i++)
+ for (uint32_t i = 0; i < numElms; i++)
{
- ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
+ ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
// extract values from constant mask
- char valLow128bLane = (char)(cLow128b->getSExtValue());
+ char valLow128bLane = (char)(cLow128b->getSExtValue());
char valHigh128bLane = (char)(cHigh128b->getSExtValue());
Value* insertValLow128b;
Value* insertValHigh128b;
// if the mask value is negative, insert a '0' in the respective output position
- // otherwise, lookup the value at mask position (bits 3..0 of the respective mask byte) in a and insert in output vector
- insertValLow128b = (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
- insertValHigh128b = (valHigh128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
+ // otherwise, look up the value at the mask position (bits 3..0 of the respective mask
+ // byte) in a and insert it into the output vector
+ insertValLow128b =
+ (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
+ insertValHigh128b = (valHigh128bLane < 0)
+ ? C((char)0)
+ : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
vShuf = VINSERT(vShuf, insertValLow128b, i);
vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
}
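// Scalar reference for the emulated 256-bit PSHUFB above (sketch only;
// pshufb_ref is not part of this codebase): the two 128-bit halves shuffle
// independently, and a negative mask byte produces zero.
static void pshufb_ref(const int8_t a[32], const int8_t b[32], int8_t out[32])
{
    for (int half = 0; half < 2; ++half)
    {
        for (int i = 0; i < 16; ++i)
        {
            int8_t m           = b[half * 16 + i];
            out[half * 16 + i] = (m < 0) ? 0 : a[half * 16 + (m & 0xF)];
        }
    }
}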
//////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
+ /// @brief Generate a VPMOVSXBD operation (sign extend 8 8-bit values to 32
/// bits) in LLVM IR. If not supported on the underlying platform, emulate it
- /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
+ /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
/// lower 8 values are used.
- Value *Builder::PMOVSXBD(Value* a)
+ Value* Builder::PMOVSXBD(Value* a)
{
// VPMOVSXBD output type
Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
}
//////////////////////////////////////////////////////////////////////////
- /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
+ /// @brief Generate a VPMOVSXWD operation (sign extend 8 16-bit values to 32
/// bits) in LLVM IR. If not supported on the underlying platform, emulate it
/// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
- Value *Builder::PMOVSXWD(Value* a)
+ Value* Builder::PMOVSXWD(Value* a)
{
// VPMOVSXWD output type
Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
/// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
/// in LLVM IR. If not supported on the underlying platform, emulate it
/// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
- Value *Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
+ Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
{
if (JM()->mArch.F16C())
{
}
else
{
- FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty);
- Function* pCvtPh2Ps = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
+ FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty);
+ Function* pCvtPh2Ps = cast<Function>(
+ JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr)
{
- sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)&ConvertFloat16ToFloat32);
+ sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32",
+ (void*)&ConvertFloat16ToFloat32);
}
Value* pResult = UndefValue::get(mSimdFP32Ty);
for (uint32_t i = 0; i < mVWidth; ++i)
{
- Value* pSrc = VEXTRACT(a, C(i));
+ Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
- pResult = VINSERT(pResult, pConv, C(i));
+ pResult = VINSERT(pResult, pConv, C(i));
}
pResult->setName(name);
/// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
/// in LLVM IR. If not supported on the underlying platform, emulate it
/// @param a - SIMD of float32 values to be converted to float16.
- Value *Builder::CVTPS2PH(Value* a, Value* rounding)
+ Value* Builder::CVTPS2PH(Value* a, Value* rounding)
{
if (JM()->mArch.F16C())
{
else
{
// call scalar C function for now
- FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
- Function* pCvtPs2Ph = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
+ FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
+ Function* pCvtPs2Ph = cast<Function>(
+ JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
{
- sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", (void *)&ConvertFloat32ToFloat16);
+ sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16",
+ (void*)&ConvertFloat32ToFloat16);
}
Value* pResult = UndefValue::get(mSimdInt16Ty);
for (uint32_t i = 0; i < mVWidth; ++i)
{
- Value* pSrc = VEXTRACT(a, C(i));
+ Value* pSrc = VEXTRACT(a, C(i));
Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
- pResult = VINSERT(pResult, pConv, C(i));
+ pResult = VINSERT(pResult, pConv, C(i));
}
return pResult;
}
}
- Value *Builder::PMAXSD(Value* a, Value* b)
+ Value* Builder::PMAXSD(Value* a, Value* b)
{
Value* cmp = ICMP_SGT(a, b);
return SELECT(cmp, a, b);
}
- Value *Builder::PMINSD(Value* a, Value* b)
+ Value* Builder::PMINSD(Value* a, Value* b)
{
Value* cmp = ICMP_SLT(a, b);
return SELECT(cmp, a, b);
}
- Value *Builder::PMAXUD(Value* a, Value* b)
+ Value* Builder::PMAXUD(Value* a, Value* b)
{
Value* cmp = ICMP_UGT(a, b);
return SELECT(cmp, a, b);
}
- Value *Builder::PMINUD(Value* a, Value* b)
+ Value* Builder::PMINUD(Value* a, Value* b)
{
Value* cmp = ICMP_ULT(a, b);
return SELECT(cmp, a, b);
Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
{
auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
- pFunc->getEntryBlock().begin());
+ IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
Value* pAlloca = ALLOCA(pType);
- if (saveIP.isSet()) IRB()->restoreIP(saveIP);
+ if (saveIP.isSet())
+ IRB()->restoreIP(saveIP);
return pAlloca;
}
Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
{
auto saveIP = IRB()->saveIP();
- IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
- pFunc->getEntryBlock().begin());
+ IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
Value* pAlloca = ALLOCA(pType, pArraySize);
- if (saveIP.isSet()) IRB()->restoreIP(saveIP);
+ if (saveIP.isSet())
+ IRB()->restoreIP(saveIP);
return pAlloca;
}
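// Both overloads above hoist the alloca into the entry block on purpose:
// LLVM's mem2reg/SROA passes only promote static entry-block allocas to
// registers, and an alloca emitted inside a loop would also grow the stack
// on every iteration. Sketch of the resulting IR shape (illustrative):
//
//   entry:
//     %tmp = alloca <8 x float>      ; placed here by CreateEntryAlloca
//     br label %loop
//   loop:
//     ...                            ; uses %tmp, no per-iteration alloca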
Value* Builder::VABSPS(Value* a)
{
- Value* asInt = BITCAST(a, mSimdInt32Ty);
+ Value* asInt = BITCAST(a, mSimdInt32Ty);
Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
return result;
}
- Value *Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
+ Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
{
- Value *lowCmp = ICMP_SLT(src, low);
- Value *ret = SELECT(lowCmp, low, src);
+ Value* lowCmp = ICMP_SLT(src, low);
+ Value* ret = SELECT(lowCmp, low, src);
- Value *highCmp = ICMP_SGT(ret, high);
- ret = SELECT(highCmp, high, ret, name);
+ Value* highCmp = ICMP_SGT(ret, high);
+ ret = SELECT(highCmp, high, ret, name);
return ret;
}
- Value *Builder::FCLAMP(Value* src, Value* low, Value* high)
+ Value* Builder::FCLAMP(Value* src, Value* low, Value* high)
{
- Value *lowCmp = FCMP_OLT(src, low);
- Value *ret = SELECT(lowCmp, low, src);
+ Value* lowCmp = FCMP_OLT(src, low);
+ Value* ret = SELECT(lowCmp, low, src);
- Value *highCmp = FCMP_OGT(ret, high);
- ret = SELECT(highCmp, high, ret);
+ Value* highCmp = FCMP_OGT(ret, high);
+ ret = SELECT(highCmp, high, ret);
return ret;
}
- Value *Builder::FCLAMP(Value* src, float low, float high)
+ Value* Builder::FCLAMP(Value* src, float low, float high)
{
Value* result = VMAXPS(src, VIMMED1(low));
- result = VMINPS(result, VIMMED1(high));
+ result = VMINPS(result, VIMMED1(high));
return result;
}
- Value *Builder::FMADDPS(Value* a, Value* b, Value* c)
+ Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
{
Value* vOut;
// use FMADs if available
- if(JM()->mArch.AVX2())
+ if (JM()->mArch.AVX2())
{
vOut = VFMADDPS(a, b, c);
}
//////////////////////////////////////////////////////////////////////////
/// @brief pop count on vector mask (e.g. <8 x i1>)
- Value* Builder::VPOPCNT(Value* a)
- {
- return POPCNT(VMOVMSK(a));
- }
+ Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
//////////////////////////////////////////////////////////////////////////
/// @brief C functions called by LLVM IR
//////////////////////////////////////////////////////////////////////////
- Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
+ Value* Builder::VEXTRACTI128(Value* a, Constant* imm8)
{
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < mVWidth / 2; i++) {
+ bool flag = !imm8->isZeroValue();
+ SmallVector<Constant*, 8> idx;
+ for (unsigned i = 0; i < mVWidth / 2; i++)
+ {
idx.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
}
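// Worked example for mVWidth == 8: imm8 == 0 builds the index list
// {0, 1, 2, 3} (low 128 bits) and any nonzero imm8 builds {4, 5, 6, 7}
// (high 128 bits), so the call above reduces to (illustrative):
//
//   VSHUFFLE(a, VUNDEF_I(), C({0, 1, 2, 3}));   // extract low half
//   VSHUFFLE(a, VUNDEF_I(), C({4, 5, 6, 7}));   // extract high half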
- Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
+ Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
{
- bool flag = !imm8->isZeroValue();
- SmallVector<Constant*,8> idx;
- for (unsigned i = 0; i < mVWidth; i++) {
+ bool flag = !imm8->isZeroValue();
+ SmallVector<Constant*, 8> idx;
+ for (unsigned i = 0; i < mVWidth; i++)
+ {
idx.push_back(C(i));
}
- Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
+ Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
- SmallVector<Constant*,8> idx2;
- for (unsigned i = 0; i < mVWidth / 2; i++) {
+ SmallVector<Constant*, 8> idx2;
+ for (unsigned i = 0; i < mVWidth / 2; i++)
+ {
idx2.push_back(C(flag ? i : i + mVWidth));
}
- for (unsigned i = mVWidth / 2; i < mVWidth; i++) {
+ for (unsigned i = mVWidth / 2; i < mVWidth; i++)
+ {
idx2.push_back(C(flag ? i + mVWidth / 2 : i));
}
return VSHUFFLE(a, inter, ConstantVector::get(idx2));
// rdtsc buckets macros
void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
{
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
- // buckets framework when single threaded
+ // @todo due to an issue with thread local storage propagation in llvm, we can only safely
+ // call into the buckets framework when single threaded
if (KNOB_SINGLE_THREADED)
{
std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
+ PointerType::get(mInt32Ty, 0), // pBucketMgr
+ mInt32Ty // id
};
FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == nullptr)
+ Function* pFunc = cast<Function>(
+ JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
+ if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") ==
+ nullptr)
{
- sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", (void*)&BucketManager_StartBucket);
+ sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket",
+ (void*)&BucketManager_StartBucket);
}
- CALL(pFunc, { pBucketMgr, pId });
+ CALL(pFunc, {pBucketMgr, pId});
}
}
void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
{
- // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
- // buckets framework when single threaded
+ // @todo due to an issue with thread local storage propagation in llvm, we can only safely
+ // call into the buckets framework when single threaded
if (KNOB_SINGLE_THREADED)
{
std::vector<Type*> args{
- PointerType::get(mInt32Ty, 0), // pBucketMgr
- mInt32Ty // id
+ PointerType::get(mInt32Ty, 0), // pBucketMgr
+ mInt32Ty // id
};
FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
- Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
- if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == nullptr)
+ Function* pFunc = cast<Function>(
+ JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
+ if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") ==
+ nullptr)
{
- sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", (void*)&BucketManager_StopBucket);
+ sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket",
+ (void*)&BucketManager_StopBucket);
}
- CALL(pFunc, { pBucketMgr, pId });
+ CALL(pFunc, {pBucketMgr, pId});
}
}
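// The two functions above follow the codebase's standard recipe for letting
// JIT-compiled IR call back into host C code: declare the callee in the
// module, then register its address once with the JIT's symbol resolver.
// Condensed sketch with an assumed host function MyHook (illustrative; ctx
// and pModule stand in for the JitManager's context and current module):
//
//   extern "C" void MyHook(uint32_t id);
//
//   FunctionType* pTy = FunctionType::get(Type::getVoidTy(ctx), {mInt32Ty}, false);
//   Function* pHook =
//       cast<Function>(pModule->getOrInsertFunction("MyHook", pTy));
//   if (sys::DynamicLibrary::SearchForAddressOfSymbol("MyHook") == nullptr)
//   {
//       sys::DynamicLibrary::AddSymbol("MyHook", (void*)&MyHook);
//   }
//   CALL(pHook, {pId});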
if (pType->isStructTy())
{
uint32_t numElems = pType->getStructNumElements();
- Type* pElemTy = pType->getStructElementType(0);
+ Type* pElemTy = pType->getStructElementType(0);
return numElems * GetTypeSize(pElemTy);
}
if (pType->isArrayTy())
{
uint32_t numElems = pType->getArrayNumElements();
- Type* pElemTy = pType->getArrayElementType();
+ Type* pElemTy = pType->getArrayElementType();
return numElems * GetTypeSize(pElemTy);
}
SWR_ASSERT(false, "Unimplemented type.");
return 0;
}
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.h
-*
-* @brief miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.h
+ *
+ * @brief miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
-Constant *C(bool i);
-Constant *C(char i);
-Constant *C(uint8_t i);
-Constant *C(int i);
-Constant *C(int64_t i);
-Constant *C(uint64_t i);
-Constant *C(uint16_t i);
-Constant *C(uint32_t i);
-Constant *C(float i);
-
-template<typename Ty>
-Constant *C(const std::initializer_list<Ty> &constList)
+Constant* C(bool i);
+Constant* C(char i);
+Constant* C(uint8_t i);
+Constant* C(int i);
+Constant* C(int64_t i);
+Constant* C(uint64_t i);
+Constant* C(uint16_t i);
+Constant* C(uint32_t i);
+Constant* C(float i);
+
+template <typename Ty>
+Constant* C(const std::initializer_list<Ty>& constList)
{
std::vector<Constant*> vConsts;
- for(auto i : constList) {
-
+ for (auto i : constList)
+ {
vConsts.push_back(C((Ty)i));
}
return ConstantVector::get(vConsts);
}
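// Usage sketch: the scalar C() overloads above make vector constants terse,
// e.g. (illustrative)
//
//   Value* vIota = C({0, 1, 2, 3, 4, 5, 6, 7}); // <8 x i32> 0..7
//   Value* vOnes = C({1.0f, 1.0f, 1.0f, 1.0f}); // <4 x float> of 1.0
//
// Each element is routed through the matching scalar overload before
// ConstantVector::get assembles the result.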
-template<typename Ty>
-Constant *CA(LLVMContext& ctx, ArrayRef<Ty> constList)
+template <typename Ty>
+Constant* CA(LLVMContext& ctx, ArrayRef<Ty> constList)
{
return ConstantDataArray::get(ctx, constList);
}
-template<typename Ty>
-Constant *CInc(uint32_t base, uint32_t count)
+template <typename Ty>
+Constant* CInc(uint32_t base, uint32_t count)
{
std::vector<Constant*> vConsts;
- for(uint32_t i = 0; i < count; i++) {
+ for (uint32_t i = 0; i < count; i++)
+ {
vConsts.push_back(C((Ty)base));
base++;
}
return ConstantVector::get(vConsts);
}
-Constant *PRED(bool pred);
+Constant* PRED(bool pred);
-Value *VIMMED1(int i);
-Value *VIMMED1_16(int i);
+Value* VIMMED1(int i);
+Value* VIMMED1_16(int i);
-Value *VIMMED1(uint32_t i);
-Value *VIMMED1_16(uint32_t i);
+Value* VIMMED1(uint32_t i);
+Value* VIMMED1_16(uint32_t i);
-Value *VIMMED1(float i);
-Value *VIMMED1_16(float i);
+Value* VIMMED1(float i);
+Value* VIMMED1_16(float i);
-Value *VIMMED1(bool i);
-Value *VIMMED1_16(bool i);
+Value* VIMMED1(bool i);
+Value* VIMMED1_16(bool i);
-Value *VUNDEF(Type* t);
+Value* VUNDEF(Type* t);
-Value *VUNDEF_F();
-Value *VUNDEF_F_16();
+Value* VUNDEF_F();
+Value* VUNDEF_F_16();
-Value *VUNDEF_I();
-Value *VUNDEF_I_16();
+Value* VUNDEF_I();
+Value* VUNDEF_I_16();
-Value *VUNDEF(Type* ty, uint32_t size);
+Value* VUNDEF(Type* ty, uint32_t size);
-Value *VUNDEF_IPTR();
+Value* VUNDEF_IPTR();
-Value *VBROADCAST(Value *src, const llvm::Twine& name = "");
-Value *VBROADCAST_16(Value *src);
+Value* VBROADCAST(Value* src, const llvm::Twine& name = "");
+Value* VBROADCAST_16(Value* src);
-Value *VRCP(Value *va, const llvm::Twine& name = "");
-Value *VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY);
+Value* VRCP(Value* va, const llvm::Twine& name = "");
+Value* VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY);
uint32_t IMMED(Value* i);
-int32_t S_IMMED(Value* i);
+int32_t S_IMMED(Value* i);
-CallInst *CALL(Value *Callee, const std::initializer_list<Value*> &args, const llvm::Twine& name = "");
-CallInst *CALL(Value *Callee) { return CALLA(Callee); }
-CallInst *CALL(Value *Callee, Value* arg);
-CallInst *CALL2(Value *Callee, Value* arg1, Value* arg2);
-CallInst *CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3);
+CallInst*
+ CALL(Value* Callee, const std::initializer_list<Value*>& args, const llvm::Twine& name = "");
+CallInst* CALL(Value* Callee)
+{
+ return CALLA(Callee);
+}
+CallInst* CALL(Value* Callee, Value* arg);
+CallInst* CALL2(Value* Callee, Value* arg1, Value* arg2);
+CallInst* CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3);
-Value *MASK(Value *vmask);
-Value *MASK_16(Value *vmask);
+Value* MASK(Value* vmask);
+Value* MASK_16(Value* vmask);
-Value *VMASK(Value *mask);
-Value *VMASK_16(Value *mask);
+Value* VMASK(Value* mask);
+Value* VMASK_16(Value* mask);
-Value *VMOVMSK(Value *mask);
+Value* VMOVMSK(Value* mask);
//////////////////////////////////////////////////////////////////////////
/// @brief functions that build IR to call x86 intrinsics directly, or
/// emulate them with other instructions if not available on the host
//////////////////////////////////////////////////////////////////////////
-Value *EXTRACT_16(Value *x, uint32_t imm);
-Value *JOIN_16(Value *a, Value *b);
+Value* EXTRACT_16(Value* x, uint32_t imm);
+Value* JOIN_16(Value* a, Value* b);
-Value *PSHUFB(Value* a, Value* b);
-Value *PMOVSXBD(Value* a);
-Value *PMOVSXWD(Value* a);
-Value *CVTPH2PS(Value* a, const llvm::Twine& name = "");
-Value *CVTPS2PH(Value* a, Value* rounding);
-Value *PMAXSD(Value* a, Value* b);
-Value *PMINSD(Value* a, Value* b);
-Value *PMAXUD(Value* a, Value* b);
-Value *PMINUD(Value* a, Value* b);
-Value *VABSPS(Value* a);
-Value *FMADDPS(Value* a, Value* b, Value* c);
+Value* PSHUFB(Value* a, Value* b);
+Value* PMOVSXBD(Value* a);
+Value* PMOVSXWD(Value* a);
+Value* CVTPH2PS(Value* a, const llvm::Twine& name = "");
+Value* CVTPS2PH(Value* a, Value* rounding);
+Value* PMAXSD(Value* a, Value* b);
+Value* PMINSD(Value* a, Value* b);
+Value* PMAXUD(Value* a, Value* b);
+Value* PMINUD(Value* a, Value* b);
+Value* VABSPS(Value* a);
+Value* FMADDPS(Value* a, Value* b, Value* c);
-Value *ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name = "");
-Value *FCLAMP(Value* src, Value* low, Value* high);
-Value *FCLAMP(Value* src, float low, float high);
+Value* ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name = "");
+Value* FCLAMP(Value* src, Value* low, Value* high);
+Value* FCLAMP(Value* src, float low, float high);
-CallInst *PRINT(const std::string &printStr);
-CallInst *PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs);
+CallInst* PRINT(const std::string& printStr);
+CallInst* PRINT(const std::string& printStr, const std::initializer_list<Value*>& printArgs);
Value* VPOPCNT(Value* a);
-Value* INT3() { return DEBUGTRAP(); }
+Value* INT3()
+{
+ return DEBUGTRAP();
+}
-Value *VEXTRACTI128(Value* a, Constant* imm8);
-Value *VINSERTI128(Value* a, Value* b, Constant* imm8);
+Value* VEXTRACTI128(Value* a, Constant* imm8);
+Value* VINSERTI128(Value* a, Value* b, Constant* imm8);
// rdtsc buckets macros
void RDTSC_START(Value* pBucketMgr, Value* pId);
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fetch_jit.cpp
-*
-* @brief Implementation of the fetch jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fetch_jit.cpp
+ *
+ * @brief Implementation of the fetch jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder_gfx_mem.h"
#include "jit_api.h"
//////////////////////////////////////////////////////////////////////////
struct FetchJit : public BuilderGfxMem
{
- FetchJit(JitManager* pJitMgr) :
- BuilderGfxMem(pJitMgr)
- {}
+ FetchJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr) {}
Function* Create(const FETCH_COMPILE_STATE& fetchState);
Value* GetSimdValid32bitIndices(Value* vIndices, Value* pLastIndex);
Value* GetSimdValid16bitIndices(Value* vIndices, Value* pLastIndex);
Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
- template<typename T> Value* GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex);
+ template <typename T>
+ Value* GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex);
// package up Shuffle*bpcGatherd args into a tuple for convenience
- typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
- uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
- const uint32_t(&)[4]> Shuffle8bpcArgs;
-
- void Shuffle8bpcGatherd16(Shuffle8bpcArgs &args);
- void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
-
- typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
- uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
-
- void Shuffle16bpcGather16(Shuffle16bpcArgs &args);
- void Shuffle16bpcGather(Shuffle16bpcArgs &args);
-
- void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
-
- Value *GenerateCompCtrlVector(const ComponentControl ctrl);
-
- void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
+ typedef std::tuple<Value*&,
+ Value*,
+ const Instruction::CastOps,
+ const ConversionType,
+ uint32_t&,
+ uint32_t&,
+ const ComponentEnable,
+ const ComponentControl (&)[4],
+ Value* (&)[4],
+ const uint32_t (&)[4]>
+ Shuffle8bpcArgs;
+
+ void Shuffle8bpcGatherd16(Shuffle8bpcArgs& args);
+ void Shuffle8bpcGatherd(Shuffle8bpcArgs& args);
+
+ typedef std::tuple<Value* (&)[2],
+ Value*,
+ const Instruction::CastOps,
+ const ConversionType,
+ uint32_t&,
+ uint32_t&,
+ const ComponentEnable,
+ const ComponentControl (&)[4],
+ Value* (&)[4]>
+ Shuffle16bpcArgs;
+
+ void Shuffle16bpcGather16(Shuffle16bpcArgs& args);
+ void Shuffle16bpcGather(Shuffle16bpcArgs& args);
+
+ void StoreVertexElements(Value* pVtxOut,
+ const uint32_t outputElt,
+ const uint32_t numEltsToStore,
+ Value* (&vVertexElements)[4]);
+
+ Value* GenerateCompCtrlVector(const ComponentControl ctrl);
+
+ void JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
+ Value* streams,
+ Value* vIndices,
+ Value* pVtxOut);
bool IsOddFormat(SWR_FORMAT format);
bool IsUniformFormat(SWR_FORMAT format);
void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]);
- void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
- void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
+ void CreateGatherOddFormats(
+ SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
+ void ConvertFormat(SWR_FORMAT format, Value* texels[4]);
Value* mpWorkerData;
Value* mpFetchInfo;
std::stringstream fnName("FCH_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
- Function* fetch = Function::Create(JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
- BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
+ Function* fetch = Function::Create(
+ JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+ BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
fetch->getParent()->setModuleIdentifier(fetch->getName());
IRB()->SetInsertPoint(entry);
- auto argitr = fetch->arg_begin();
+ auto argitr = fetch->arg_begin();
// Fetch shader arguments
- Value* privateContext = &*argitr; ++argitr;
+ Value* privateContext = &*argitr;
+ ++argitr;
privateContext->setName("privateContext");
SetPrivateContext(privateContext);
- mpWorkerData = &*argitr; ++argitr;
+ mpWorkerData = &*argitr;
+ ++argitr;
mpWorkerData->setName("pWorkerData");
- mpFetchInfo = &*argitr; ++argitr;
+ mpFetchInfo = &*argitr;
+ ++argitr;
mpFetchInfo->setName("fetchInfo");
- Value* pVtxOut = &*argitr;
+ Value* pVtxOut = &*argitr;
pVtxOut->setName("vtxOutput");
uint32_t baseWidth = mVWidth;
pVtxOut = BITCAST(pVtxOut, PointerType::get(mSimdFP32Ty, 0));
// SWR_FETCH_CONTEXT::pStreams
- Value* streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
+ Value* streams = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_pStreams});
streams->setName("pStreams");
// SWR_FETCH_CONTEXT::pIndices
- Value* indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpIndices});
+ Value* indices = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpIndices});
indices->setName("pIndices");
// SWR_FETCH_CONTEXT::pLastIndex
- Value* pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpLastIndex});
+ Value* pLastIndex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpLastIndex});
pLastIndex->setName("pLastIndex");
Value* vIndices;
- switch(fetchState.indexType)
+ switch (fetchState.indexType)
{
- case R8_UINT:
- indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
- if(fetchState.bDisableIndexOOBCheck)
- {
- vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
- vIndices = Z_EXT(vIndices, mSimdInt32Ty);
- }
- else
- {
- vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
- }
- break;
- case R16_UINT:
- if(fetchState.bDisableIndexOOBCheck)
- {
- vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
- vIndices = Z_EXT(vIndices, mSimdInt32Ty);
- }
- else
- {
- vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
- }
- break;
- case R32_UINT:
- (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
- : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
- break; // incoming type is already 32bit int
- default:
- SWR_INVALID("Unsupported index type");
- vIndices = nullptr;
- break;
+ case R8_UINT:
+ indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
+ if (fetchState.bDisableIndexOOBCheck)
+ {
+ vIndices = LOAD(
+ BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)),
+ {(uint32_t)0});
+ vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+ }
+ else
+ {
+ vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
+ }
+ break;
+ case R16_UINT:
+ if (fetchState.bDisableIndexOOBCheck)
+ {
+ vIndices = LOAD(
+ BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)),
+ {(uint32_t)0});
+ vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+ }
+ else
+ {
+ vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
+ }
+ break;
+ case R32_UINT:
+ (fetchState.bDisableIndexOOBCheck)
+ ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
+ : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
+ break; // incoming type is already 32bit int
+ default:
+ SWR_INVALID("Unsupported index type");
+ vIndices = nullptr;
+ break;
}
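// With bDisableIndexOOBCheck set, the switch above loads a full SIMD of
// indices in one unchecked vector load; otherwise the GetSimdValid*bitIndices
// helpers guard each lane against pLastIndex so a partial final primitive
// cannot read past the end of the index buffer. Per-lane sketch of the
// guarded path (illustrative; the real check is emitted as masked-load IR):
//
//   uint32_t load_index_ref(const uint32_t* pIdx, const uint32_t* pLast)
//   {
//       return (pIdx < pLast) ? *pIdx : 0; // out-of-bounds lanes become 0
//   }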
- if(fetchState.bForceSequentialAccessEnable)
+ if (fetchState.bForceSequentialAccessEnable)
{
- Value* pOffsets = mVWidth == 8 ? C({ 0, 1, 2, 3, 4, 5, 6, 7 }) :
- C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+ Value* pOffsets = mVWidth == 8 ? C({0, 1, 2, 3, 4, 5, 6, 7})
+ : C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
// VertexData buffers are accessed sequentially; the index is equal to the vertex number
- vIndices = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
+ vIndices = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
vIndices = ADD(vIndices, pOffsets);
}
Value* vVertexId = vIndices;
if (fetchState.bVertexIDOffsetEnable)
{
- // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct
- Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_BaseVertex }));
- Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
- vVertexId = ADD(vIndices, vBaseVertex);
- vVertexId = ADD(vVertexId, vStartVertex);
+ // Assuming one of baseVertex or startVertex is 0, adding both should be functionally
+ // correct
+ Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
+ Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
+ vVertexId = ADD(vIndices, vBaseVertex);
+ vVertexId = ADD(vVertexId, vStartVertex);
}
// store out vertex IDs
// store out in simd8 halves until core supports 16-wide natively
auto vVertexIdLo = EXTRACT_16(vVertexId, 0);
auto vVertexIdHi = EXTRACT_16(vVertexId, 1);
- STORE(vVertexIdLo, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
- STORE(vVertexIdHi, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 }));
+ STORE(vVertexIdLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
+ STORE(vVertexIdHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2}));
}
else if (mVWidth == 8)
{
- STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+ STORE(vVertexId, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
}
// store out cut mask if enabled
if (fetchState.bEnableCutIndex)
{
Value* vCutIndex = VIMMED1(fetchState.cutIndex);
- Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
-
+ Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
+
if (mVWidth == 16)
{
auto cutMaskLo = EXTRACT_16(cutMask, 0);
auto cutMaskHi = EXTRACT_16(cutMask, 1);
- STORE(cutMaskLo, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
- STORE(cutMaskHi, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask2 }));
+ STORE(cutMaskLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
+ STORE(cutMaskHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask2}));
}
else if (mVWidth == 8)
{
- STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
+ STORE(cutMask, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
}
}
#if USE_SIMD16_SHADERS
SetTargetWidth(baseWidth);
#endif
-
+
return fetch;
}
// format is uniform if all components are the same size and type
bool FetchJit::IsUniformFormat(SWR_FORMAT format)
{
- const SWR_FORMAT_INFO& info = GetFormatInfo(format);
- uint32_t bpc0 = info.bpc[0];
- uint32_t type0 = info.type[0];
+ const SWR_FORMAT_INFO& info = GetFormatInfo(format);
+ uint32_t bpc0 = info.bpc[0];
+ uint32_t type0 = info.type[0];
for (uint32_t c = 1; c < info.numComps; ++c)
{
for (uint32_t c = 0; c < info.numComps; ++c)
{
uint32_t swizzledIndex = info.swizzle[c];
- uint32_t compBits = info.bpc[c];
- uint32_t bitmask = ((1 << compBits) - 1) << bitOffset;
- Value* comp = AND(vInput, bitmask);
- comp = LSHR(comp, bitOffset);
+ uint32_t compBits = info.bpc[c];
+ uint32_t bitmask = ((1 << compBits) - 1) << bitOffset;
+ Value* comp = AND(vInput, bitmask);
+ comp = LSHR(comp, bitOffset);
result[swizzledIndex] = comp;
bitOffset += compBits;
// gather for odd component size formats
// gather SIMD full pixels per lane then shift/mask to move each component to their
// own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
+void FetchJit::CreateGatherOddFormats(
+ SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
{
- const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+ const SWR_FORMAT_INFO& info = GetFormatInfo(format);
// only works if pixel size is <= 32bits
SWR_ASSERT(info.bpp <= 32);
- Value *pGather;
+ Value* pGather;
if (info.bpp == 32)
{
pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask);
else
{
// Can't use 32-bit gather for items less than 32-bits, could cause page faults.
- Value *pMem = ALLOCA(mSimdInt32Ty);
+ Value* pMem = ALLOCA(mSimdInt32Ty);
STORE(VIMMED1(0u), pMem);
- pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0));
+ pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0));
Value* pDstMem = BITCAST(pMem, mInt32PtrTy);
for (uint32_t lane = 0; lane < mVWidth; ++lane)
{
// Get index
Value* index = VEXTRACT(pOffsets, C(lane));
- Value* mask = VEXTRACT(pMask, C(lane));
+ Value* mask = VEXTRACT(pMask, C(lane));
switch (info.bpp)
{
case 8:
pResult[3] = BITCAST(pResult[3], mSimdFP32Ty);
}
-void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
+void FetchJit::ConvertFormat(SWR_FORMAT format, Value* texels[4])
{
- const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+ const SWR_FORMAT_INFO& info = GetFormatInfo(format);
for (uint32_t c = 0; c < info.numComps; ++c)
{
{
if (info.type[c] == SWR_TYPE_SNORM)
{
- /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to -1.0f.
+ /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to
+ /// -1.0f.
/// result = c * (1.0f / (2^(n-1) - 1);
- uint32_t n = info.bpc[c];
- uint32_t pow2 = 1 << (n - 1);
- float scale = 1.0f / (float)(pow2 - 1);
- Value *vScale = VIMMED1(scale);
+ uint32_t n = info.bpc[c];
+ uint32_t pow2 = 1 << (n - 1);
+ float scale = 1.0f / (float)(pow2 - 1);
+ Value* vScale = VIMMED1(scale);
texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
texels[compIndex] = FMUL(texels[compIndex], vScale);
SWR_ASSERT(info.type[c] == SWR_TYPE_UNORM);
/// result = c * (1.0f / (2^n - 1))
- uint32_t n = info.bpc[c];
+ uint32_t n = info.bpc[c];
uint32_t pow2 = 1 << n;
- // special case 24bit unorm format, which requires a full divide to meet ULP requirement
+ // special case 24bit unorm format, which requires a full divide to meet ULP
+ // requirement
if (n == 24)
{
- float scale = (float)(pow2 - 1);
- Value* vScale = VIMMED1(scale);
+ float scale = (float)(pow2 - 1);
+ Value* vScale = VIMMED1(scale);
texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
texels[compIndex] = FDIV(texels[compIndex], vScale);
}
else
{
- float scale = 1.0f / (float)(pow2 - 1);
- Value *vScale = VIMMED1(scale);
+ float scale = 1.0f / (float)(pow2 - 1);
+ Value* vScale = VIMMED1(scale);
texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
texels[compIndex] = UI_TO_FP(texels[compIndex], mSimdFP32Ty);
texels[compIndex] = FMUL(texels[compIndex], vScale);
/// @param streams - value pointer to the current vertex stream
/// @param vIndices - vector value of indices to gather
/// @param pVtxOut - value pointer to output simdvertex struct
-void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
- Value* streams, Value* vIndices, Value* pVtxOut)
+void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
+ Value* streams,
+ Value* vIndices,
+ Value* pVtxOut)
{
uint32_t currentVertexElement = 0;
- uint32_t outputElt = 0;
- Value* vVertexElements[4];
+ uint32_t outputElt = 0;
+ Value* vVertexElements[4];
- Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex});
+ Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex});
Value* startInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance});
- Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance});
- Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_BaseVertex }));
+ Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance});
+ Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
curInstance->setName("curInstance");
for (uint32_t nInputElt = 0; nInputElt < fetchState.numAttribs; nInputElt += 1)
continue;
}
- const SWR_FORMAT_INFO &info = GetFormatInfo((SWR_FORMAT)ied.Format);
+ const SWR_FORMAT_INFO& info = GetFormatInfo((SWR_FORMAT)ied.Format);
SWR_ASSERT((info.bpp != 0), "Unsupported format in JitGatherVertices.");
- uint32_t bpc = info.bpp / info.numComps; ///@todo Code below assumes all components are same size. Need to fix.
+ uint32_t bpc =
+ info.bpp /
+ info.numComps; ///@todo Code below assumes all components are same size. Need to fix.
- Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
+ Value* stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
// VGATHER* takes an *i8 src pointer
- Value *pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0));
+ Value* pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0));
- Value *stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
- Value *vStride = VBROADCAST(stride);
+ Value* stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
+ Value* vStride = VBROADCAST(stride);
// max vertex index that is fully in bounds
- Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
- maxVertex = LOAD(maxVertex);
+ Value* maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
+ maxVertex = LOAD(maxVertex);
- Value *minVertex = NULL;
+ Value* minVertex = NULL;
if (fetchState.bPartialVertexBuffer)
{
// min vertex index for low bounds OOB checking
curInstance = ADD(curInstance, startInstance);
}
- Value *vCurIndices;
- Value *startOffset;
- Value *vInstanceStride = VIMMED1(0);
+ Value* vCurIndices;
+ Value* startOffset;
+ Value* vInstanceStride = VIMMED1(0);
if (ied.InstanceEnable)
{
// prevent a div by 0 for 0 step rate
Value* isNonZeroStep = ICMP_UGT(stepRate, C(0));
- stepRate = SELECT(isNonZeroStep, stepRate, C(1));
+ stepRate = SELECT(isNonZeroStep, stepRate, C(1));
// calc the current offset into instanced data buffer
Value* calcInstance = UDIV(curInstance, stepRate);
}
else if (ied.InstanceStrideEnable)
{
- // grab the instance advancement state, determines stride in bytes from one instance to the next
+ // grab the instance advancement state, determines stride in bytes from one instance to
+ // the next
Value* stepRate = C(ied.InstanceAdvancementState);
vInstanceStride = VBROADCAST(MUL(curInstance, stepRate));
startOffset = startVertex;
}
- // All of the OOB calculations are in vertices, not VB offsets, to prevent having to
+ // All of the OOB calculations are in vertices, not VB offsets, to prevent having to
// do 64bit address offset calculations.
// calculate byte offset to the start of the VB
- Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
- pStreamBase = GEP(pStreamBase, baseOffset);
+ Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
+ pStreamBase = GEP(pStreamBase, baseOffset);
Value* pStreamBaseGFX = ADD(stream, baseOffset);
// if we have a start offset, subtract from max vertex. Used for OOB check
- maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
+ maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
// if we have a negative value, we're already OOB. clamp at 0.
maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
if (fetchState.bPartialVertexBuffer)
{
// similarly for min vertex
- minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
- Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0));
- minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
+ minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
+ Value* minNeg = ICMP_SLT(minVertex, C((int64_t)0));
+ minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
}
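// Scalar sketch of the clamping above, assuming the 64-bit intermediates
// shown: subtract the start offset in vertex units, clamp negatives to 0,
// then truncate back to 32 bits:
//
//     maxVertex = (uint32_t)std::max<int64_t>(maxVertex - startOffset, 0);
//     minVertex = (uint32_t)std::max<int64_t>(minVertex - startOffset, 0); // partial VBs only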
// Load the in bounds size of a partially valid vertex
- Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
- partialInboundsSize = LOAD(partialInboundsSize);
- Value *vPartialVertexSize = VBROADCAST(partialInboundsSize);
- Value *vBpp = VBROADCAST(C(info.Bpp));
- Value *vAlignmentOffsets = VBROADCAST(C(ied.AlignedByteOffset));
+ Value* partialInboundsSize =
+ GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
+ partialInboundsSize = LOAD(partialInboundsSize);
+ Value* vPartialVertexSize = VBROADCAST(partialInboundsSize);
+ Value* vBpp = VBROADCAST(C(info.Bpp));
+ Value* vAlignmentOffsets = VBROADCAST(C(ied.AlignedByteOffset));
// is the element <= the partially valid size
- Value *vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets));
+ Value* vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets));
// override cur indices with 0 if pitch is 0
Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0));
- vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices);
+ vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices);
// are vertices partially OOB?
- Value* vMaxVertex = VBROADCAST(maxVertex);
+ Value* vMaxVertex = VBROADCAST(maxVertex);
Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
// are vertices fully in bounds?
Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
- Value *vGatherMask;
+ Value* vGatherMask;
if (fetchState.bPartialVertexBuffer)
{
// are vertices below minVertex limit?
- Value *vMinVertex = VBROADCAST(minVertex);
- Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
+ Value* vMinVertex = VBROADCAST(minVertex);
+ Value* vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
// only fetch lanes that pass both tests
vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
// calculate the actual offsets into the VB
Value* vOffsets = MUL(vCurIndices, vStride);
- vOffsets = ADD(vOffsets, vAlignmentOffsets);
+ vOffsets = ADD(vOffsets, vAlignmentOffsets);
// if instance stride enable is:
// true - add product of the instanceID and advancement state to the offset into the VB
// false - value of vInstanceStride has been initialized to zero
vOffsets = ADD(vOffsets, vInstanceStride);
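// A scalar sketch of the per-lane byte offset assembled above:
//
//     offset[lane] = curIndex[lane] * pitch   // vertex stride
//                  + alignedByteOffset        // element offset within the vertex
//                  + instanceStride;          // 0 unless InstanceStrideEnable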
- // Packing and component control
- ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking;
- const ComponentControl compCtrl[4] { (ComponentControl)ied.ComponentControl0, (ComponentControl)ied.ComponentControl1,
- (ComponentControl)ied.ComponentControl2, (ComponentControl)ied.ComponentControl3};
+ // Packing and component control
+ ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking;
+ const ComponentControl compCtrl[4]{(ComponentControl)ied.ComponentControl0,
+ (ComponentControl)ied.ComponentControl1,
+ (ComponentControl)ied.ComponentControl2,
+ (ComponentControl)ied.ComponentControl3};
// Special gather/conversion for formats without equal component sizes
if (IsOddFormat((SWR_FORMAT)ied.Format))
{
- Value *pResults[4];
- CreateGatherOddFormats((SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
+ Value* pResults[4];
+ CreateGatherOddFormats(
+ (SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
ConvertFormat((SWR_FORMAT)ied.Format, pResults);
for (uint32_t c = 0; c < 4; c += 1)
}
}
}
- else if(info.type[0] == SWR_TYPE_FLOAT)
+ else if (info.type[0] == SWR_TYPE_FLOAT)
{
///@todo: support 64 bit vb accesses
- Value *gatherSrc = VIMMED1(0.0f);
+ Value* gatherSrc = VIMMED1(0.0f);
- SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
- "Unsupported format for standard gather fetch.");
+ SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
+ "Unsupported format for standard gather fetch.");
// Gather components from memory to store in a simdvertex structure
switch (bpc)
{
- case 16:
- {
- Value *vGatherResult[2];
+ case 16:
+ {
+ Value* vGatherResult[2];
- // if we have at least one component out of x or y to fetch
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
- {
- vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
- }
+ // if we have at least one component out of x or y to fetch
+ if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+ {
+ vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ // e.g. result of first 8x32bit integer gather for 16bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
+ //
+ }
- // if we have at least one component out of z or w to fetch
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
- {
- // offset base to the next components(zw) in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
-
- vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
+ // if we have at least one component out of z or w to fetch
+ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+ {
+ // offset base to the next components(zw) in the vertex to gather
+ pStreamBase = GEP(pStreamBase, C((char)4));
- // if we have at least one component to shuffle into place
- if (compMask)
- {
- Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, Instruction::CastOps::FPExt, CONVERT_NONE,
- currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+ vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ // e.g. result of second 8x32bit integer gather for 16bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+ //
+ }
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle16bpcGather16(args) : Shuffle16bpcGather(args); // outputs to vVertexElements ref
- }
+ // if we have at least one component to shuffle into place
+ if (compMask)
+ {
+ Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
+ pVtxOut,
+ Instruction::CastOps::FPExt,
+ CONVERT_NONE,
+ currentVertexElement,
+ outputElt,
+ compMask,
+ compCtrl,
+ vVertexElements);
+
+ // Shuffle gathered components into place in simdvertex struct
+ mVWidth == 16 ? Shuffle16bpcGather16(args)
+ : Shuffle16bpcGather(args); // outputs to vVertexElements ref
}
- break;
- case 32:
+ }
+ break;
+ case 32:
+ {
+ for (uint32_t i = 0; i < 4; i += 1)
{
- for (uint32_t i = 0; i < 4; i += 1)
+ if (isComponentEnabled(compMask, i))
{
- if (isComponentEnabled(compMask, i))
+ // if we need to gather the component
+ if (compCtrl[i] == StoreSrc)
{
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
- {
- // Gather a SIMD of vertices
- // APIs allow a 4GB range for offsets
- // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
- // But, we know that elements must be aligned for FETCH. :)
- // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
- Value *vShiftedOffsets = LSHR(vOffsets, 1);
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2, GFX_MEM_CLIENT_FETCH);
- }
- else
- {
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
+ // Gather a SIMD of vertices
+ // APIs allow a 4GB range for offsets
+ // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
+ // But, we know that elements must be aligned for FETCH. :)
+ // Right shift the offset by a bit and then scale by 2 to remove the
+ // sign extension.
+ Value* vShiftedOffsets = LSHR(vOffsets, 1);
+ vVertexElements[currentVertexElement++] =
+ GATHERPS(gatherSrc,
+ pStreamBaseGFX,
+ vShiftedOffsets,
+ vGatherMask,
+ 2,
+ GFX_MEM_CLIENT_FETCH);
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
+ GenerateCompCtrlVector(compCtrl[i]);
}
- // offset base to the next component in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
- pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
+
+ // offset base to the next component in the vertex to gather
+ pStreamBase = GEP(pStreamBase, C((char)4));
+ pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
}
- break;
- case 64:
+ }
+ break;
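// Sketch of why the halved offsets above are safe (assuming 4-byte element
// alignment, as FETCH requires): GATHERPS sign-extends its 32-bit offsets,
// so a raw offset >= 2GB would go negative, but
//
//     addr = base + (offset >> 1) * 2;   // == base + offset for even offsets
//
// reaches the full 4GB range the API allows.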
+ case 64:
+ {
+ for (uint32_t i = 0; i < 4; i += 1)
{
- for (uint32_t i = 0; i < 4; i += 1)
+ if (isComponentEnabled(compMask, i))
{
- if (isComponentEnabled(compMask, i))
+ // if we need to gather the component
+ if (compCtrl[i] == StoreSrc)
{
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
- {
- Value* vShufLo;
- Value* vShufHi;
- Value* vShufAll;
+ Value* vShufLo;
+ Value* vShufHi;
+ Value* vShufAll;
- if (mVWidth == 8)
- {
- vShufLo = C({ 0, 1, 2, 3 });
- vShufHi = C({ 4, 5, 6, 7 });
- vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
- }
- else
- {
- SWR_ASSERT(mVWidth == 16);
- vShufLo = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
- vShufHi = C({ 8, 9, 10, 11, 12, 13, 14, 15 });
- vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
- }
+ if (mVWidth == 8)
+ {
+ vShufLo = C({0, 1, 2, 3});
+ vShufHi = C({4, 5, 6, 7});
+ vShufAll = C({0, 1, 2, 3, 4, 5, 6, 7});
+ }
+ else
+ {
+ SWR_ASSERT(mVWidth == 16);
+ vShufLo = C({0, 1, 2, 3, 4, 5, 6, 7});
+ vShufHi = C({8, 9, 10, 11, 12, 13, 14, 15});
+ vShufAll =
+ C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
+ }
- Value *vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
- Value *vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
+ Value* vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
+ Value* vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
- Value *vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
- Value *vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
+ Value* vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
+ Value* vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
- Value *vZeroDouble = VECTOR_SPLAT(mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
+ Value* vZeroDouble = VECTOR_SPLAT(
+ mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
- Value* pGatherLo = GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
- Value* pGatherHi = GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
+ Value* pGatherLo =
+ GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
+ Value* pGatherHi =
+ GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
- pGatherLo = VCVTPD2PS(pGatherLo);
- pGatherHi = VCVTPD2PS(pGatherHi);
+ pGatherLo = VCVTPD2PS(pGatherLo);
+ pGatherHi = VCVTPD2PS(pGatherHi);
- Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
+ Value* pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
- vVertexElements[currentVertexElement++] = pGather;
- }
- else
- {
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
- }
-
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
+ vVertexElements[currentVertexElement++] = pGather;
+ }
+ else
+ {
+ vVertexElements[currentVertexElement++] =
+ GenerateCompCtrlVector(compCtrl[i]);
}
- // offset base to the next component in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)8));
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
+
+ // offset base to the next component in the vertex to gather
+ pStreamBase = GEP(pStreamBase, C((char)8));
}
- break;
- default:
- SWR_INVALID("Tried to fetch invalid FP format");
- break;
+ }
+ break;
+ default:
+ SWR_INVALID("Tried to fetch invalid FP format");
+ break;
}
}
else
{
Instruction::CastOps extendCastType = Instruction::CastOps::CastOpsEnd;
- ConversionType conversionType = CONVERT_NONE;
+ ConversionType conversionType = CONVERT_NONE;
- SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
- "Unsupported format for standard gather fetch.");
+ SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
+ "Unsupported format for standard gather fetch.");
- switch(info.type[0])
+ switch (info.type[0])
{
- case SWR_TYPE_UNORM:
- conversionType = CONVERT_NORMALIZED;
- case SWR_TYPE_UINT:
- extendCastType = Instruction::CastOps::ZExt;
- break;
- case SWR_TYPE_SNORM:
- conversionType = CONVERT_NORMALIZED;
- case SWR_TYPE_SINT:
- extendCastType = Instruction::CastOps::SExt;
- break;
- case SWR_TYPE_USCALED:
- conversionType = CONVERT_USCALED;
- extendCastType = Instruction::CastOps::UIToFP;
- break;
- case SWR_TYPE_SSCALED:
- conversionType = CONVERT_SSCALED;
- extendCastType = Instruction::CastOps::SIToFP;
- break;
- case SWR_TYPE_SFIXED:
- conversionType = CONVERT_SFIXED;
- extendCastType = Instruction::CastOps::SExt;
- break;
- default:
- break;
+ case SWR_TYPE_UNORM:
+ conversionType = CONVERT_NORMALIZED;
+ case SWR_TYPE_UINT:
+ extendCastType = Instruction::CastOps::ZExt;
+ break;
+ case SWR_TYPE_SNORM:
+ conversionType = CONVERT_NORMALIZED;
+ case SWR_TYPE_SINT:
+ extendCastType = Instruction::CastOps::SExt;
+ break;
+ case SWR_TYPE_USCALED:
+ conversionType = CONVERT_USCALED;
+ extendCastType = Instruction::CastOps::UIToFP;
+ break;
+ case SWR_TYPE_SSCALED:
+ conversionType = CONVERT_SSCALED;
+ extendCastType = Instruction::CastOps::SIToFP;
+ break;
+ case SWR_TYPE_SFIXED:
+ conversionType = CONVERT_SFIXED;
+ extendCastType = Instruction::CastOps::SExt;
+ break;
+ default:
+ break;
}
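// Note: the UNORM and SNORM cases above fall through into UINT/SINT on
// purpose, so a normalized format picks up both its conversion type and the
// matching zero/sign extend cast; only the scaled/fixed cases break out on
// their own.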
// value substituted when component of gather is masked
// Gather components from memory to store in a simdvertex structure
switch (bpc)
{
- case 8:
+ case 8:
+ {
+ // if we have at least one component to fetch
+ if (compMask)
{
- // if we have at least one component to fetch
- if (compMask)
- {
- Value *vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
- // e.g. result of an 8x32bit integer gather for 8bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
-
- Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
- currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
-
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle8bpcGatherd16(args) : Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
- }
+ Value* vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ // e.g. result of an 8x32bit integer gather for 8bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
+
+ Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult,
+ pVtxOut,
+ extendCastType,
+ conversionType,
+ currentVertexElement,
+ outputElt,
+ compMask,
+ compCtrl,
+ vVertexElements,
+ info.swizzle);
+
+ // Shuffle gathered components into place in simdvertex struct
+ mVWidth == 16 ? Shuffle8bpcGatherd16(args)
+ : Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
}
- break;
- case 16:
- {
- Value *vGatherResult[2];
+ }
+ break;
+ case 16:
+ {
+ Value* vGatherResult[2];
- // if we have at least one component out of x or y to fetch
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
- {
- vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
- // e.g. result of first 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
- //
- }
+ // if we have at least one component out of x or y to fetch
+ if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+ {
+ vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ // e.g. result of first 8x32bit integer gather for 16bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
+ //
+ }
- // if we have at least one component out of z or w to fetch
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
- {
- // offset base to the next components(zw) in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
-
- vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
- // e.g. result of second 8x32bit integer gather for 16bit components
- // 256i - 0 1 2 3 4 5 6 7
- // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
- //
- }
+ // if we have at least one component out of z or w to fetch
+ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+ {
+ // offset base to the next components(zw) in the vertex to gather
+ pStreamBase = GEP(pStreamBase, C((char)4));
- // if we have at least one component to shuffle into place
- if (compMask)
- {
- Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
- currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+ vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+ // e.g. result of second 8x32bit integer gather for 16bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+ //
+ }
- // Shuffle gathered components into place in simdvertex struct
- mVWidth == 16 ? Shuffle16bpcGather16(args) : Shuffle16bpcGather(args); // outputs to vVertexElements ref
- }
+ // if we have at least one component to shuffle into place
+ if (compMask)
+ {
+ Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
+ pVtxOut,
+ extendCastType,
+ conversionType,
+ currentVertexElement,
+ outputElt,
+ compMask,
+ compCtrl,
+ vVertexElements);
+
+ // Shuffle gathered components into place in simdvertex struct
+ mVWidth == 16 ? Shuffle16bpcGather16(args)
+ : Shuffle16bpcGather(args); // outputs to vVertexElements ref
}
- break;
- case 32:
+ }
+ break;
+ case 32:
+ {
+ // Gathered components into place in simdvertex struct
+ for (uint32_t i = 0; i < 4; i++)
{
- // Gathered components into place in simdvertex struct
- for (uint32_t i = 0; i < 4; i++)
+ if (isComponentEnabled(compMask, i))
{
- if (isComponentEnabled(compMask, i))
+ // if we need to gather the component
+ if (compCtrl[i] == StoreSrc)
{
- // if we need to gather the component
- if (compCtrl[i] == StoreSrc)
+ Value* pGather =
+ GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+
+ if (conversionType == CONVERT_USCALED)
{
- Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-
- if (conversionType == CONVERT_USCALED)
- {
- pGather = UI_TO_FP(pGather, mSimdFP32Ty);
- }
- else if (conversionType == CONVERT_SSCALED)
- {
- pGather = SI_TO_FP(pGather, mSimdFP32Ty);
- }
- else if (conversionType == CONVERT_SFIXED)
- {
- pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty), VBROADCAST(C(1/65536.0f)));
- }
-
- vVertexElements[currentVertexElement++] = pGather;
-
- // e.g. result of a single 8x32bit integer gather for 32bit components
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
+ pGather = UI_TO_FP(pGather, mSimdFP32Ty);
}
- else
+ else if (conversionType == CONVERT_SSCALED)
{
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+ pGather = SI_TO_FP(pGather, mSimdFP32Ty);
}
-
- if (currentVertexElement > 3)
+ else if (conversionType == CONVERT_SFIXED)
{
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty),
+ VBROADCAST(C(1 / 65536.0f)));
}
+ vVertexElements[currentVertexElement++] = pGather;
+
+ // e.g. result of a single 8x32bit integer gather for 32bit components
+ // 256i - 0 1 2 3 4 5 6 7
+ // xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
}
+ else
+ {
+ vVertexElements[currentVertexElement++] =
+ GenerateCompCtrlVector(compCtrl[i]);
+ }
+
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // offset base to the next component in the vertex to gather
- pStreamBase = GEP(pStreamBase, C((char)4));
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
+ }
}
+
+ // offset base to the next component in the vertex to gather
+ pStreamBase = GEP(pStreamBase, C((char)4));
}
- break;
+ }
+ break;
}
}
}
}
}
-template<typename T> Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
+template <typename T>
+Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
{
- SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
+ SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty,
+ "Function expects gfxptr_t for both input parameters.");
Type* Ty = nullptr;
- static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == sizeof(uint8_t), "Unsupported type for use with GetSimdValidIndicesHelper<T>");
+ static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == sizeof(uint8_t),
+ "Unsupported type for use with GetSimdValidIndicesHelper<T>");
constexpr bool bSize = (sizeof(T) == sizeof(uint16_t));
if (bSize)
{
for (int64_t lane = 0; lane < mVWidth; lane++)
{
// Calculate the address of the requested index
- Value *pIndex = GEP(pIndices, C(lane), Ty);
+ Value* pIndex = GEP(pIndices, C(lane), Ty);
pLastIndex = INT_TO_PTR(pLastIndex, Ty);
- // check if the address is less than the max index,
+ // check if the address is less than the max index,
Value* mask = ICMP_ULT(pIndex, pLastIndex);
// if valid, load the index. if not, load 0 from the stack
Value* pValid = SELECT(mask, pIndex, pZeroIndex);
- Value *index = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH);
+ Value* index = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH);
// zero extended index to 32 bits and insert into the correct simd lane
- index = Z_EXT(index, mInt32Ty);
+ index = Z_EXT(index, mInt32Ty);
vIndices = VINSERT(vIndices, index, lane);
}
}
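// A scalar sketch (assuming N lanes) of the bounds check the loop above
// emits for 8/16-bit indices; OOB lanes read a stack slot holding 0:
//
//     for (uint32_t lane = 0; lane < N; ++lane)
//         vIndices[lane] = (&pIndices[lane] < pLastIndex)
//                              ? (uint32_t)pIndices[lane] // zero-extended
//                              : 0;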
Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
{
DataLayout dL(JM()->mpCurrentModule);
- Value* iLastIndex = pLastIndex;
- Value* iIndices = pIndices;
+ Value* iLastIndex = pLastIndex;
+ Value* iIndices = pIndices;
// get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index)
- Value* numIndicesLeft = SUB(iLastIndex,iIndices);
- numIndicesLeft = TRUNC(numIndicesLeft, mInt32Ty);
- numIndicesLeft = SDIV(numIndicesLeft, C(4));
+ Value* numIndicesLeft = SUB(iLastIndex, iIndices);
+ numIndicesLeft = TRUNC(numIndicesLeft, mInt32Ty);
+ numIndicesLeft = SDIV(numIndicesLeft, C(4));
// create a vector of index counts from the base index ptr passed into the fetch
Constant* vIndexOffsets;
if (mVWidth == 8)
{
- vIndexOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+ vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7});
}
else
{
- vIndexOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+ vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
}
// compare index count to the max valid index
// ------------------------------
// vIndexMask -1-1-1-1 0 0 0 0 : offsets < max pass
// vLoadedIndices 0 1 2 3 0 0 0 0 : offsets >= max masked to 0
- Value* vMaxIndex = VBROADCAST(numIndicesLeft);
+ Value* vMaxIndex = VBROADCAST(numIndicesLeft);
Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
// Load the indices; OOB loads 0
- return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH);
+ return MASKED_LOAD(pIndices,
+ 4,
+ vIndexMask,
+ VIMMED1(0),
+ "vIndices",
+ PointerType::get(mSimdInt32Ty, 0),
+ GFX_MEM_CLIENT_FETCH);
}
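// Equivalent scalar view of the masked load above (a sketch, assuming
// 4-byte indices): lanes past the end of the index buffer are masked off
// and yield 0 instead of faulting:
//
//     int32_t numLeft = (int32_t)(lastIndexAddr - indexAddr) / 4; // bytes -> indices
//     for (uint32_t lane = 0; lane < simdWidth; ++lane)
//         vIndices[lane] = ((int32_t)lane < numLeft) ? indexPtr[lane] : 0;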
//////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD of gathered 8bpc verts, zero or sign extends,
-/// denormalizes if needed, converts to F32 if needed, and positions in
+/// @brief Takes a SIMD of gathered 8bpc verts, zero or sign extends,
+/// denormalizes if needed, converts to F32 if needed, and positions in
// the proper SIMD rows to be output to the simdvertex structure
/// @param args: (tuple of args, listed below)
/// @param vGatherResult - 8 gathered 8bpc vertices
/// @param compCtrl - component control val
/// @param vVertexElements[4] - vertex components to output
/// @param swizzle[4] - component swizzle location
-void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
+void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs& args)
{
// Unpack tuple args
- Value*& vGatherResult = std::get<0>(args);
- Value* pVtxOut = std::get<1>(args);
- const Instruction::CastOps extendType = std::get<2>(args);
- const ConversionType conversionType = std::get<3>(args);
- uint32_t &currentVertexElement = std::get<4>(args);
- uint32_t &outputElt = std::get<5>(args);
- const ComponentEnable compMask = std::get<6>(args);
- const ComponentControl(&compCtrl)[4] = std::get<7>(args);
- Value* (&vVertexElements)[4] = std::get<8>(args);
- const uint32_t(&swizzle)[4] = std::get<9>(args);
+ Value*& vGatherResult = std::get<0>(args);
+ Value* pVtxOut = std::get<1>(args);
+ const Instruction::CastOps extendType = std::get<2>(args);
+ const ConversionType conversionType = std::get<3>(args);
+ uint32_t& currentVertexElement = std::get<4>(args);
+ uint32_t& outputElt = std::get<5>(args);
+ const ComponentEnable compMask = std::get<6>(args);
+ const ComponentControl(&compCtrl)[4] = std::get<7>(args);
+ Value*(&vVertexElements)[4] = std::get<8>(args);
+ const uint32_t(&swizzle)[4] = std::get<9>(args);
// cast types
- Type *vGatherTy = VectorType::get(mInt32Ty, 8);
- Type *v32x8Ty = VectorType::get(mInt8Ty, 32);
+ Type* vGatherTy = VectorType::get(mInt32Ty, 8);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, 32);
// have to do extra work for sign extending
if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP))
{
- Type *v16x8Ty = VectorType::get(mInt8Ty, 16); // 8x16bit ints in a 128bit lane
- Type *v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
+ Type* v16x8Ty = VectorType::get(mInt8Ty, 16); // 8x16bit ints in a 128bit lane
+ Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
// shuffle mask, including any swizzling
- const char x = (char)swizzle[0]; const char y = (char)swizzle[1];
- const char z = (char)swizzle[2]; const char w = (char)swizzle[3];
- Value *vConstMask = C<char>({ char(x), char(x + 4), char(x + 8), char(x + 12),
- char(y), char(y + 4), char(y + 8), char(y + 12),
- char(z), char(z + 4), char(z + 8), char(z + 12),
- char(w), char(w + 4), char(w + 8), char(w + 12),
- char(x), char(x + 4), char(x + 8), char(x + 12),
- char(y), char(y + 4), char(y + 8), char(y + 12),
- char(z), char(z + 4), char(z + 8), char(z + 12),
- char(w), char(w + 4), char(w + 8), char(w + 12) });
+ const char x = (char)swizzle[0];
+ const char y = (char)swizzle[1];
+ const char z = (char)swizzle[2];
+ const char w = (char)swizzle[3];
+ Value* vConstMask = C<char>(
+ {char(x), char(x + 4), char(x + 8), char(x + 12), char(y), char(y + 4),
+ char(y + 8), char(y + 12), char(z), char(z + 4), char(z + 8), char(z + 12),
+ char(w), char(w + 4), char(w + 8), char(w + 12), char(x), char(x + 4),
+ char(x + 8), char(x + 12), char(y), char(y + 4), char(y + 8), char(y + 12),
+ char(z), char(z + 4), char(z + 8), char(z + 12), char(w), char(w + 4),
+ char(w + 8), char(w + 12)});
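// Layout sketch for the mask above, assuming the identity swizzle
// x=0, y=1, z=2, w=3: each 128-bit half holds four packed xyzw pixels, and
// bytes {c, c+4, c+8, c+12} pull component c from each pixel, so after
// PSHUFB every half reads xxxx yyyy zzzz wwww (as the comment below notes).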
// SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
- Value *vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
- Value *vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
+ Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
+ Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
- Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
- Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
+ Value* vShufResult_lo =
+ BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
+ Value* vShufResult_hi =
+ BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
// after pshufb: group components together in each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
- Value *vi128XY_lo = nullptr;
- Value *vi128XY_hi = nullptr;
+ Value* vi128XY_lo = nullptr;
+ Value* vi128XY_hi = nullptr;
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- vi128XY_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
- vi128XY_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
+ vi128XY_lo = BITCAST(
+ VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
+ v128Ty);
+ vi128XY_hi = BITCAST(
+ VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
+ v128Ty);
// after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
}
// do the same for zw components
- Value *vi128ZW_lo = nullptr;
- Value *vi128ZW_hi = nullptr;
+ Value* vi128ZW_lo = nullptr;
+ Value* vi128ZW_hi = nullptr;
if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
{
- vi128ZW_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
- vi128ZW_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
+ vi128ZW_lo = BITCAST(
+ VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
+ v128Ty);
+ vi128ZW_hi = BITCAST(
+ VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
+ v128Ty);
}
// init denormalize variables if needed
Instruction::CastOps fpCast;
- Value *conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::SIToFP;
+ fpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0 / 127.0));
break;
case CONVERT_SSCALED:
- fpCast = Instruction::CastOps::SIToFP;
+ fpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0));
break;
case CONVERT_USCALED:
break;
}
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+ // sign extend all enabled components. If we have a fill vVertexElements, output to current
+ // simdvertex
for (uint32_t i = 0; i < 4; i++)
{
if (isComponentEnabled(compMask, i))
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
// if x or y, use vi128XY permute result, else use vi128ZW
- Value *selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
- Value *selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
+ Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
+ Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
// sign extend
- Value *temp_lo = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v16x8Ty));
- Value *temp_hi = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v16x8Ty));
+ Value* temp_lo =
+ PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v16x8Ty));
+ Value* temp_hi =
+ PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v16x8Ty));
Value* temp = JOIN_16(temp_lo, temp_hi);
}
}
// else zero extend
- else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+ else if ((extendType == Instruction::CastOps::ZExt) ||
+ (extendType == Instruction::CastOps::UIToFP))
{
// init denormalize variables if needed
Instruction::CastOps fpCast;
- Value *conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0 / 255.0));
break;
case CONVERT_USCALED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0));
break;
case CONVERT_SSCALED:
if (compCtrl[i] == ComponentControl::StoreSrc)
{
// pshufb masks for each component
- Value *vConstMask;
+ Value* vConstMask;
switch (swizzle[i])
{
case 0:
// x shuffle mask
- vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
+ vConstMask =
+ C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
+ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
break;
case 1:
// y shuffle mask
- vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
+ vConstMask =
+ C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
+ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
break;
case 2:
// z shuffle mask
- vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
+ vConstMask =
+ C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
+ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
break;
case 3:
// w shuffle mask
- vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
+ vConstMask =
+ C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
+ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
break;
default:
vConstMask = nullptr;
break;
}
- Value *vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
- Value *vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
+ Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
+ Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
- Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
- Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
+ Value* temp_lo =
+ BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
+ Value* temp_hi =
+ BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
// after pshufb for x channel
// 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
+ // x000 x000 x000 x000 x000 x000 x000 x000
Value* temp = JOIN_16(temp_lo, temp_hi);
}
}
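// In the zero-extend path above, each per-component PSHUFB mask moves one
// source byte into the low byte of every 32-bit lane and fills the rest
// with -1, which PSHUFB writes as zero; the zero extension therefore falls
// out of the shuffle itself (hence the x000 pattern noted above).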
-void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
+void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs& args)
{
// Unpack tuple args
- Value*& vGatherResult = std::get<0>(args);
- Value* pVtxOut = std::get<1>(args);
- const Instruction::CastOps extendType = std::get<2>(args);
- const ConversionType conversionType = std::get<3>(args);
- uint32_t &currentVertexElement = std::get<4>(args);
- uint32_t &outputElt = std::get<5>(args);
- const ComponentEnable compMask = std::get<6>(args);
- const ComponentControl(&compCtrl)[4] = std::get<7>(args);
- Value* (&vVertexElements)[4] = std::get<8>(args);
- const uint32_t(&swizzle)[4] = std::get<9>(args);
+ Value*& vGatherResult = std::get<0>(args);
+ Value* pVtxOut = std::get<1>(args);
+ const Instruction::CastOps extendType = std::get<2>(args);
+ const ConversionType conversionType = std::get<3>(args);
+ uint32_t& currentVertexElement = std::get<4>(args);
+ uint32_t& outputElt = std::get<5>(args);
+ const ComponentEnable compMask = std::get<6>(args);
+ const ComponentControl(&compCtrl)[4] = std::get<7>(args);
+ Value*(&vVertexElements)[4] = std::get<8>(args);
+ const uint32_t(&swizzle)[4] = std::get<9>(args);
// cast types
Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
if (compCtrl[i] == ComponentControl::StoreSrc)
{
std::vector<uint32_t> vShuffleMasks[4] = {
- { 0, 4, 8, 12, 16, 20, 24, 28 }, // x
- { 1, 5, 9, 13, 17, 21, 25, 29 }, // y
- { 2, 6, 10, 14, 18, 22, 26, 30 }, // z
- { 3, 7, 11, 15, 19, 23, 27, 31 }, // w
+ {0, 4, 8, 12, 16, 20, 24, 28}, // x
+ {1, 5, 9, 13, 17, 21, 25, 29}, // y
+ {2, 6, 10, 14, 18, 22, 26, 30}, // z
+ {3, 7, 11, 15, 19, 23, 27, 31}, // w
};
- Value *val = VSHUFFLE(BITCAST(vGatherResult, v32x8Ty),
- UndefValue::get(v32x8Ty),
- vShuffleMasks[swizzle[i]]);
+ Value* val = VSHUFFLE(BITCAST(vGatherResult, v32x8Ty),
+ UndefValue::get(v32x8Ty),
+ vShuffleMasks[swizzle[i]]);
if ((extendType == Instruction::CastOps::SExt) ||
- (extendType == Instruction::CastOps::SIToFP)) {
+ (extendType == Instruction::CastOps::SIToFP))
+ {
switch (conversionType)
{
case CONVERT_NORMALIZED:
}
}
else if ((extendType == Instruction::CastOps::ZExt) ||
- (extendType == Instruction::CastOps::UIToFP)) {
+ (extendType == Instruction::CastOps::UIToFP))
+ {
switch (conversionType)
{
case CONVERT_NORMALIZED:
}
//////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD of gathered 16bpc verts, zero or sign extends,
-/// denormalizes if needed, converts to F32 if needed, and positions in
+/// @brief Takes a SIMD of gathered 16bpc verts, zero or sign extends,
+/// denormalizes if needed, converts to F32 if needed, and positions in
// the proper SIMD rows to be output to the simdvertex structure
/// @param args: (tuple of args, listed below)
/// @param vGatherResult[2] - array of gathered 16bpc vertices, 4 per index
/// @param compMask - component packing mask
/// @param compCtrl - component control val
/// @param vVertexElements[4] - vertex components to output
-void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
+void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs& args)
{
// Unpack tuple args
- Value* (&vGatherResult)[2] = std::get<0>(args);
- Value* pVtxOut = std::get<1>(args);
- const Instruction::CastOps extendType = std::get<2>(args);
- const ConversionType conversionType = std::get<3>(args);
- uint32_t &currentVertexElement = std::get<4>(args);
- uint32_t &outputElt = std::get<5>(args);
- const ComponentEnable compMask = std::get<6>(args);
- const ComponentControl(&compCtrl)[4] = std::get<7>(args);
- Value* (&vVertexElements)[4] = std::get<8>(args);
+ Value*(&vGatherResult)[2] = std::get<0>(args);
+ Value* pVtxOut = std::get<1>(args);
+ const Instruction::CastOps extendType = std::get<2>(args);
+ const ConversionType conversionType = std::get<3>(args);
+ uint32_t& currentVertexElement = std::get<4>(args);
+ uint32_t& outputElt = std::get<5>(args);
+ const ComponentEnable compMask = std::get<6>(args);
+ const ComponentControl(&compCtrl)[4] = std::get<7>(args);
+ Value*(&vVertexElements)[4] = std::get<8>(args);
// cast types
- Type *vGatherTy = VectorType::get(mInt32Ty, 8);
- Type *v32x8Ty = VectorType::get(mInt8Ty, 32);
+ Type* vGatherTy = VectorType::get(mInt32Ty, 8);
+ Type* v32x8Ty = VectorType::get(mInt8Ty, 32);
// have to do extra work for sign extending
- if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
+ if ((extendType == Instruction::CastOps::SExt) ||
+ (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
{
// is this PP float?
bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
- Type *v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
- Type *v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
+ Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
// shuffle mask
- Value *vConstMask = C<uint8_t>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
- 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
- Value *vi128XY_lo = nullptr;
- Value *vi128XY_hi = nullptr;
+ Value* vConstMask = C<uint8_t>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+ Value* vi128XY_lo = nullptr;
+ Value* vi128XY_hi = nullptr;
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
{
- // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
+ // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for
+ // now..
- Value *vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[0], 0), v32x8Ty);
- Value *vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[0], 1), v32x8Ty);
+ Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[0], 0), v32x8Ty);
+ Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[0], 1), v32x8Ty);
- Value *vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
- Value *vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
+ Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
+ Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
// after pshufb: group components together in each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
- vi128XY_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
- vi128XY_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ vi128XY_lo = BITCAST(
+ VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+ v128bitTy);
+ vi128XY_hi = BITCAST(
+ VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+ v128bitTy);
// after PERMD: move and pack xy components into each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
}
// do the same for zw components
- Value *vi128ZW_lo = nullptr;
- Value *vi128ZW_hi = nullptr;
+ Value* vi128ZW_lo = nullptr;
+ Value* vi128ZW_hi = nullptr;
if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
{
- Value *vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[1], 0), v32x8Ty);
- Value *vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[1], 1), v32x8Ty);
-
- Value *vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
- Value *vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
-
- vi128ZW_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
- vi128ZW_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[1], 0), v32x8Ty);
+ Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[1], 1), v32x8Ty);
+
+ Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
+ Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
+
+ vi128ZW_lo = BITCAST(
+ VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+ v128bitTy);
+ vi128ZW_hi = BITCAST(
+ VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+ v128bitTy);
}
// init denormalize variables if needed
Instruction::CastOps IntToFpCast;
- Value *conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- IntToFpCast = Instruction::CastOps::SIToFP;
+ IntToFpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0 / 32767.0));
break;
case CONVERT_SSCALED:
- IntToFpCast = Instruction::CastOps::SIToFP;
+ IntToFpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0));
break;
case CONVERT_USCALED:
break;
}
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+ // sign extend all enabled components. If we have a fill vVertexElements, output to current
+ // simdvertex
for (uint32_t i = 0; i < 4; i++)
{
if (isComponentEnabled(compMask, i))
// if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
// if x or y, use vi128XY permute result, else use vi128ZW
- Value *selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
- Value *selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
+ Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
+ Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
if (bFP)
{
// extract 128 bit lanes to sign extend each component
- Value *temp_lo = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
- Value *temp_hi = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
+ Value* temp_lo =
+ CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
+ Value* temp_hi =
+ CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
vVertexElements[currentVertexElement] = JOIN_16(temp_lo, temp_hi);
}
else
{
// extract 128 bit lanes to sign extend each component
- Value *temp_lo = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
- Value *temp_hi = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
+ Value* temp_lo =
+ PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
+ Value* temp_hi =
+ PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
Value* temp = JOIN_16(temp_lo, temp_hi);
}
}
// else zero extend
- else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+ else if ((extendType == Instruction::CastOps::ZExt) ||
+ (extendType == Instruction::CastOps::UIToFP))
{
// pshufb masks for each component
- Value *vConstMask[2];
+ Value* vConstMask[2];
if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
{
// x/z shuffle mask
- vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+ vConstMask[0] = C<char>({
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ });
}
if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
{
// y/w shuffle mask
- vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
- 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
+ vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
}
// init denormalize variables if needed
Instruction::CastOps fpCast;
- Value* conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0 / 65535.0));
break;
case CONVERT_USCALED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0f));
break;
case CONVERT_SSCALED:
// if x or y, use vi128XY permute result, else use vi128ZW
uint32_t selectedGather = (i < 2) ? 0 : 1;
- // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
+ // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of KNL,
+ // for now..
- Value *vGatherResult_lo = EXTRACT_16(vGatherResult[selectedGather], 0);
- Value *vGatherResult_hi = EXTRACT_16(vGatherResult[selectedGather], 1);
+ Value* vGatherResult_lo = EXTRACT_16(vGatherResult[selectedGather], 0);
+ Value* vGatherResult_hi = EXTRACT_16(vGatherResult[selectedGather], 1);
- Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
- Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
+ Value* temp_lo = BITCAST(
+ PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]),
+ vGatherTy);
+ Value* temp_hi = BITCAST(
+ PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]),
+ vGatherTy);
- // after pshufb mask for x channel; z uses the same shuffle from the second gather
- // 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
+ // after pshufb mask for x channel; z uses the same shuffle from the second gather
+ // 256i - 0 1 2 3 4 5 6 7
+ // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
Value* temp = JOIN_16(temp_lo, temp_hi);
}
}
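// Editor's sketch: scalar model of the PSHUFB-based zero extension above,
// assuming standard pshufb semantics (a negative mask byte writes zero).
// This is what produces the "xx00" pattern that widens 16-bit data into
// zero-filled 32-bit components. Illustrative only.
#include <cstdint>
static void PshufbLaneSketch(const uint8_t src[16], const int8_t mask[16], uint8_t dst[16])
{
    for (int i = 0; i < 16; ++i)
    {
        // High bit set in the mask byte selects zero; otherwise the low
        // four bits index a byte within the same 16-byte lane.
        dst[i] = (mask[i] < 0) ? 0 : src[mask[i] & 0x0F];
    }
}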
-void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
+void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs& args)
{
// Unpack tuple args
- Value* (&vGatherResult)[2] = std::get<0>(args);
- Value* pVtxOut = std::get<1>(args);
- const Instruction::CastOps extendType = std::get<2>(args);
- const ConversionType conversionType = std::get<3>(args);
- uint32_t &currentVertexElement = std::get<4>(args);
- uint32_t &outputElt = std::get<5>(args);
- const ComponentEnable compMask = std::get<6>(args);
- const ComponentControl(&compCtrl)[4] = std::get<7>(args);
- Value* (&vVertexElements)[4] = std::get<8>(args);
+ Value*(&vGatherResult)[2] = std::get<0>(args);
+ Value* pVtxOut = std::get<1>(args);
+ const Instruction::CastOps extendType = std::get<2>(args);
+ const ConversionType conversionType = std::get<3>(args);
+ uint32_t& currentVertexElement = std::get<4>(args);
+ uint32_t& outputElt = std::get<5>(args);
+ const ComponentEnable compMask = std::get<6>(args);
+ const ComponentControl(&compCtrl)[4] = std::get<7>(args);
+ Value*(&vVertexElements)[4] = std::get<8>(args);
// cast types
Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+ Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
- // have to do extra work for sign extending
- if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP) ||
- (extendType == Instruction::CastOps::FPExt))
+ // have to do extra work for sign extending
+ if ((extendType == Instruction::CastOps::SExt) ||
+ (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
{
// is this PP float?
bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
- Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
- Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
+ Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
+ Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+ mVWidth / 4); // vwidth is units of 32 bits
- // shuffle mask
- Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
- 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
- Value* vi128XY = nullptr;
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) {
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult[0], v32x8Ty), vConstMask), vGatherTy);
+ // shuffle mask
+ Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+ Value* vi128XY = nullptr;
+ if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+ {
+ Value* vShufResult =
+ BITCAST(PSHUFB(BITCAST(vGatherResult[0], v32x8Ty), vConstMask), vGatherTy);
// after pshufb: group components together in each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
- vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
// after PERMD: move and pack xy components into each 128bit lane
// 256i - 0 1 2 3 4 5 6 7
// xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
// do the same for zw components
Value* vi128ZW = nullptr;
- if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) {
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult[1], v32x8Ty), vConstMask), vGatherTy);
- vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+ if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+ {
+ Value* vShufResult =
+ BITCAST(PSHUFB(BITCAST(vGatherResult[1], v32x8Ty), vConstMask), vGatherTy);
+ vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
}
// init denormalize variables if needed
Instruction::CastOps IntToFpCast;
- Value* conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- IntToFpCast = Instruction::CastOps::SIToFP;
+ IntToFpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0 / 32767.0));
break;
case CONVERT_SSCALED:
- IntToFpCast = Instruction::CastOps::SIToFP;
+ IntToFpCast = Instruction::CastOps::SIToFP;
conversionFactor = VIMMED1((float)(1.0));
break;
case CONVERT_USCALED:
break;
}
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+ // sign extend all enabled components. If we have a full vVertexElements, output to current
+ // simdvertex
for (uint32_t i = 0; i < 4; i++)
{
if (isComponentEnabled(compMask, i))
// if x or y, use vi128XY permute result, else use vi128ZW
Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
- if (bFP) {
+ if (bFP)
+ {
// extract 128 bit lanes to sign extend each component
- vVertexElements[currentVertexElement] = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+ vVertexElements[currentVertexElement] =
+ CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
}
- else {
+ else
+ {
// extract 128 bit lanes to sign extend each component
- vVertexElements[currentVertexElement] = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+ vVertexElements[currentVertexElement] =
+ PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
// denormalize if needed
- if (conversionType != CONVERT_NONE) {
- vVertexElements[currentVertexElement] = FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+ if (conversionType != CONVERT_NONE)
+ {
+ vVertexElements[currentVertexElement] =
+ FMUL(CAST(IntToFpCast,
+ vVertexElements[currentVertexElement],
+ mSimdFP32Ty),
+ conversionFactor);
}
}
currentVertexElement++;
}
}
// else zero extend
- else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+ else if ((extendType == Instruction::CastOps::ZExt) ||
+ (extendType == Instruction::CastOps::UIToFP))
{
// pshufb masks for each component
Value* vConstMask[2];
- if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2)) {
+ if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
+ {
// x/z shuffle mask
- vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
- 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+ vConstMask[0] = C<char>({
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+ });
}
- if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3)) {
+ if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
+ {
// y/w shuffle mask
- vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
- 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
+ vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
}
// init denormalize variables if needed
Instruction::CastOps fpCast;
- Value* conversionFactor;
+ Value* conversionFactor;
switch (conversionType)
{
case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0 / 65535.0));
break;
case CONVERT_USCALED:
- fpCast = Instruction::CastOps::UIToFP;
+ fpCast = Instruction::CastOps::UIToFP;
conversionFactor = VIMMED1((float)(1.0f));
break;
case CONVERT_SSCALED:
// if x or y, use vi128XY permute result, else use vi128ZW
uint32_t selectedGather = (i < 2) ? 0 : 1;
- vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
- // after pshufb mask for x channel; z uses the same shuffle from the second gather
- // 256i - 0 1 2 3 4 5 6 7
- // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
+ vVertexElements[currentVertexElement] =
+ BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty),
+ vConstMask[selectedMask]),
+ vGatherTy);
+ // after pshufb mask for x channel; z uses the same shuffle from the second gather
+ // 256i - 0 1 2 3 4 5 6 7
+ // xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
// denormalize if needed
if (conversionType != CONVERT_NONE)
{
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+ vVertexElements[currentVertexElement] =
+ FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
+ conversionFactor);
}
currentVertexElement++;
}
/// @param outputElt - simdvertex offset in VIN to write to
/// @param numEltsToStore - number of simdvertex rows to write out
/// @param vVertexElements - LLVM Value*[] simdvertex to write out
-void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4])
+void FetchJit::StoreVertexElements(Value* pVtxOut,
+ const uint32_t outputElt,
+ const uint32_t numEltsToStore,
+ Value* (&vVertexElements)[4])
{
SWR_ASSERT(numEltsToStore <= 4, "Invalid element count.");
if (!vVertexElements[c]->getType()->getScalarType()->isFloatTy())
{
#if FETCH_DUMP_VERTEX
- PRINT("vVertexElements[%d]: 0x%x\n", { C(c), vVertexElements[c] });
+ PRINT("vVertexElements[%d]: 0x%x\n", {C(c), vVertexElements[c]});
#endif
vVertexElements[c] = BITCAST(vVertexElements[c], mSimdFP32Ty);
}
#if FETCH_DUMP_VERTEX
else
{
- PRINT("vVertexElements[%d]: %f\n", { C(c), vVertexElements[c] });
+ PRINT("vVertexElements[%d]: %f\n", {C(c), vVertexElements[c]});
}
#endif
// outputElt * 4 = offsetting by the size of a simdvertex
}
//////////////////////////////////////////////////////////////////////////
-/// @brief Generates a constant vector of values based on the
+/// @brief Generates a constant vector of values based on the
/// ComponentControl value
/// @param ctrl - ComponentControl value
-Value *FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
+Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
{
switch (ctrl)
{
{
if (mVWidth == 16)
{
- Type* pSimd8FPTy = VectorType::get(mFP32Ty, 8);
- Value *pIdLo = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), pSimd8FPTy);
- Value *pIdHi = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), pSimd8FPTy);
+ Type* pSimd8FPTy = VectorType::get(mFP32Ty, 8);
+ Value* pIdLo =
+ BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), pSimd8FPTy);
+ Value* pIdHi =
+ BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2})), pSimd8FPTy);
return JOIN_16(pIdLo, pIdHi);
}
else
{
- return BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
+ return BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), mSimdFP32Ty);
}
}
case StoreInstanceId:
- {
- Value *pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance })), mFP32Ty);
- return VBROADCAST(pId);
- }
+ {
+ Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance})), mFP32Ty);
+ return VBROADCAST(pId);
+ }
case StoreSrc:
switch (component)
{
// X
- case 0: return (enableMask & ComponentEnable::X);
+ case 0:
+ return (enableMask & ComponentEnable::X);
// Y
- case 1: return (enableMask & ComponentEnable::Y);
+ case 1:
+ return (enableMask & ComponentEnable::Y);
// Z
- case 2: return (enableMask & ComponentEnable::Z);
+ case 2:
+ return (enableMask & ComponentEnable::Z);
// W
- case 3: return (enableMask & ComponentEnable::W);
+ case 3:
+ return (enableMask & ComponentEnable::W);
- default: return false;
+ default:
+ return false;
}
}
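// Editor's note: the switch above tests one enable bit per component. An
// equivalent shift-based form (illustrative only), assuming X/Y/Z/W occupy
// bits 0..3 of the ComponentEnable mask:
static bool IsComponentEnabledSketch(uint32_t enableMask, uint32_t component)
{
    return ((enableMask >> component) & 0x1) != 0;
}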
/// @return PFN_FETCH_FUNC - pointer to fetch code
PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc)
{
- const llvm::Function* func = (const llvm::Function*)hFunc;
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
- PFN_FETCH_FUNC pfnFetch;
+ const llvm::Function* func = (const llvm::Function*)hFunc;
+ JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+ PFN_FETCH_FUNC pfnFetch;
gFetchCodegenMutex.lock();
pfnFetch = (PFN_FETCH_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
- // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+ // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
+ // add new IR to the module
pJitMgr->mIsModuleFinalized = true;
#if defined(KNOB_SWRC_TRACING)
- char fName[1024];
- const char *funcName = func->getName().data();
+ char fName[1024];
+ const char* funcName = func->getName().data();
sprintf(fName, "%s.bin", funcName);
- FILE *fd = fopen(fName, "wb");
- fwrite((void *)pfnFetch, 1, 2048, fd);
+ FILE* fd = fopen(fName, "wb");
+ fwrite((void*)pfnFetch, 1, 2048, fd);
fclose(fd);
#endif
gFetchCodegenMutex.unlock();
-
return pfnFetch;
}
pJitMgr->SetupNewModule();
FetchJit theJit(pJitMgr);
- HANDLE hFunc = theJit.Create(state);
+ HANDLE hFunc = theJit.Create(state);
return JitFetchFunc(hJitMgr, hFunc);
}
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fetch_jit.h
-*
-* @brief Definition of the fetch jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fetch_jit.h
+ *
+ * @brief Definition of the fetch jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "common/formats.h"
{
struct
{
- uint32_t AlignedByteOffset : 12;
- uint32_t Format : 10;
- uint32_t StreamIndex : 6;
- uint32_t InstanceEnable : 1;
- uint32_t InstanceStrideEnable : 1;
- uint32_t ComponentControl0 : 4;
- uint32_t ComponentControl1 : 4;
- uint32_t ComponentControl2 : 4;
- uint32_t ComponentControl3 : 4;
- uint32_t ComponentPacking : 4;
- uint32_t _reserved : 14;
+ uint32_t AlignedByteOffset : 12;
+ uint32_t Format : 10;
+ uint32_t StreamIndex : 6;
+ uint32_t InstanceEnable : 1;
+ uint32_t InstanceStrideEnable : 1;
+ uint32_t ComponentControl0 : 4;
+ uint32_t ComponentControl1 : 4;
+ uint32_t ComponentControl2 : 4;
+ uint32_t ComponentControl3 : 4;
+ uint32_t ComponentPacking : 4;
+ uint32_t _reserved : 14;
};
uint64_t bits;
};
//////////////////////////////////////////////////////////////////////////
struct FETCH_COMPILE_STATE
{
- uint32_t numAttribs{ 0 };
+ uint32_t numAttribs{0};
INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS];
- SWR_FORMAT indexType;
- uint32_t cutIndex{ 0xffffffff };
+ SWR_FORMAT indexType;
+ uint32_t cutIndex{0xffffffff};
// Options that affect the JIT'd code
- bool bDisableIndexOOBCheck; // If enabled, FetchJit will exclude index OOB check
- bool bEnableCutIndex{ false }; // Compares indices with the cut index and returns a cut mask
- bool bVertexIDOffsetEnable{ false }; // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws
- bool bPartialVertexBuffer{ false }; // for indexed draws, map illegal indices to a known resident vertex
+ bool bDisableIndexOOBCheck; // If enabled, FetchJit will exclude index OOB check
+ bool bEnableCutIndex{false}; // Compares indices with the cut index and returns a cut mask
+ bool bVertexIDOffsetEnable{false}; // Offset vertexID by StartVertex for non-indexed draws or
+ // BaseVertex for indexed draws
+ bool bPartialVertexBuffer{
+ false}; // for indexed draws, map illegal indices to a known resident vertex
- bool bForceSequentialAccessEnable{ false };
- bool bInstanceIDOffsetEnable{ false };
+ bool bForceSequentialAccessEnable{false};
+ bool bInstanceIDOffsetEnable{false};
- FETCH_COMPILE_STATE(bool diableIndexOOBCheck = false):
- bDisableIndexOOBCheck(diableIndexOOBCheck){ };
+ FETCH_COMPILE_STATE(bool disableIndexOOBCheck = false) :
+ bDisableIndexOOBCheck(disableIndexOOBCheck){};
- bool operator==(const FETCH_COMPILE_STATE &other) const
+ bool operator==(const FETCH_COMPILE_STATE& other) const
{
- if (numAttribs != other.numAttribs) return false;
- if (indexType != other.indexType) return false;
- if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck) return false;
- if (bEnableCutIndex != other.bEnableCutIndex) return false;
- if (cutIndex != other.cutIndex) return false;
- if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false;
- if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false;
- if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable) return false;
- if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable) return false;
+ if (numAttribs != other.numAttribs)
+ return false;
+ if (indexType != other.indexType)
+ return false;
+ if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck)
+ return false;
+ if (bEnableCutIndex != other.bEnableCutIndex)
+ return false;
+ if (cutIndex != other.cutIndex)
+ return false;
+ if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable)
+ return false;
+ if (bPartialVertexBuffer != other.bPartialVertexBuffer)
+ return false;
+ if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable)
+ return false;
+ if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable)
+ return false;
for (uint32_t i = 0; i < numAttribs; ++i)
{
if ((layout[i].bits != other.layout[i].bits) ||
- (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) &&
- (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState))){
+ (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) &&
+ (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState)))
+ {
return false;
}
}
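// Editor's note: a hypothetical cache keyed by FETCH_COMPILE_STATE, sketching
// why operator== compares every field that affects codegen. The hash functor
// and names below are assumptions for illustration, not the actual SWR cache.
#include <cstddef>
struct FetchStateHashSketch
{
    size_t operator()(const FETCH_COMPILE_STATE& s) const
    {
        size_t h = s.numAttribs;
        for (uint32_t i = 0; i < s.numAttribs; ++i)
            h ^= static_cast<size_t>(s.layout[i].bits) + 0x9e3779b9 + (h << 6) + (h >> 2);
        return h;
    }
};
// e.g. std::unordered_map<FETCH_COMPILE_STATE, PFN_FETCH_FUNC, FetchStateHashSketch>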
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file lower_x86.cpp
-*
-* @brief llvm pass to lower meta code to x86
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file lower_x86.cpp
+ *
+ * @brief llvm pass to lower meta code to x86
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "passes.h"
#include <unordered_map>
-
namespace llvm
{
// forward declare the initializer
- void initializeLowerX86Pass(PassRegistry&);
-}
+ void initializeLowerX86Pass(PassRegistry &);
+} // namespace llvm
namespace SwrJit
{
enum TargetArch
{
- AVX = 0,
- AVX2 = 1,
+ AVX = 0,
+ AVX2 = 1,
AVX512 = 2
};
enum TargetWidth
{
- W256 = 0,
- W512 = 1,
+ W256 = 0,
+ W512 = 1,
NUM_WIDTHS = 2
};
struct LowerX86;
- typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
+ typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc;
struct X86Intrinsic
{
Intrinsic::ID intrin[NUM_WIDTHS];
- EmuFunc emuFunc;
+ EmuFunc emuFunc;
};
- // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the previous behavior of
- // mapping directly to avx/avx2 intrinsics.
+ // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the
+ // previous behavior of mapping directly to avx/avx2 intrinsics.
static std::map<std::string, Intrinsic::ID> intrinsicMap = {
- {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
- {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
- {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
- {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
- {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
- {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
- {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
- {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
- {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
+ {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+ {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+ {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+ {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+ {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+ {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
+ {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+ {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+ {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
};
// Forward decls
- Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
- Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-
- Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin);
-
+ Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+ Instruction *
+ VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+ Instruction *
+ VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+ Instruction *
+ VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+ Instruction *
+ VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+
+ Instruction *DOUBLE_EMU(LowerX86 * pThis,
+ TargetArch arch,
+ TargetWidth width,
+ CallInst * pCallInst,
+ Intrinsic::ID intrin);
+
static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
// 256 wide 512 wide
- { // AVX
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- { // AVX2
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
- },
- { // AVX512
- {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
- {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx512_mask_permvar_sf_256, Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
- {"meta.intrinsic.VPERMD", {{Intrinsic::x86_avx512_mask_permvar_si_256, Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
- {"meta.intrinsic.VGATHERPD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VGATHERDD", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
- {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512 }, NO_EMU}},
- {"meta.intrinsic.VCVTPH2PS", {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512 }, NO_EMU}},
- {"meta.intrinsic.VROUND", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
- {"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
- }
- };
+ {
+ // AVX
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS",
+ {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX2
+ {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+ {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+ {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+ },
+ {
+ // AVX512
+ {"meta.intrinsic.VRCPPS",
+ {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+ {"meta.intrinsic.VPERMPS",
+ {{Intrinsic::x86_avx512_mask_permvar_sf_256,
+ Intrinsic::x86_avx512_mask_permvar_sf_512},
+ NO_EMU}},
+ {"meta.intrinsic.VPERMD",
+ {{Intrinsic::x86_avx512_mask_permvar_si_256,
+ Intrinsic::x86_avx512_mask_permvar_si_512},
+ NO_EMU}},
+ {"meta.intrinsic.VGATHERPD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VGATHERDD",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+ {"meta.intrinsic.VCVTPD2PS",
+ {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512},
+ NO_EMU}},
+ {"meta.intrinsic.VCVTPH2PS",
+ {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512},
+ NO_EMU}},
+ {"meta.intrinsic.VROUND",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
+ {"meta.intrinsic.VHSUBPS",
+ {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
+ }};
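// Editor's sketch of how an entry in the table above is consumed, simplified
// from ProcessIntrinsicAdvanced (assumption: omits the DOUBLE marker, which
// instead routes through DOUBLE_EMU to double pump the narrower intrinsic).
static Instruction* DispatchEntrySketch(LowerX86* pThis,
                                        TargetArch arch,
                                        TargetWidth width,
                                        CallInst* pCallInst,
                                        const X86Intrinsic& entry)
{
    Intrinsic::ID id = entry.intrin[width];
    if (id == Intrinsic::not_intrinsic)
    {
        // No native mapping for this arch/width: fall back to emulation
        return entry.emuFunc(pThis, arch, width, pCallInst);
    }
    // Otherwise the caller builds a direct call to the native intrinsic
    return nullptr;
}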
struct LowerX86 : public FunctionPass
{
- LowerX86(Builder* b = nullptr)
- : FunctionPass(ID), B(b)
+ LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b)
{
initializeLowerX86Pass(*PassRegistry::getPassRegistry());
else if (JM()->mArch.AVX())
{
mTarget = AVX;
-
}
else
{
// across all intrinsics, and will have to be rethought. Probably need something
// similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed
// intrinsic.
- void GetRequestedWidthAndType(CallInst* pCallInst, const StringRef intrinName, TargetWidth* pWidth, Type** pTy)
+ void GetRequestedWidthAndType(CallInst * pCallInst,
+ const StringRef intrinName,
+ TargetWidth * pWidth,
+ Type ** pTy)
{
- Type* pVecTy = pCallInst->getType();
+ Type *pVecTy = pCallInst->getType();
// Check for intrinsic specific types
// VCVTPD2PS type comes from src, not dst
if (!pVecTy->isVectorTy())
{
- for (auto& op : pCallInst->arg_operands())
+ for (auto &op : pCallInst->arg_operands())
{
if (op.get()->getType()->isVectorTy())
{
uint32_t width = cast<VectorType>(pVecTy)->getBitWidth();
switch (width)
{
- case 256: *pWidth = W256; break;
- case 512: *pWidth = W512; break;
- default: SWR_ASSERT(false, "Unhandled vector width %d", width);
+ case 256:
+ *pWidth = W256;
+ break;
+ case 512:
+ *pWidth = W512;
+ break;
+ default:
+ SWR_ASSERT(false, "Unhandled vector width %d", width);
*pWidth = W256;
}
*pTy = pVecTy->getScalarType();
}
- Value* GetZeroVec(TargetWidth width, Type* pTy)
+ Value *GetZeroVec(TargetWidth width, Type *pTy)
{
uint32_t numElem = 0;
switch (width)
{
- case W256: numElem = 8; break;
- case W512: numElem = 16; break;
- default: SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
+ case W256:
+ numElem = 8;
+ break;
+ case W512:
+ numElem = 16;
+ break;
+ default:
+ SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
}
return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
}
- Value* GetMask(TargetWidth width)
+ Value *GetMask(TargetWidth width)
{
- Value* mask;
+ Value *mask;
switch (width)
{
- case W256: mask = B->C((uint8_t)-1); break;
- case W512: mask = B->C((uint16_t)-1); break;
- default: SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
+ case W256:
+ mask = B->C((uint8_t)-1);
+ break;
+ case W512:
+ mask = B->C((uint16_t)-1);
+ break;
+ default:
+ SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
}
return mask;
}
// Convert <N x i1> mask to <N x i32> x86 mask
- Value* VectorMask(Value* vi1Mask)
+ Value *VectorMask(Value *vi1Mask)
{
uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
}
- Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
+ Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst)
{
- Function* pFunc = pCallInst->getCalledFunction();
- auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
+ Function * pFunc = pCallInst->getCalledFunction();
+ auto & intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
TargetWidth vecWidth;
- Type* pElemTy;
+ Type * pElemTy;
GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
// Check if there is a native intrinsic for this instruction
// Double pump the next smaller SIMD intrinsic
SWR_ASSERT(vecWidth != 0, "Cannot double pump smallest SIMD width.");
Intrinsic::ID id2 = intrinsic.intrin[vecWidth - 1];
- SWR_ASSERT(id2 != Intrinsic::not_intrinsic, "Cannot find intrinsic to double pump.");
+ SWR_ASSERT(id2 != Intrinsic::not_intrinsic,
+ "Cannot find intrinsic to double pump.");
return DOUBLE_EMU(this, mTarget, vecWidth, pCallInst, id2);
}
else if (id != Intrinsic::not_intrinsic)
{
- Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
- SmallVector<Value*, 8> args;
- for (auto& arg : pCallInst->arg_operands())
+ Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
+ SmallVector<Value *, 8> args;
+ for (auto &arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
- // If AVX512, all instructions add a src operand and mask. We'll pass in 0 src and full mask for now
- // Assuming the intrinsics are consistent and place the src operand and mask last in the argument list.
+ // If AVX512, all instructions add a src operand and mask. We'll pass in 0 src and
+ // full mask for now, assuming the intrinsics are consistent and place the src
+ // operand and mask last in the argument list.
if (mTarget == AVX512)
{
- if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS")) {
+ if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS"))
+ {
args.push_back(GetZeroVec(W256, pCallInst->getType()->getScalarType()));
args.push_back(GetMask(W256));
// for AVX512 VCVTPD2PS, we also have to add rounding mode
- args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT |
- _MM_FROUND_NO_EXC));
- } else {
+ args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+ }
+ else
+ {
args.push_back(GetZeroVec(vecWidth, pElemTy));
args.push_back(GetMask(vecWidth));
}
return nullptr;
}
- Instruction* ProcessIntrinsic(CallInst* pCallInst)
+ Instruction *ProcessIntrinsic(CallInst *pCallInst)
{
- Function* pFunc = pCallInst->getCalledFunction();
-
+ Function *pFunc = pCallInst->getCalledFunction();
+
// Forward to the advanced support if found
if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
{
return ProcessIntrinsicAdvanced(pCallInst);
}
- SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(), "Unimplemented intrinsic %s.", pFunc->getName());
+ SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(),
+ "Unimplemented intrinsic %s.",
+ pFunc->getName());
Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
- Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
+ Function * pX86IntrinFunc =
+ Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
- SmallVector<Value*, 8> args;
- for (auto& arg : pCallInst->arg_operands())
+ SmallVector<Value *, 8> args;
+ for (auto &arg : pCallInst->arg_operands())
{
args.push_back(arg.get());
}
//////////////////////////////////////////////////////////////////////////
/// @brief LLVM function pass run method.
/// @param F - The function we're working on with this pass.
- virtual bool runOnFunction(Function& F)
+ virtual bool runOnFunction(Function &F)
{
- std::vector<Instruction*> toRemove;
+ std::vector<Instruction *> toRemove;
- for (auto& BB : F.getBasicBlockList())
+ for (auto &BB : F.getBasicBlockList())
{
- for (auto& I : BB.getInstList())
+ for (auto &I : BB.getInstList())
{
- if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
+ if (CallInst *pCallInst = dyn_cast<CallInst>(&I))
{
- Function* pFunc = pCallInst->getCalledFunction();
+ Function *pFunc = pCallInst->getCalledFunction();
if (pFunc)
{
if (pFunc->getName().startswith("meta.intrinsic"))
{
B->IRB()->SetInsertPoint(&I);
- Instruction* pReplace = ProcessIntrinsic(pCallInst);
+ Instruction *pReplace = ProcessIntrinsic(pCallInst);
SWR_ASSERT(pReplace);
toRemove.push_back(pCallInst);
pCallInst->replaceAllUsesWith(pReplace);
}
}
-
}
}
}
- for (auto* pInst : toRemove)
+ for (auto *pInst : toRemove)
{
pInst->eraseFromParent();
}
return true;
}
- virtual void getAnalysisUsage(AnalysisUsage& AU) const
- {
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
- JitManager* JM() { return B->JM(); }
+ JitManager *JM() { return B->JM(); }
- Builder* B;
+ Builder *B;
TargetArch mTarget;
- static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
+ static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
};
- char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
+ char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
- FunctionPass* createLowerX86Pass(Builder* b)
- {
- return new LowerX86(b);
- }
+ FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); }
- Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
{
SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
return nullptr;
}
- Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
{
// Only need vperm emulation for AVX
SWR_ASSERT(arch == AVX);
- Builder* B = pThis->B;
- auto v32A = pCallInst->getArgOperand(0);
- auto vi32Index = pCallInst->getArgOperand(1);
+ Builder *B = pThis->B;
+ auto v32A = pCallInst->getArgOperand(0);
+ auto vi32Index = pCallInst->getArgOperand(1);
- Value* v32Result;
+ Value *v32Result;
if (isa<Constant>(vi32Index))
{
// Can use llvm shuffle vector directly with constant shuffle indices
for (uint32_t l = 0; l < v32A->getType()->getVectorNumElements(); ++l)
{
auto i32Index = B->VEXTRACT(vi32Index, B->C(l));
- auto val = B->VEXTRACT(v32A, i32Index);
- v32Result = B->VINSERT(v32Result, val, B->C(l));
+ auto val = B->VEXTRACT(v32A, i32Index);
+ v32Result = B->VINSERT(v32Result, val, B->C(l));
}
}
return cast<Instruction>(v32Result);
}
- Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction *
+ VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
{
- Builder* B = pThis->B;
- auto vSrc = pCallInst->getArgOperand(0);
- auto pBase = pCallInst->getArgOperand(1);
- auto vi32Indices = pCallInst->getArgOperand(2);
- auto vi1Mask = pCallInst->getArgOperand(3);
- auto i8Scale = pCallInst->getArgOperand(4);
-
- pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
- uint32_t numElem = vSrc->getType()->getVectorNumElements();
- auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
- auto srcTy = vSrc->getType()->getVectorElementType();
- Value* v32Gather;
+ Builder *B = pThis->B;
+ auto vSrc = pCallInst->getArgOperand(0);
+ auto pBase = pCallInst->getArgOperand(1);
+ auto vi32Indices = pCallInst->getArgOperand(2);
+ auto vi1Mask = pCallInst->getArgOperand(3);
+ auto i8Scale = pCallInst->getArgOperand(4);
+
+ pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
+ uint32_t numElem = vSrc->getType()->getVectorNumElements();
+ auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
+ auto srcTy = vSrc->getType()->getVectorElementType();
+ Value * v32Gather;
if (arch == AVX)
{
// Full emulation for AVX
// Store source on stack to provide a valid address to load from inactive lanes
auto pStack = B->STACKSAVE();
- auto pTmp = B->ALLOCA(vSrc->getType());
+ auto pTmp = B->ALLOCA(vSrc->getType());
B->STORE(vSrc, pTmp);
- v32Gather = UndefValue::get(vSrc->getType());
- auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
+ v32Gather = UndefValue::get(vSrc->getType());
+ auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
auto vi32Offsets = B->MUL(vi32Indices, vi32Scale);
for (uint32_t i = 0; i < numElem; ++i)
{
- auto i32Offset = B->VEXTRACT(vi32Offsets, B->C(i));
- auto pLoadAddress = B->GEP(pBase, i32Offset);
- pLoadAddress = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
- auto pMaskedLoadAddress = B->GEP(pTmp, { 0, i });
- auto i1Mask = B->VEXTRACT(vi1Mask, B->C(i));
- auto pValidAddress = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
- auto val = B->LOAD(pValidAddress);
- v32Gather = B->VINSERT(v32Gather, val, B->C(i));
+ auto i32Offset = B->VEXTRACT(vi32Offsets, B->C(i));
+ auto pLoadAddress = B->GEP(pBase, i32Offset);
+ pLoadAddress = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
+ auto pMaskedLoadAddress = B->GEP(pTmp, {0, i});
+ auto i1Mask = B->VEXTRACT(vi1Mask, B->C(i));
+ auto pValidAddress = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
+ auto val = B->LOAD(pValidAddress);
+ v32Gather = B->VINSERT(v32Gather, val, B->C(i));
}
B->STACKRESTORE(pStack);
}
else if (arch == AVX2 || (arch == AVX512 && width == W256))
{
- Function* pX86IntrinFunc;
+ Function *pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_ps_256);
- }
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx2_gather_d_ps_256);
+ }
else if (srcTy == B->mInt32Ty)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_d_256);
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx2_gather_d_d_256);
}
else if (srcTy == B->mDoubleTy)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_q_256);
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx2_gather_d_q_256);
}
else
{
if (width == W256)
{
auto v32Mask = B->BITCAST(pThis->VectorMask(vi1Mask), vSrc->getType());
- v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, v32Mask, i8Scale });
+ v32Gather = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, v32Mask, i8Scale});
}
else if (width == W512)
{
if (vSrc->getType()->getVectorElementType() == B->mDoubleTy)
{
auto v64Mask = pThis->VectorMask(vi1Mask);
- v64Mask = B->S_EXT(v64Mask,
- VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
+ v64Mask = B->S_EXT(
+ v64Mask,
+ VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
v64Mask = B->BITCAST(v64Mask, vSrc->getType());
- Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({ 0, 1, 2, 3 }));
- Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({ 4, 5, 6, 7 }));
-
- Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({ 0, 1, 2, 3 }));
- Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({ 4, 5, 6, 7 }));
-
- Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 0, 1, 2, 3 }));
- Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 4, 5, 6, 7 }));
-
- src0 = B->BITCAST(src0, VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements()));
- mask0 = B->BITCAST(mask0, VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
- Value* gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale });
- src1 = B->BITCAST(src1, VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements()));
- mask1 = B->BITCAST(mask1, VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
- Value* gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale });
-
- v32Gather = B->VSHUFFLE(gather0, gather1, B->C({ 0, 1, 2, 3, 4, 5, 6, 7 }));
+ Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
+ Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
+
+ Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
+ Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
+
+ Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
+ Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
+
+ src0 = B->BITCAST(
+ src0,
+ VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements()));
+ mask0 = B->BITCAST(
+ mask0,
+ VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
+ Value *gather0 =
+ B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
+ src1 = B->BITCAST(
+ src1,
+ VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements()));
+ mask1 = B->BITCAST(
+ mask1,
+ VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
+ Value *gather1 =
+ B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
+
+ v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
v32Gather = B->BITCAST(v32Gather, vSrc->getType());
}
else
{
// Double pump 8-wide for 32bit elements
auto v32Mask = pThis->VectorMask(vi1Mask);
- v32Mask = B->BITCAST(v32Mask, vSrc->getType());
- Value* src0 = B->EXTRACT_16(vSrc, 0);
- Value* src1 = B->EXTRACT_16(vSrc, 1);
+ v32Mask = B->BITCAST(v32Mask, vSrc->getType());
+ Value *src0 = B->EXTRACT_16(vSrc, 0);
+ Value *src1 = B->EXTRACT_16(vSrc, 1);
- Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
- Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
+ Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
+ Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
- Value* mask0 = B->EXTRACT_16(v32Mask, 0);
- Value* mask1 = B->EXTRACT_16(v32Mask, 1);
+ Value *mask0 = B->EXTRACT_16(v32Mask, 0);
+ Value *mask1 = B->EXTRACT_16(v32Mask, 1);
- Value* gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale });
- Value* gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale });
+ Value *gather0 =
+ B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
+ Value *gather1 =
+ B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
v32Gather = B->JOIN_16(gather0, gather1);
}
}
else if (arch == AVX512)
{
- Value* iMask;
- Function* pX86IntrinFunc;
+ Value * iMask;
+ Function *pX86IntrinFunc;
if (srcTy == B->mFP32Ty)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dps_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx512_gather_dps_512);
+ iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
}
else if (srcTy == B->mInt32Ty)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dpi_512);
- iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx512_gather_dpi_512);
+ iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
}
else if (srcTy == B->mDoubleTy)
{
- pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dpd_512);
- iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
+ pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+ Intrinsic::x86_avx512_gather_dpd_512);
+ iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
}
else
{
}
auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
- v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, iMask, i32Scale });
+ v32Gather = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, iMask, i32Scale});
}
return cast<Instruction>(v32Gather);
}
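// Editor's sketch: scalar model of the AVX gather emulation above, showing
// the masked-load trick -- inactive lanes read a stack copy of the source so
// every dereferenced address is valid. Illustrative only; the real code
// emits this per lane as IR.
#include <cstdint>
static void GatherEmuSketch(float dst[8],
                            const float passthru[8],
                            const uint8_t* pBase,
                            const int32_t indices[8],
                            const bool mask[8],
                            int32_t scale)
{
    for (int i = 0; i < 8; ++i)
    {
        const float* pLoad = mask[i]
            ? reinterpret_cast<const float*>(pBase + indices[i] * scale)
            : &passthru[i];
        dst[i] = *pLoad; // inactive lanes keep their pass-through value
    }
}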
- // No support for vroundps in avx512 (it is available in kncni), so emulate with avx instructions
- Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ // No support for vroundps in avx512 (it is available in kncni), so emulate with avx
+ // instructions
+ Instruction *
+ VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
{
SWR_ASSERT(arch == AVX512);
- auto B = pThis->B;
+ auto B = pThis->B;
auto vf32Src = pCallInst->getOperand(0);
auto i8Round = pCallInst->getOperand(1);
- auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
+ auto pfnFunc =
+ Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
if (width == W256)
{
}
// No support for hsub in AVX512
- Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+ Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
{
SWR_ASSERT(arch == AVX512);
- auto B = pThis->B;
+ auto B = pThis->B;
auto src0 = pCallInst->getOperand(0);
auto src1 = pCallInst->getOperand(1);
// 256b hsub can just use avx intrinsic
if (width == W256)
{
- auto pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_hsub_ps_256);
+ auto pX86IntrinFunc =
+ Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_hsub_ps_256);
return cast<Instruction>(B->CALL2(pX86IntrinFunc, src0, src1));
}
else if (width == W512)
{
// 512b hsub can be accomplished with shuf/sub combo
- auto minuend = B->VSHUFFLE(src0, src1, B->C({ 0, 2, 8, 10, 4, 6, 12, 14 }));
- auto subtrahend = B->VSHUFFLE(src0, src1, B->C({ 1, 3, 9, 11, 5, 7, 13, 15 }));
+ auto minuend = B->VSHUFFLE(src0, src1, B->C({0, 2, 8, 10, 4, 6, 12, 14}));
+ auto subtrahend = B->VSHUFFLE(src0, src1, B->C({1, 3, 9, 11, 5, 7, 13, 15}));
return cast<Instruction>(B->SUB(minuend, subtrahend));
}
else
}
}
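// Editor's note: reference semantics of the 256-bit HSUBPS being emulated,
// which is why the W512 shuffles above pair indices {0,2,8,10,4,6,12,14}
// against {1,3,9,11,5,7,13,15}. Scalar model, illustrative only:
static void Hsub256Sketch(const float a[8], const float b[8], float out[8])
{
    // Per 128-bit lane: [a0-a1, a2-a3, b0-b1, b2-b3]
    out[0] = a[0] - a[1]; out[1] = a[2] - a[3];
    out[2] = b[0] - b[1]; out[3] = b[2] - b[3];
    out[4] = a[4] - a[5]; out[5] = a[6] - a[7];
    out[6] = b[4] - b[5]; out[7] = b[6] - b[7];
}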
- // Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from each vector argument and
- // calls the 256 wide intrinsic, then merges the results to 512 wide
- Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin)
+ // Double pump input using the given 256-wide intrinsic. This blindly extracts the lower
+ // and upper 256 from each vector argument and calls the 256 wide intrinsic, then merges
+ // the results to 512 wide
+ Instruction *DOUBLE_EMU(LowerX86 * pThis,
+ TargetArch arch,
+ TargetWidth width,
+ CallInst * pCallInst,
+ Intrinsic::ID intrin)
{
auto B = pThis->B;
SWR_ASSERT(width == W512);
- Value* result[2];
- Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
+ Value * result[2];
+ Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
for (uint32_t i = 0; i < 2; ++i)
{
- SmallVector<Value*, 8> args;
- for (auto& arg : pCallInst->arg_operands())
+ SmallVector<Value *, 8> args;
+ for (auto &arg : pCallInst->arg_operands())
{
auto argType = arg.get()->getType();
if (argType->isVectorTy())
{
- uint32_t vecWidth = argType->getVectorNumElements();
- Value *lanes = B->CInc<int>(i*vecWidth/2, vecWidth/2);
- Value *argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
+ uint32_t vecWidth = argType->getVectorNumElements();
+ Value * lanes = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
+ Value * argToPush = B->VSHUFFLE(
+ arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
args.push_back(argToPush);
}
else
{
assert(result[1]->getType()->isVectorTy());
vecWidth = result[0]->getType()->getVectorNumElements() +
- result[1]->getType()->getVectorNumElements();
+ result[1]->getType()->getVectorNumElements();
}
else
{
return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
}
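// Editor's sketch: the double-pump pattern above in scalar form, assuming a
// 512-wide op built from two 256-wide halves (std::array stands in for LLVM
// vector values; illustrative only).
#include <array>
template <typename Op256>
static std::array<float, 16> DoublePumpSketch(Op256 op, const std::array<float, 16>& src)
{
    std::array<float, 16> out{};
    for (int half = 0; half < 2; ++half)
    {
        std::array<float, 8> in{};
        for (int i = 0; i < 8; ++i)
            in[i] = src[half * 8 + i];   // extract lower/upper 256
        std::array<float, 8> r = op(in); // call the 256-wide op
        for (int i = 0; i < 8; ++i)
            out[half * 8 + i] = r[i];    // merge back to 512 wide
    }
    return out;
}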
-}
+} // namespace SwrJit
using namespace SwrJit;
INITIALIZE_PASS_BEGIN(LowerX86, "LowerX86", "LowerX86", false, false)
INITIALIZE_PASS_END(LowerX86, "LowerX86", "LowerX86", false, false)
-
/****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file passes.h
-*
-* @brief Include file for llvm passes
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file passes.h
+ *
+ * @brief Include file for llvm passes
+ *
+ ******************************************************************************/
#include "JitManager.h"
#include "builder.h"
using namespace llvm;
FunctionPass* createLowerX86Pass(Builder* b);
-}
+} // namespace SwrJit
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file jit_api.h
-*
-* @brief Platform independent JIT interface
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file jit_api.h
+ *
+ * @brief Platform independent JIT interface
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "common/os.h"
#include "core/utils.h"
#endif
-
struct ShaderInfo;
//////////////////////////////////////////////////////////////////////////
SWR_SHADER_TYPE type;
uint32_t crc;
- const void* pIR; ///< Pointer to LLVM IR text.
- size_t irLength;
+ const void* pIR; ///< Pointer to LLVM IR text.
+ size_t irLength;
bool enableJitSampler;
};
-extern "C"
-{
+
+extern "C" {
//////////////////////////////////////////////////////////////////////////
/// @brief Create JIT context.
/// @param hJitContext - Jit Context
/// @param input - Input containing LLVM IR and other information
/// @param output - Output containing information about JIT shader
-ShaderInfo* JITCALL JitCompileShader(
- HANDLE hJitContext,
- const JIT_COMPILE_INPUT& input);
+ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
//////////////////////////////////////////////////////////////////////////
/// @brief JIT destroy shader.
/// @param hJitContext - Jit Context
/// @param pShaderInfo - pointer to shader object.
-void JITCALL JitDestroyShader(
- HANDLE hJitContext,
- ShaderInfo*& pShaderInfo);
+void JITCALL JitDestroyShader(HANDLE hJitContext, ShaderInfo*& pShaderInfo);
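// Usage sketch (illustrative; SHADER_VERTEX and the IR pointer/length names are
// assumed stand-ins, not taken from this header):
//   JIT_COMPILE_INPUT in = {};
//   in.type             = SHADER_VERTEX;
//   in.pIR              = pIrText;      // LLVM IR text blob
//   in.irLength         = irTextLength;
//   in.enableJitSampler = false;
//   ShaderInfo* pInfo = JitCompileShader(hJitContext, in);
//   ...
//   JitDestroyShader(hJitContext, pInfo);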
//////////////////////////////////////////////////////////////////////////
/// @brief JIT compiles fetch shader
/****************************************************************************
-* Copyright (C) 2017-2018 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file jit_pch.hpp
-*
-* @brief Pre-compiled header for jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2017-2018 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file jit_pch.hpp
+ *
+ * @brief Pre-compiled header for jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "llvm/IR/LegacyPassManager.h"
using FunctionPassManager = llvm::legacy::FunctionPassManager;
-using PassManager = llvm::legacy::PassManager;
+using PassManager = llvm::legacy::PassManager;
#include "llvm/CodeGen/Passes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Transforms/Utils/Cloning.h"
-
#if defined(_WIN32)
#include "llvm/ADT/Triple.h"
#endif
#endif
#if LLVM_VERSION_MAJOR >= 5
-static const auto Sync_CrossThread = llvm::SyncScope::System;
-static const auto Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, const llvm::AttrBuilder &b)
+static const auto Sync_CrossThread = llvm::SyncScope::System;
+static const auto Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx,
+ const llvm::AttrBuilder& b)
{
return llvm::AttributeSet::get(ctx, b);
}
#else
-static const auto Sync_CrossThread = llvm::SynchronizationScope::CrossThread;
-static const auto Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, const llvm::AttrBuilder &b)
+static const auto Sync_CrossThread = llvm::SynchronizationScope::CrossThread;
+static const auto Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx,
+ const llvm::AttrBuilder& b)
{
return llvm::AttributeSet::get(ctx, Attrib_FunctionIndex, b);
}
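// Usage sketch (illustrative) for the version-neutral shim above; the attribute
// kind is an arbitrary example:
//   llvm::AttrBuilder b;
//   b.addAttribute(llvm::Attribute::NoUnwind);
//   llvm::AttributeSet attrs = GetFuncAttribSet(ctx, b);
// Either branch resolves to the matching AttributeSet::get overload, hiding the
// LLVM 5 AttributeList split from callers.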
#pragma pop_macro("DEBUG")
-
#include <deque>
#include <list>
#include <unordered_map>
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file DebugOutput.cpp
-*
-* @brief Shader support library implementation for printed Debug output
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file DebugOutput.cpp
+ *
+ * @brief Shader support library implementation for printed Debug output
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include <stdarg.h>
#include "common/os.h"
-
//////////////////////////////////////////////////////////////////////////
/// @brief called in JIT code, inserted by PRINT
/// output to both stdout and visual studio debug console
va_start(args, fmt);
vprintf(fmt, args);
-#if defined( _WIN32 )
+#if defined(_WIN32)
char strBuf[1024];
vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
OutputDebugStringA(strBuf);
va_end(args);
}
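// e.g. (illustrative) jitted IR built with the Builder's PRINT helper routes
// through here, so something like PRINT("x = %f\n", {pX}) prints to stdout and,
// on Windows, to the debugger console as well; pX is a hypothetical Value*.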
-
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file streamout_jit.cpp
-*
-* @brief Implementation of the streamout jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file streamout_jit.cpp
+ *
+ * @brief Implementation of the streamout jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "jit_api.h"
{
StreamOutJit(JitManager* pJitMgr) : Builder(pJitMgr){};
- // returns pointer to SWR_STREAMOUT_BUFFER
+ // returns pointer to SWR_STREAMOUT_BUFFER
Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
{
- return LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer });
+ return LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer});
}
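    // The index list is a GEP-style path: {0, SWR_STREAMOUT_CONTEXT_pBuffer,
    // buffer} loads pSoCtx->pBuffer[buffer], the SWR_STREAMOUT_BUFFER pointer
    // for the requested slot.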
-
//////////////////////////////////////////////////////////////////////////
// @brief checks if streamout buffer is oob
// @return <i1> true/false
// load enable
// @todo bool data types should generate <i1> llvm type
- Value* enabled = TRUNC(LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_enable }), IRB()->getInt1Ty());
+ Value* enabled = TRUNC(LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_enable}), IRB()->getInt1Ty());
// load buffer size
- Value* bufferSize = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_bufferSize });
-
+ Value* bufferSize = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_bufferSize});
+
// load current streamOffset
- Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+ Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
// load buffer pitch
- Value* pitch = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
+ Value* pitch = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
// buffer is considered oob if in use in a decl but not enabled
returnMask = OR(returnMask, NOT(enabled));
    // buffer is oob if it cannot fit a prim's worth of verts
Value* newOffset = ADD(streamOffset, MUL(pitch, C(state.numVertsPerPrim)));
- returnMask = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
+ returnMask = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
return returnMask;
}
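    // Worked example (illustrative numbers): with pitch = 16 dwords, a triangle
    // (numVertsPerPrim = 3), streamOffset = 500 and bufferSize = 512, newOffset
    // is 500 + 3 * 16 = 548 > 512, so this buffer is flagged oob and the prim
    // is dropped for all buffers.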
-
//////////////////////////////////////////////////////////////////////////
// @brief converts scalar bitmask to <4 x i32> suitable for shuffle vector,
// packing the active mask bits
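    // e.g. (illustrative) bitmask 0b1101 -> indices {0, 2, 3, 0}: the three set
    // bits pack to the front; unused trailing lanes keep the default index 0.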
Value* PackMask(uint32_t bitmask)
{
std::vector<Constant*> indices(4, C(0));
- DWORD index;
- uint32_t elem = 0;
+ DWORD index;
+ uint32_t elem = 0;
while (_BitScanForward(&index, bitmask))
{
indices[elem++] = C((int)index);
void buildDecl(Value* pStream, Value* pOutBuffers[4], const STREAMOUT_DECL& decl)
{
uint32_t numComponents = _mm_popcnt_u32(decl.componentMask);
- uint32_t packedMask = (1 << numComponents) - 1;
+ uint32_t packedMask = (1 << numComponents) - 1;
if (!decl.hole)
{
// increment stream pointer to correct slot
Value* pAttrib = GEP(pStream, C(4 * decl.attribSlot));
// load 4 components from stream
- Type* simd4Ty = VectorType::get(IRB()->getFloatTy(), 4);
+ Type* simd4Ty = VectorType::get(IRB()->getFloatTy(), 4);
Type* simd4PtrTy = PointerType::get(simd4Ty, 0);
- pAttrib = BITCAST(pAttrib, simd4PtrTy);
- Value *vattrib = LOAD(pAttrib);
+ pAttrib = BITCAST(pAttrib, simd4PtrTy);
+ Value* vattrib = LOAD(pAttrib);
// shuffle/pack enabled components
Value* vpackedAttrib = VSHUFFLE(vattrib, vattrib, PackMask(decl.componentMask));
}
}
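    // e.g. (illustrative) componentMask 0b0101 (x and z): the 4-wide load above
    // shuffles to {x, z, x, x} via PackMask, and packedMask = 0b11 covers the
    // numComponents = 2 lanes that the (elided) store actually writes.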
- void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc)
+ void buildStream(const STREAMOUT_COMPILE_STATE& state,
+ const STREAMOUT_STREAM& streamState,
+ Value* pSoCtx,
+ BasicBlock* returnBB,
+ Function* soFunc)
{
// get list of active SO buffers
std::unordered_set<uint32_t> activeSOBuffers;
}
// always increment numPrimStorageNeeded
- Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
- numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
- STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
+ Value* numPrimStorageNeeded = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
+ numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
+ STORE(numPrimStorageNeeded, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
// check OOB on active SO buffers. If any buffer is out of bound, don't write
// the primitive to any buffer
IRB()->SetInsertPoint(validBB);
- Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
- numPrimsWritten = ADD(numPrimsWritten, C(1));
- STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
+ Value* numPrimsWritten = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
+ numPrimsWritten = ADD(numPrimsWritten, C(1));
+ STORE(numPrimsWritten, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
// compute start pointer for each output buffer
Value* pOutBuffer[4];
Value* pOutBufferStartVertex[4];
Value* outBufferPitch[4];
- for (uint32_t b: activeSOBuffers)
+ for (uint32_t b : activeSOBuffers)
{
- Value* pBuf = getSOBuffer(pSoCtx, b);
- Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer });
- Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
- pOutBuffer[b] = GEP(pData, streamOffset);
+ Value* pBuf = getSOBuffer(pSoCtx, b);
+ Value* pData = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
+ Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
+ pOutBuffer[b] = GEP(pData, streamOffset);
pOutBufferStartVertex[b] = pOutBuffer[b];
- outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
+ outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
}
// loop over the vertices of the prim
- Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData });
+ Value* pStreamData = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pPrimData});
for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
{
buildVertex(streamState, pStreamData, pOutBuffer);
for (uint32_t b : activeSOBuffers)
{
pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
- pOutBuffer[b] = pOutBufferStartVertex[b];
+ pOutBuffer[b] = pOutBufferStartVertex[b];
}
}
// update each active buffer's streamOffset
for (uint32_t b : activeSOBuffers)
{
- Value* pBuf = getSOBuffer(pSoCtx, b);
- Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+ Value* pBuf = getSOBuffer(pSoCtx, b);
+ Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
- STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+ STORE(streamOffset, pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
}
}
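    // Worked example (illustrative): for a triangle with outBufferPitch[b] = 8
    // dwords, vertex v lands at pData + streamOffset + v * 8, and the buffer's
    // streamOffset then advances by 3 * 8 = 24 for the next prim.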
Function* Create(const STREAMOUT_COMPILE_STATE& state)
{
- std::stringstream fnName("SO_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+ std::stringstream fnName("SO_",
+ std::ios_base::in | std::ios_base::out | std::ios_base::ate);
fnName << ComputeCRC(0, &state, sizeof(state));
// SO function signature
PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
};
- FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
- Function* soFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+ FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
+ Function* soFunc = Function::Create(
+ fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
soFunc->getParent()->setModuleIdentifier(soFunc->getName());
// create return basic block
- BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
+ BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
IRB()->SetInsertPoint(entry);
// arguments
- auto argitr = soFunc->arg_begin();
+ auto argitr = soFunc->arg_begin();
Value* pSoCtx = &*argitr++;
pSoCtx->setName("pSoCtx");
/// @return PFN_SO_FUNC - pointer to SOS function
PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
{
- llvm::Function *func = (llvm::Function*)hFunc;
- JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
- PFN_SO_FUNC pfnStreamOut;
+ llvm::Function* func = (llvm::Function*)hFunc;
+ JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+ PFN_SO_FUNC pfnStreamOut;
pfnStreamOut = (PFN_SO_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
- // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+ // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
+ // add new IR to the module
pJitMgr->mIsModuleFinalized = true;
pJitMgr->DumpAsm(func, "SoFunc_optimized");
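    // Hence JitCompileStreamout below calls SetupNewModule() so each compile
    // starts from a fresh module rather than appending IR to a finalized one.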
/// @brief JIT compiles streamout shader
/// @param hJitMgr - JitManager handle
/// @param state - SO state to build function from
-extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr, const STREAMOUT_COMPILE_STATE& state)
+extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr,
+ const STREAMOUT_COMPILE_STATE& state)
{
JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
pJitMgr->SetupNewModule();
StreamOutJit theJit(pJitMgr);
- HANDLE hFunc = theJit.Create(soState);
+ HANDLE hFunc = theJit.Create(soState);
return JitStreamoutFunc(hJitMgr, hFunc);
}
/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file streamout_jit.h
-*
-* @brief Definition of the streamout jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file streamout_jit.h
+ *
+ * @brief Definition of the streamout jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
#pragma once
#include "common/formats.h"
// attribute to stream
uint32_t attribSlot;
- // attribute component mask
+ // attribute component mask
uint32_t componentMask;
// indicates this decl is a hole
{
// number of verts per primitive
uint32_t numVertsPerPrim;
- uint32_t offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
+ uint32_t
+ offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
uint64_t streamMask;
// stream decls
STREAMOUT_STREAM stream;
- bool operator==(const STREAMOUT_COMPILE_STATE &other) const
+ bool operator==(const STREAMOUT_COMPILE_STATE& other) const
{
- if (numVertsPerPrim != other.numVertsPerPrim) return false;
- if (stream.numDecls != other.stream.numDecls) return false;
+ if (numVertsPerPrim != other.numVertsPerPrim)
+ return false;
+ if (stream.numDecls != other.stream.numDecls)
+ return false;
for (uint32_t i = 0; i < stream.numDecls; ++i)
{
- if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex) return false;
- if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot) return false;
- if (stream.decl[i].componentMask != other.stream.decl[i].componentMask) return false;
- if (stream.decl[i].hole != other.stream.decl[i].hole) return false;
+ if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex)
+ return false;
+ if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot)
+ return false;
+ if (stream.decl[i].componentMask != other.stream.decl[i].componentMask)
+ return false;
+ if (stream.decl[i].hole != other.stream.decl[i].hole)
+ return false;
}
return true;