swr/rast: Clang-Format most rasterizer source code

author Alok Hota <alok.hota@intel.com>

Tue, 5 Jun 2018 18:59:53 +0000 (13:59 -0500)

committer Tim Rowley <timothy.o.rowley@intel.com>

Mon, 18 Jun 2018 18:57:38 +0000 (13:57 -0500)
author Alok Hota <alok.hota@intel.com>
Tue, 5 Jun 2018 18:59:53 +0000 (13:59 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Mon, 18 Jun 2018 18:57:38 +0000 (13:57 -0500)
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp

index 502835ca80130fd4d46f710fa9d61fd10c08f7dd..ceb06ae471f32a5852b82df7556ab22ed11ae8ba 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.cpp
-*
-* @brief Implementation for archrast.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file archrast.cpp
+ *
+ * @brief Implementation for archrast.
+ *
+ ******************************************************************************/
  #include <atomic>
  
  #include "common/os.h"
@@ -38,14 +38,14 @@ namespace ArchRast
      /// @brief struct that keeps track of depth and stencil event information
      struct DepthStencilStats
      {
-        uint32_t earlyZTestPassCount = 0;
-        uint32_t earlyZTestFailCount = 0;
-        uint32_t lateZTestPassCount = 0;
-        uint32_t lateZTestFailCount = 0;
+        uint32_t earlyZTestPassCount       = 0;
+        uint32_t earlyZTestFailCount       = 0;
+        uint32_t lateZTestPassCount        = 0;
+        uint32_t lateZTestFailCount        = 0;
          uint32_t earlyStencilTestPassCount = 0;
          uint32_t earlyStencilTestFailCount = 0;
-        uint32_t lateStencilTestPassCount = 0;
-        uint32_t lateStencilTestFailCount = 0;
+        uint32_t lateStencilTestPassCount  = 0;
+        uint32_t lateStencilTestFailCount  = 0;
      };
  
      struct CStats
@@ -76,12 +76,12 @@ namespace ArchRast
      struct CullStats
      {
          uint32_t degeneratePrimCount = 0;
-        uint32_t backfacePrimCount = 0;
+        uint32_t backfacePrimCount   = 0;
      };
  
      struct AlphaStats
      {
-        uint32_t alphaTestCount = 0;
+        uint32_t alphaTestCount  = 0;
          uint32_t alphaBlendCount = 0;
      };
  
@@ -93,20 +93,26 @@ namespace ArchRast
      class EventHandlerApiStats : public EventHandlerFile
      {
      public:
-        EventHandlerApiStats(uint32_t id) : EventHandlerFile(id) {
+        EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
+        {
  #if defined(_WIN32)
-            // Attempt to copy the events.proto file to the ArchRast output dir. It's common for tools to place the events.proto file
-            // in the DEBUG_OUTPUT_DIR when launching AR. If it exists, this will attempt to copy it the first time we get here to package
-            // it with the stats. Otherwise, the user would need to specify the events.proto location when parsing the stats in post.
+            // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
+            // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
+            // exists, this will attempt to copy it the first time we get here to package it with
+            // the stats. Otherwise, the user would need to specify the events.proto location when
+            // parsing the stats in post.
              std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
              eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
-            eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1) << "\\events.proto" << std::ends;
+            eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
+                                   << "\\events.proto" << std::ends;
  
              // If event.proto already exists, we're done; else do the copy
              struct stat buf; // Use a Posix stat for file existence check
-            if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0) {
+            if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
+            {
                  // Now check to make sure the events.proto source exists
-                if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0) {
+                if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
+                {
                      std::ifstream srcFile;
                      srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
                      if (srcFile.is_open())
@@ -125,18 +131,40 @@ namespace ArchRast
  
          virtual void Handle(const DrawInstancedEvent& event)
          {
-            DrawInfoEvent e(event.data.drawId, ArchRast::Instanced, event.data.topology, 
-                event.data.numVertices, 0, 0, event.data.startVertex, event.data.numInstances, 
-                event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
-            
+            DrawInfoEvent e(event.data.drawId,
+                            ArchRast::Instanced,
+                            event.data.topology,
+                            event.data.numVertices,
+                            0,
+                            0,
+                            event.data.startVertex,
+                            event.data.numInstances,
+                            event.data.startInstance,
+                            event.data.tsEnable,
+                            event.data.gsEnable,
+                            event.data.soEnable,
+                            event.data.soTopology,
+                            event.data.splitId);
+
              EventHandlerFile::Handle(e);
          }
  
          virtual void Handle(const DrawIndexedInstancedEvent& event)
          {
-            DrawInfoEvent e(event.data.drawId, ArchRast::IndexedInstanced, event.data.topology, 0,
-                event.data.numIndices, event.data.indexOffset, event.data.baseVertex, event.data.numInstances,
-                event.data.startInstance, event.data.tsEnable, event.data.gsEnable, event.data.soEnable, event.data.soTopology, event.data.splitId);
+            DrawInfoEvent e(event.data.drawId,
+                            ArchRast::IndexedInstanced,
+                            event.data.topology,
+                            0,
+                            event.data.numIndices,
+                            event.data.indexOffset,
+                            event.data.baseVertex,
+                            event.data.numInstances,
+                            event.data.startInstance,
+                            event.data.tsEnable,
+                            event.data.gsEnable,
+                            event.data.soEnable,
+                            event.data.soTopology,
+                            event.data.splitId);
  
              EventHandlerFile::Handle(e);
          }
@@ -156,127 +184,148 @@ namespace ArchRast
  
          virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
          {
-            //earlyZ test compute
+            // earlyZ test compute
              mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSSingleSample.earlyZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //earlyStencil test compute
+            // earlyStencil test compute
              mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSSingleSample.earlyStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
-            //earlyZ test single and multi sample
+            // earlyZ test single and multi sample
              mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSCombined.earlyZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //earlyStencil test single and multi sample
+            // earlyStencil test single and multi sample
              mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSCombined.earlyStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
              mNeedFlush = true;
          }
  
          virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
          {
-            //earlyZ test compute
+            // earlyZ test compute
              mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSSampleRate.earlyZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //earlyStencil test compute
+            // earlyStencil test compute
              mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSSampleRate.earlyStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
-            //earlyZ test single and multi sample
+            // earlyZ test single and multi sample
              mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSCombined.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSCombined.earlyZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //earlyStencil test single and multi sample
+            // earlyStencil test single and multi sample
              mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSCombined.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSCombined.earlyStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
              mNeedFlush = true;
          }
  
          virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
          {
-            //earlyZ test compute
+            // earlyZ test compute
              mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSNullPS.earlyZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //earlyStencil test compute
+            // earlyStencil test compute
              mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSNullPS.earlyStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
              mNeedFlush = true;
          }
  
          virtual void Handle(const LateDepthStencilInfoSingleSample& event)
          {
-            //lateZ test compute
+            // lateZ test compute
              mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSSingleSample.lateZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //lateStencil test compute
+            // lateStencil test compute
              mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSSingleSample.lateStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
-            //lateZ test single and multi sample
+            // lateZ test single and multi sample
              mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSCombined.lateZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //lateStencil test single and multi sample
+            // lateStencil test single and multi sample
              mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSCombined.lateStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
              mNeedFlush = true;
          }
  
          virtual void Handle(const LateDepthStencilInfoSampleRate& event)
          {
-            //lateZ test compute
+            // lateZ test compute
              mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSSampleRate.lateZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //lateStencil test compute
+            // lateStencil test compute
              mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSSampleRate.lateStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
-
-            //lateZ test single and multi sample
+            // lateZ test single and multi sample
              mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSCombined.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSCombined.lateZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //lateStencil test single and multi sample
+            // lateStencil test single and multi sample
              mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSCombined.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSCombined.lateStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
  
              mNeedFlush = true;
          }
  
          virtual void Handle(const LateDepthStencilInfoNullPS& event)
          {
-            //lateZ test compute
+            // lateZ test compute
              mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
-            mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
+            mDSNullPS.lateZTestFailCount +=
+                _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
  
-            //lateStencil test compute
+            // lateStencil test compute
              mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
-            mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
+            mDSNullPS.lateStencilTestFailCount +=
+                _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
              mNeedFlush = true;
          }
  
          virtual void Handle(const EarlyDepthInfoPixelRate& event)
          {
-            //earlyZ test compute
+            // earlyZ test compute
              mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
-            mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mDSPixelRate.earlyZTestFailCount +=
+                (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
              mNeedFlush = true;
          }
  
  
          virtual void Handle(const LateDepthInfoPixelRate& event)
          {
-            //lateZ test compute
+            // lateZ test compute
              mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
-            mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
+            mDSPixelRate.lateZTestFailCount +=
+                (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
              mNeedFlush = true;
          }
  
@@ -284,8 +333,10 @@ namespace ArchRast
          virtual void Handle(const ClipInfoEvent& event)
          {
              mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
-            mClipper.trivialRejectCount += event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
-            mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
+            mClipper.trivialRejectCount +=
+                event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
+            mClipper.trivialAcceptCount +=
+                _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
          }
  
          struct ShaderStats
@@ -328,58 +379,86 @@ namespace ArchRast
          // Flush cached events for this draw
          virtual void FlushDraw(uint32_t drawId)
          {
-            if (mNeedFlush == false) return;
+            if (mNeedFlush == false)
+                return;
  
              EventHandlerFile::Handle(PSInfo(drawId, mShaderStats[SHADER_PIXEL].numInstExecuted));
              EventHandlerFile::Handle(CSInfo(drawId, mShaderStats[SHADER_COMPUTE].numInstExecuted));
  
-            //singleSample
-            EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
-            EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
-
-            //sampleRate
-            EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
-            EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
-
-            //combined
-            EventHandlerFile::Handle(EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencil(drawId, mDSCombined.earlyStencilTestPassCount, mDSCombined.earlyStencilTestFailCount));
-            EventHandlerFile::Handle(LateStencil(drawId, mDSCombined.lateStencilTestPassCount, mDSCombined.lateStencilTestFailCount));
-
-            //pixelRate
-            EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
-            EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
-
-
-            //NullPS
-            EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
-            EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
+            // singleSample
+            EventHandlerFile::Handle(EarlyZSingleSample(
+                drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSingleSample(
+                drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
+            EventHandlerFile::Handle(
+                EarlyStencilSingleSample(drawId,
+                                         mDSSingleSample.earlyStencilTestPassCount,
+                                         mDSSingleSample.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(
+                LateStencilSingleSample(drawId,
+                                        mDSSingleSample.lateStencilTestPassCount,
+                                        mDSSingleSample.lateStencilTestFailCount));
+
+            // sampleRate
+            EventHandlerFile::Handle(EarlyZSampleRate(
+                drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZSampleRate(
+                drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
+            EventHandlerFile::Handle(
+                EarlyStencilSampleRate(drawId,
+                                       mDSSampleRate.earlyStencilTestPassCount,
+                                       mDSSampleRate.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencilSampleRate(drawId,
+                                                           mDSSampleRate.lateStencilTestPassCount,
+                                                           mDSSampleRate.lateStencilTestFailCount));
+
+            // combined
+            EventHandlerFile::Handle(
+                EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
+            EventHandlerFile::Handle(
+                LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencil(drawId,
+                                                  mDSCombined.earlyStencilTestPassCount,
+                                                  mDSCombined.earlyStencilTestFailCount));
+            EventHandlerFile::Handle(LateStencil(drawId,
+                                                 mDSCombined.lateStencilTestPassCount,
+                                                 mDSCombined.lateStencilTestFailCount));
+
+            // pixelRate
+            EventHandlerFile::Handle(EarlyZPixelRate(
+                drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
+            EventHandlerFile::Handle(LateZPixelRate(
+                drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
+
+
+            // NullPS
+            EventHandlerFile::Handle(
+                EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
+            EventHandlerFile::Handle(EarlyStencilNullPS(
+                drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
  
              // Rasterized Subspans
              EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
  
              // Alpha Subspans
-            EventHandlerFile::Handle(AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+            EventHandlerFile::Handle(
+                AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
  
              // Primitive Culling
-            EventHandlerFile::Handle(CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+            EventHandlerFile::Handle(
+                CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
  
              mDSSingleSample = {};
-            mDSSampleRate = {};
-            mDSCombined = {};
-            mDSPixelRate = {};
+            mDSSampleRate   = {};
+            mDSCombined     = {};
+            mDSPixelRate    = {};
              mDSNullPS = {};
  
-            rastStats = {};
-            mCullStats = {};
+            rastStats   = {};
+            mCullStats  = {};
              mAlphaStats = {};
  
-            mShaderStats[SHADER_PIXEL] = {};
+            mShaderStats[SHADER_PIXEL]   = {};
              mShaderStats[SHADER_COMPUTE] = {};
  
              mNeedFlush = false;
@@ -387,31 +466,38 @@ namespace ArchRast
  
          virtual void Handle(const FrontendDrawEndEvent& event)
          {
-            //Clipper
-            EventHandlerFile::Handle(ClipperEvent(event.data.drawId, mClipper.trivialRejectCount, mClipper.trivialAcceptCount, mClipper.mustClipCount));
+            // Clipper
+            EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
+                                                  mClipper.trivialRejectCount,
+                                                  mClipper.trivialAcceptCount,
+                                                  mClipper.mustClipCount));
  
-            //Tesselator
+            // Tessellator
              EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
  
-            //Geometry Shader
+            // Geometry Shader
              EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
              EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
              EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
  
-            EventHandlerFile::Handle(VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
-            EventHandlerFile::Handle(HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
-            EventHandlerFile::Handle(DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
-            EventHandlerFile::Handle(GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
-
-            mShaderStats[SHADER_VERTEX] = {};
-            mShaderStats[SHADER_HULL] = {};
-            mShaderStats[SHADER_DOMAIN] = {};
+            EventHandlerFile::Handle(
+                VSInfo(event.data.drawId, mShaderStats[SHADER_VERTEX].numInstExecuted));
+            EventHandlerFile::Handle(
+                HSInfo(event.data.drawId, mShaderStats[SHADER_HULL].numInstExecuted));
+            EventHandlerFile::Handle(
+                DSInfo(event.data.drawId, mShaderStats[SHADER_DOMAIN].numInstExecuted));
+            EventHandlerFile::Handle(
+                GSInfo(event.data.drawId, mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+
+            mShaderStats[SHADER_VERTEX]   = {};
+            mShaderStats[SHADER_HULL]     = {};
+            mShaderStats[SHADER_DOMAIN]   = {};
              mShaderStats[SHADER_GEOMETRY] = {};
  
-            //Reset Internal Counters
+            // Reset Internal Counters
              mClipper = {};
-            mTS = {};
-            mGS = {};
+            mTS      = {};
+            mGS      = {};
          }
  
          virtual void Handle(const GSPrimInfo& event)
@@ -421,10 +507,7 @@ namespace ArchRast
              mGS.vertsInput += event.data.vertsInput;
          }
  
-        virtual void Handle(const TessPrimCount& event)
-        {
-            mTS.inputPrims += event.data.primCount;
-        }
+        virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
  
          virtual void Handle(const RasterTileCount& event)
          {
@@ -433,13 +516,15 @@ namespace ArchRast
  
          virtual void Handle(const CullInfoEvent& event)
          {
-            mCullStats.degeneratePrimCount += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
-            mCullStats.backfacePrimCount   += _mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
+            mCullStats.degeneratePrimCount += _mm_popcnt_u32(
+                event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
+            mCullStats.backfacePrimCount += _mm_popcnt_u32(
+                event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
          }
  
          virtual void Handle(const AlphaInfoEvent& event)
          {
-            mAlphaStats.alphaTestCount  += event.data.alphaTestEnable;
+            mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
              mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
          }
  
@@ -447,17 +532,17 @@ namespace ArchRast
          bool mNeedFlush;
          // Per draw stats
          DepthStencilStats mDSSingleSample = {};
-        DepthStencilStats mDSSampleRate = {};
-        DepthStencilStats mDSPixelRate = {};
-        DepthStencilStats mDSCombined = {};
-        DepthStencilStats mDSNullPS = {};
-        DepthStencilStats mDSOmZ = {};
-        CStats mClipper = {};
-        TEStats mTS = {};
-        GSStateInfo mGS = {};
-        RastStats rastStats = {};
-        CullStats mCullStats = {};
-        AlphaStats mAlphaStats = {};
+        DepthStencilStats mDSSampleRate   = {};
+        DepthStencilStats mDSPixelRate    = {};
+        DepthStencilStats mDSCombined     = {};
+        DepthStencilStats mDSNullPS       = {};
+        DepthStencilStats mDSOmZ          = {};
+        CStats            mClipper        = {};
+        TEStats           mTS             = {};
+        GSStateInfo       mGS             = {};
+        RastStats         rastStats       = {};
+        CullStats         mCullStats      = {};
+        AlphaStats        mAlphaStats     = {};
  
          ShaderStats mShaderStats[NUM_SHADER_TYPES];
  
@@ -473,7 +558,7 @@ namespace ArchRast
      {
          // Can we assume single threaded here?
          static std::atomic<uint32_t> counter(0);
-        uint32_t id = counter.fetch_add(1);
+        uint32_t                     id = counter.fetch_add(1);
  
          EventManager* pManager = new EventManager();
  
@@ -528,4 +613,4 @@ namespace ArchRast
  
          pManager->FlushDraw(drawId);
      }
-}
+} // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h

index c74d6ad9097d2c67a64a4dd56dbb52851da01f83..d42c197bcda13ce42ec72fd4f6767774b063b22b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.h
-*
-* @brief Definitions for archrast.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file archrast.h
+ *
+ * @brief Definitions for archrast.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/os.h"
@@ -35,15 +35,14 @@ namespace ArchRast
  {
      enum class AR_THREAD
      {
-        API = 0,
+        API    = 0,
          WORKER = 1
      };
  
      HANDLE CreateThreadContext(AR_THREAD type);
-    void DestroyThreadContext(HANDLE hThreadContext);
+    void   DestroyThreadContext(HANDLE hThreadContext);
  
      // Dispatch event for this thread.
      void Dispatch(HANDLE hThreadContext, const Event& event);
      void FlushDraw(HANDLE hThreadContext, uint32_t drawId);
-};
-
+}; // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h b/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h

index 10e0dce6ad9649b5a9b0fb8adb952ede24284605..118a100e850eec296492811411f97b980da6c293 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
+++ b/src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file archrast.h
-*
-* @brief Definitions for the event manager.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file eventmanager.h
+ *
+ * @brief Definitions for the event manager.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/os.h"
@@ -78,12 +78,11 @@ namespace ArchRast
                  pHandler->FlushDraw(drawId);
              }
          }
-    private:
  
+    private:
          // Handlers stay registered for life
          void Detach(EventHandler* pHandler) { SWR_INVALID("Should not be called"); }
  
          std::vector<EventHandler*> mHandlers;
      };
-};
-
+}; // namespace ArchRast
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp

index 1ecb455c3a1c051a8cfe53677ecb4d7bd8265a31..e696dd2096ad7eb38d8d4a710d7ef4b3cb0cbcd7 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp
@@ -1,35 +1,36 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Implementation for events.  auto-generated file
-* 
-* DO NOT EDIT
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Implementation for events.  auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  #include "common/os.h"
  #include "gen_ar_event.hpp"
  #include "gen_ar_eventhandler.hpp"
@@ -42,3 +43,5 @@ void ${name}::Accept(EventHandler* pHandler) const
      pHandler->Handle(*this);
  }
  % endfor
+// clang-format on
+
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp

index 685a10b3867238902e84dc52e0c5649f759c3675..fe3f261f68093150cd7b7eef89f5b615d145b2fb 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp
@@ -1,35 +1,36 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Definitions for events.  auto-generated file
-* 
-* DO NOT EDIT
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-* 
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Definitions for events.  auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  #pragma once
  
  #include "common/os.h"
@@ -46,7 +47,7 @@ namespace ArchRast
      };
  % endfor
  
-    //Forward decl
+    // Forward decl
      class EventHandler;
  
      //////////////////////////////////////////////////////////////////////////
@@ -104,5 +105,6 @@ namespace ArchRast
  
          virtual void Accept(EventHandler* pHandler) const;
      };
-% endfor
-}
-\ No newline at end of file
+    % endfor
+} // namespace ArchRast
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp

index 87d0ef47cab3fb4a240dbba350e3924b809d841f..140dd00dbeb963929a9818cf5fd63d12db8ddbbd 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp
@@ -1,35 +1,36 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Event handler interface.  auto-generated file
-* 
-* DO NOT EDIT
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Event handler interface.  auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  #pragma once
  
  #include "${event_header}"
@@ -51,4 +52,5 @@ namespace ArchRast
          virtual void Handle(const ${name}& event) {}
  % endfor
      };
-}
+} // namespace ArchRast
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp

index 79612f312086feeaef2ce02340f2a7ab2c53be43..7c10e124c3cd016d148ee0a56860a599c5319fa2 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp
@@ -1,35 +1,36 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief Event handler interface.  auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief Event handler interface.  auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  #pragma once
  
  #include "common/os.h"
@@ -47,19 +48,22 @@ namespace ArchRast
      class EventHandlerFile : public EventHandler
      {
      public:
-        EventHandlerFile(uint32_t id)
-        : mBufOffset(0)
+        EventHandlerFile(uint32_t id) : mBufOffset(0)
          {
  #if defined(_WIN32)
              DWORD pid = GetCurrentProcessId();
              TCHAR procname[MAX_PATH];
              GetModuleFileName(NULL, procname, MAX_PATH);
-            const char* pBaseName = strrchr(procname, '\\');
+            const char*       pBaseName = strrchr(procname, '\\');
              std::stringstream outDir;
              outDir << KNOB_DEBUG_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
              mOutputDir = outDir.str();
-            if (CreateDirectory(mOutputDir.c_str(), NULL)) {
-                std::cout << std::endl << "ArchRast Dir:       " << mOutputDir << std::endl << std::endl << std::flush;
+            if (CreateDirectory(mOutputDir.c_str(), NULL))
+            {
+                std::cout << std::endl
+                          << "ArchRast Dir:       " << mOutputDir << std::endl
+                          << std::endl
+                          << std::flush;
              }
  
              // There could be multiple threads creating thread pools. We
@@ -80,10 +84,7 @@ namespace ArchRast
  #endif
          }
  
-        virtual ~EventHandlerFile()
-        {
-            FlushBuffer();
-        }
+        virtual ~EventHandlerFile() { FlushBuffer(); }
  
          //////////////////////////////////////////////////////////////////////////
          /// @brief Flush buffer to file.
@@ -109,7 +110,7 @@ namespace ArchRast
                  file.write((char*)mBuffer, mBufOffset);
                  file.close();
  
-                mBufOffset = 0;
+                mBufOffset       = 0;
                  mHeaderBufOffset = 0; // Reset header offset so its no longer considered.
              }
              return true;
@@ -124,7 +125,8 @@ namespace ArchRast
                  if (!FlushBuffer())
                  {
                      // Don't corrupt what's already in the buffer?
-                    /// @todo Maybe add corrupt marker to buffer here in case we can open file in future?
+                    /// @todo Maybe add corrupt marker to buffer here in case we can open file in
+                    /// future?
                      return;
                  }
              }
@@ -159,8 +161,9 @@ namespace ArchRast
          std::string mOutputDir;
  
          static const uint32_t mBufferSize = 1024;
-        uint8_t mBuffer[mBufferSize];
+        uint8_t               mBuffer[mBufferSize];
          uint32_t mBufOffset{0};
          uint32_t mHeaderBufOffset{0};
      };
-}
+} // namespace ArchRast
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp

index 088b1cd79d5fb12c17e89f8b3d771352c22bcfe5..b8da5298f3de398790cba3714e784662f0a0b084 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp
@@ -19,11 +19,11 @@
  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  // IN THE SOFTWARE.
-// 
+//
  // @file BackendPixelRate${fileNum}.cpp
-// 
+//
  // @brief auto-generated file
-// 
+//
  // DO NOT EDIT
  //
  // Generation Command Line:
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp

index bcbcb30cc14c5991d35802e7655391f83e66a19c..5182bc4259f0e5efc3dab458d4598537506dfa89 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp
@@ -30,6 +30,7 @@
  //  ${'\n//    '.join(cmdline)}
  //
  //============================================================================
+// clang-format off
  #pragma once
  
  //============================================================================
@@ -57,10 +58,10 @@ ${func['decl']}
      %for arg in func['types']:
      args.push_back(${arg}->getType());
      %endfor
-    Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}, args);
+    Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']}, args);
      return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
      %else:
-    Function * pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
+    Function* pFunc = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::${func['intrin']});
      return CALL(pFunc, std::initializer_list<Value*>{${argList}}, name);
      %endif
  %else:
@@ -68,4 +69,5 @@ ${func['decl']}
  %endif
  }
  
-%endfor
+% endfor
+    // clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp

index 5625ef8a0de0aa106e9c25f4f260ab3b868f08c5..d0682c55f03babd92b64b035143a1000ac789001 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp
@@ -19,11 +19,11 @@
  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  // IN THE SOFTWARE.
-// 
+//
  // @file ${filename}
-// 
+//
  // @brief auto-generated file
-// 
+//
  // DO NOT EDIT
  //
  // Generation Command Line:
@@ -31,6 +31,8 @@
  //
  //============================================================================
  
+// clang-format off
+
  %for num in range(numFiles):
  void Init${tableName}${num}();
  %endfor
@@ -41,3 +43,4 @@ static INLINE void Init${tableName}()
      Init${tableName}${num}();
      %endfor
  }
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp

index cfdc37072e5d6257ff18ee34866db235da438513..9375569ebeb131e04bdd9163bbaef9c76c230712 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
@@ -1,35 +1,36 @@
  /******************************************************************************
-* Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}.cpp
-*
-* @brief Dynamic Knobs for Core.
-*
-* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}.cpp
+ *
+ * @brief Dynamic Knobs for Core.
+ *
+ * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  <% calc_max_knob_len(knobs) %>
  % for inc in includes:
  #include <${inc}>
@@ -40,13 +41,14 @@
  //========================================================
  // Implementation
  //========================================================
-void KnobBase::autoExpandEnvironmentVariables(std::string &text)
+void KnobBase::autoExpandEnvironmentVariables(std::string& text)
  {
  #if (__GNUC__) && (GCC_VERSION < 409000)
      // <regex> isn't implemented prior to gcc-4.9.0
      // unix style variable replacement
      size_t start;
-    while ((start = text.find("${'${'}")) != std::string::npos) {
+    while ((start = text.find("${'${'}")) != std::string::npos)
+    {
          size_t end = text.find("}");
          if (end == std::string::npos)
              break;
@@ -54,7 +56,8 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
          text.replace(start, end - start + 1, var);
      }
      // win32 style variable replacement
-    while ((start = text.find("%")) != std::string::npos) {
+    while ((start = text.find("%")) != std::string::npos)
+    {
          size_t end = text.find("%", start + 1);
          if (end == std::string::npos)
              break;
@@ -65,7 +68,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
      {
          // unix style variable replacement
          static std::regex env("\\$\\{([^}]+)\\}");
-        std::smatch match;
+        std::smatch       match;
          while (std::regex_search(text, match, env))
          {
              const std::string var = GetEnv(match[1].str());
@@ -77,7 +80,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
      {
          // win32 style variable replacement
          static std::regex env("\\%([^}]+)\\%");
-        std::smatch match;
+        std::smatch       match;
          while (std::regex_search(text, match, env))
          {
              const std::string var = GetEnv(match[1].str());
@@ -89,7 +92,6 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
  #endif
  }
  
-
  //========================================================
  // Static Data Members
  //========================================================
@@ -113,7 +115,10 @@ std::string GlobalKnobs::ToString(const char* optPerLinePrefix)
      std::basic_stringstream<char> str;
      str << std::showbase << std::setprecision(1) << std::fixed;
  
-    if (optPerLinePrefix == nullptr) { optPerLinePrefix = ""; }
+    if (optPerLinePrefix == nullptr)
+    {
+        optPerLinePrefix = "";
+    }
  
      % for knob in knobs:
      str << optPerLinePrefix << "KNOB_${knob[0]}:${space_knob(knob[0])}";
@@ -157,3 +162,4 @@ std::string GlobalKnobs::ToString(const char* optPerLinePrefix)
          name_len = len(name)
          return ' '*(max_len - name_len)
  %>
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h

index 4213f334433d61ea827680283963def549b79ea1..71dbdacfd1d4bbb7783b68e3c07eb3222f39c6ad 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h
@@ -1,35 +1,36 @@
  /******************************************************************************
-* Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}.h
-*
-* @brief Dynamic Knobs for Core.
-*
-* ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
-*
-* Generation Command Line:
-*  ${'\n*    '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}.h
+ *
+ * @brief Dynamic Knobs for Core.
+ *
+ * ======================= AUTO GENERATED: DO NOT EDIT !!! ====================
+ *
+ * Generation Command Line:
+ *  ${'\n *    '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
  <% calc_max_knob_len(knobs) %>
  #pragma once
  #include <string>
@@ -38,11 +39,11 @@ struct KnobBase
  {
  private:
      // Update the input string.
-    static void autoExpandEnvironmentVariables(std::string &text);
+    static void autoExpandEnvironmentVariables(std::string& text);
  
  protected:
      // Leave input alone and return new string.
-    static std::string expandEnvironmentVariables(std::string const &input)
+    static std::string expandEnvironmentVariables(std::string const& input)
      {
          std::string text = input;
          autoExpandEnvironmentVariables(text);
@@ -50,7 +51,7 @@ protected:
      }
  
      template <typename T>
-    static T expandEnvironmentVariables(T const &input)
+    static T expandEnvironmentVariables(T const& input)
      {
          return input;
      }
@@ -60,8 +61,8 @@ template <typename T>
  struct Knob : KnobBase
  {
  public:
-    const   T&  Value() const               { return m_Value; }
-    const   T&  Value(T const &newValue)
+    const T& Value() const { return m_Value; }
+    const T& Value(T const& newValue)
      {
          m_Value = expandEnvironmentVariables(newValue);
          return Value();
@@ -150,3 +151,4 @@ extern GlobalKnobs g_GlobalKnobs;
          name_len = len(name)
          return ' '*(max_len - name_len)
  %>
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp

index 190e660ad1cfaccf5630ca91655a7b23acba99b1..df2934fa6157786921bdf9931fdc98deb91010e0 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp
@@ -1,35 +1,37 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file ${filename}
-*
-* @brief auto-generated file
-*
-* DO NOT EDIT
-*
-* Generation Command Line:
-*   ${'\n*     '.join(cmdline)}
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file ${filename}
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ * Generation Command Line:
+ *   ${'\n *     '.join(cmdline)}
+ *
+ ******************************************************************************/
+// clang-format off
+
  #pragma once
  
  namespace SwrJit
@@ -37,7 +39,7 @@ namespace SwrJit
      using namespace llvm;
  
  %for type in types:
-    INLINE static StructType *Gen_${type['name']}(JitManager* pJitMgr)
+    INLINE static StructType* Gen_${type['name']}(JitManager* pJitMgr)
      {
          %if needs_ctx(type):
          LLVMContext& ctx = pJitMgr->mContext;
@@ -76,7 +78,7 @@ namespace SwrJit
      %endfor
  
  %endfor
-} // ns SwrJit
+} // namespace SwrJit
  
  <%! # Global function definitions
      import os
@@ -98,3 +100,4 @@ namespace SwrJit
          pad_amt = max_len - cur_len
          return ' '*pad_amt
  %>
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp

index 06c876231b930d34a1bb997c93f33f7fbceb5ee8..92e0f40623554aab5c9e22227ec85eb9726c1222 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp
@@ -19,17 +19,18 @@
  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  // IN THE SOFTWARE.
-// 
+//
  // @file gen_rasterizer${fileNum}.cpp
-// 
+//
  // @brief auto-generated file
-// 
+//
  // DO NOT EDIT
  //
  // Generation Command Line:
  //  ${'\n//    '.join(cmdline)}
  //
  //============================================================================
+// clang-format off
  
  #include "core/rasterizer.h"
  #include "core/rasterizer_impl.h"
@@ -40,3 +41,4 @@ void InitRasterizerFuncs${fileNum}()
      ${func}
      %endfor
  }
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.cpp b/src/gallium/drivers/swr/rasterizer/common/formats.cpp

index 1c086ff1882589841ddbad1917176da263e5d434..e0800f5e88e4cf27dbc5f2cd701db216760587b4 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/formats.cpp
+++ b/src/gallium/drivers/swr/rasterizer/common/formats.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file formats.cpp
-*
-* @brief auto-generated file
-*
-* DO NOT EDIT
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.cpp
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
  
  #include "formats.h"
  
@@ -72,6842 +72,9227 @@ const SWR_FORMAT_INFO gFormatInfo[] = {
      // R32G32B32A32_FLOAT (0x0)
      {
          "R32G32B32A32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32A32_SINT (0x1)
      {
          "R32G32B32A32_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32A32_UINT (0x2)
      {
          "R32G32B32A32_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R64G64_FLOAT (0x5)
      {
          "R64G64_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 64, 64, 0, 0 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {64, 64, 0, 0},               // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32X32_FLOAT (0x6)
      {
          "R32G32B32X32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32A32_SSCALED (0x7)
      {
          "R32G32B32A32_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32A32_USCALED (0x8)
      {
          "R32G32B32A32_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x10)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x11)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x14)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x18)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x19)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R32G32B32A32_SFIXED (0x20)
      {
          "R32G32B32A32_SFIXED",
-        { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 32, 32 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 32, 32},             // Bits per component
+        128,                          // Bits per element
+        16,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x21)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x22)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x23)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x24)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x25)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x26)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x27)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x28)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x29)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x2F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x30)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x31)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x32)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x33)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x34)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x35)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x36)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x37)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x38)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x39)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x3F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R32G32B32_FLOAT (0x40)
      {
          "R32G32B32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32_SINT (0x41)
      {
          "R32G32B32_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32_UINT (0x42)
      {
          "R32G32B32_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x43)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x44)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R32G32B32_SSCALED (0x45)
      {
          "R32G32B32_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32B32_USCALED (0x46)
      {
          "R32G32B32_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x47)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x48)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x49)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x4F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R32G32B32_SFIXED (0x50)
      {
          "R32G32B32_SFIXED",
-        { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 32, 32, 32, 0 }, // Bits per component
-        96, // Bits per element
-        12, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {32, 32, 32, 0},              // Bits per component
+        96,                           // Bits per element
+        12,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x51)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x52)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x53)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x54)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x55)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x56)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x57)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x58)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x59)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x5F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x60)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x61)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x62)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x63)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x64)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x65)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x66)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x67)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x68)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x69)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x6F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x70)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x71)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x72)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x73)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x74)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x75)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x76)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x77)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x78)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x79)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x7F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R16G16B16A16_UNORM (0x80)
      {
          "R16G16B16A16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {16, 16, 16, 16},         // Bits per component
+        64,                       // Bits per element
+        8,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 65535.0f,
+         1.0f / 65535.0f,
+         1.0f / 65535.0f,
+         1.0f / 65535.0f}, // To float scale factor
+        1,                 // bcWidth
+        1,                 // bcHeight
      },
  
      // R16G16B16A16_SNORM (0x81)
      {
          "R16G16B16A16_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {16, 16, 16, 16},         // Bits per component
+        64,                       // Bits per element
+        8,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 32767.0f,
+         1.0f / 32767.0f,
+         1.0f / 32767.0f,
+         1.0f / 32767.0f}, // To float scale factor
+        1,                 // bcWidth
+        1,                 // bcHeight
      },
  
      // R16G16B16A16_SINT (0x82)
      {
          "R16G16B16A16_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16A16_UINT (0x83)
      {
          "R16G16B16A16_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16A16_FLOAT (0x84)
      {
          "R16G16B16A16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32_FLOAT (0x85)
      {
          "R32G32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32_SINT (0x86)
      {
          "R32G32_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32_UINT (0x87)
      {
          "R32G32_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_FLOAT_X8X24_TYPELESS (0x88)
      {
          "R32_FLOAT_X8X24_TYPELESS",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // X32_TYPELESS_G8X24_UINT (0x89)
      {
          "X32_TYPELESS_G8X24_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNUSED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L32A32_FLOAT (0x8A)
      {
          "L32A32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x8B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x8C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R64_FLOAT (0x8D)
      {
          "R64_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 64, 0, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {64, 0, 0, 0},                // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16X16_UNORM (0x8E)
      {
          "R16G16B16X16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},     // Defaults for missing components
+        {0, 1, 2, 3},              // Swizzle
+        {16, 16, 16, 16},          // Bits per component
+        64,                        // Bits per element
+        8,                         // Bytes per element
+        4,                         // Num components
+        false,                     // isSRGB
+        false,                     // isBC
+        false,                     // isSubsampled
+        false,                     // isLuminance
+        {true, true, true, false}, // Is normalized?
+        {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f}, // To float scale factor
+        1,                                                         // bcWidth
+        1,                                                         // bcHeight
      },
  
      // R16G16B16X16_FLOAT (0x8F)
      {
          "R16G16B16X16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x90)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L32X32_FLOAT (0x91)
      {
          "L32X32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // I32X32_FLOAT (0x92)
      {
          "I32X32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16A16_SSCALED (0x93)
      {
          "R16G16B16A16_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16A16_USCALED (0x94)
      {
          "R16G16B16A16_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 16, 16, 16, 16 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {16, 16, 16, 16},             // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32_SSCALED (0x95)
      {
          "R32G32_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32G32_USCALED (0x96)
      {
          "R32G32_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x97)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x98)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x99)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x9F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R32G32_SFIXED (0xA0)
      {
          "R32G32_SFIXED",
-        { SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 32, 32, 0, 0 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SFIXED, SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {32, 32, 0, 0},               // Bits per component
+        64,                           // Bits per element
+        8,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xA1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xA9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xAF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xB9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xBF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // B8G8R8A8_UNORM (0xC0)
      {
          "B8G8R8A8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {2, 1, 0, 3},             // Swizzle
+        {8, 8, 8, 8},             // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+        1,                                                            // bcWidth
+        1,                                                            // bcHeight
      },
  
      // B8G8R8A8_UNORM_SRGB (0xC1)
      {
          "B8G8R8A8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {2, 1, 0, 3},             // Swizzle
+        {8, 8, 8, 8},             // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        true,                     // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+        1,                                                            // bcWidth
+        1,                                                            // bcHeight
      },
  
      // R10G10B10A2_UNORM (0xC2)
      {
          "R10G10B10A2_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+        1,                                                             // bcWidth
+        1,                                                             // bcHeight
      },
  
      // R10G10B10A2_UNORM_SRGB (0xC3)
      {
          "R10G10B10A2_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        true,                     // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+        1,                                                             // bcWidth
+        1,                                                             // bcHeight
      },
  
      // R10G10B10A2_UINT (0xC4)
      {
          "R10G10B10A2_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xC5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xC6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8G8B8A8_UNORM (0xC7)
      {
          "R8G8B8A8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {8, 8, 8, 8},             // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+        1,                                                            // bcWidth
+        1,                                                            // bcHeight
      },
  
      // R8G8B8A8_UNORM_SRGB (0xC8)
      {
          "R8G8B8A8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {8, 8, 8, 8},             // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        true,                     // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}, // To float scale factor
+        1,                                                            // bcWidth
+        1,                                                            // bcHeight
      },
  
      // R8G8B8A8_SNORM (0xC9)
      {
          "R8G8B8A8_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {8, 8, 8, 8},             // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f}, // To float scale factor
+        1,                                                            // bcWidth
+        1,                                                            // bcHeight
      },
  
      // R8G8B8A8_SINT (0xCA)
      {
          "R8G8B8A8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8B8A8_UINT (0xCB)
      {
          "R8G8B8A8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16_UNORM (0xCC)
      {
          "R16G16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                    // Defaults for missing components
+        {0, 1, 0, 0},                             // Swizzle
+        {16, 16, 0, 0},                           // Bits per component
+        32,                                       // Bits per element
+        4,                                        // Bytes per element
+        2,                                        // Num components
+        false,                                    // isSRGB
+        false,                                    // isBC
+        false,                                    // isSubsampled
+        false,                                    // isLuminance
+        {true, true, false, false},               // Is normalized?
+        {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
+        1,                                        // bcWidth
+        1,                                        // bcHeight
      },
  
      // R16G16_SNORM (0xCD)
      {
          "R16G16_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 32767.0f, 1.0f / 32767.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                    // Defaults for missing components
+        {0, 1, 0, 0},                             // Swizzle
+        {16, 16, 0, 0},                           // Bits per component
+        32,                                       // Bits per element
+        4,                                        // Bytes per element
+        2,                                        // Num components
+        false,                                    // isSRGB
+        false,                                    // isBC
+        false,                                    // isSubsampled
+        false,                                    // isLuminance
+        {true, true, false, false},               // Is normalized?
+        {1.0f / 32767.0f, 1.0f / 32767.0f, 0, 0}, // To float scale factor
+        1,                                        // bcWidth
+        1,                                        // bcHeight
      },
  
      // R16G16_SINT (0xCE)
      {
          "R16G16_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16_UINT (0xCF)
      {
          "R16G16_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16_FLOAT (0xD0)
      {
          "R16G16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10A2_UNORM (0xD1)
      {
          "B10G10R10A2_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {2, 1, 0, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+        1,                                                             // bcWidth
+        1,                                                             // bcHeight
      },
  
      // B10G10R10A2_UNORM_SRGB (0xD2)
      {
          "B10G10R10A2_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {2, 1, 0, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        true,                     // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 3.0f}, // To float scale factor
+        1,                                                             // bcWidth
+        1,                                                             // bcHeight
      },
  
      // R11G11B10_FLOAT (0xD3)
      {
          "R11G11B10_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 11, 11, 10, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {11, 11, 10, 0},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xD4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
  
      // R10G10B10_FLOAT_A2_UNORM (0xD5)
      {
          "R10G10B10_FLOAT_A2_UNORM",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f / 3.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},           // Defaults for missing components
+        {0, 1, 2, 3},                    // Swizzle
+        {10, 10, 10, 2},                 // Bits per component
+        32,                              // Bits per element
+        4,                               // Bytes per element
+        4,                               // Num components
+        false,                           // isSRGB
+        false,                           // isBC
+        false,                           // isSubsampled
+        false,                           // isLuminance
+        {false, false, false, false},    // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f / 3.0f}, // To float scale factor
+        1,                               // bcWidth
+        1,                               // bcHeight
      },
  
      // R32_SINT (0xD6)
      {
          "R32_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_UINT (0xD7)
      {
          "R32_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_FLOAT (0xD8)
      {
          "R32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R24_UNORM_X8_TYPELESS (0xD9)
      {
          "R24_UNORM_X8_TYPELESS",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 24, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 16777215.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},         // Defaults for missing components
+        {0, 1, 2, 3},                  // Swizzle
+        {24, 0, 0, 0},                 // Bits per component
+        32,                            // Bits per element
+        4,                             // Bytes per element
+        1,                             // Num components
+        false,                         // isSRGB
+        false,                         // isBC
+        false,                         // isSubsampled
+        false,                         // isLuminance
+        {true, false, false, false},   // Is normalized?
+        {1.0f / 16777215.0f, 0, 0, 0}, // To float scale factor
+        1,                             // bcWidth
+        1,                             // bcHeight
      },
  
      // X24_TYPELESS_G8_UINT (0xDA)
      {
          "X24_TYPELESS_G8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 1, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {1, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xDB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xDC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L32_UNORM (0xDD)
      {
          "L32_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 4294967295.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},           // Defaults for missing components
+        {0, 0, 0, 0},                    // Swizzle
+        {32, 0, 0, 0},                   // Bits per component
+        32,                              // Bits per element
+        4,                               // Bytes per element
+        1,                               // Num components
+        false,                           // isSRGB
+        false,                           // isBC
+        false,                           // isSubsampled
+        true,                            // isLuminance
+        {true, false, false, false},     // Is normalized?
+        {1.0f / 4294967295.0f, 0, 0, 0}, // To float scale factor
+        1,                               // bcWidth
+        1,                               // bcHeight
      },
  
      // padding (0xDE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L16A16_UNORM (0xDF)
      {
          "L16A16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                    // Defaults for missing components
+        {0, 3, 0, 0},                             // Swizzle
+        {16, 16, 0, 0},                           // Bits per component
+        32,                                       // Bits per element
+        4,                                        // Bytes per element
+        2,                                        // Num components
+        false,                                    // isSRGB
+        false,                                    // isBC
+        false,                                    // isSubsampled
+        true,                                     // isLuminance
+        {true, true, false, false},               // Is normalized?
+        {1.0f / 65535.0f, 1.0f / 65535.0f, 0, 0}, // To float scale factor
+        1,                                        // bcWidth
+        1,                                        // bcHeight
      },
  
      // I24X8_UNORM (0xE0)
      {
          "I24X8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 24, 8, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                     // Defaults for missing components
+        {0, 3, 0, 0},                              // Swizzle
+        {24, 8, 0, 0},                             // Bits per component
+        32,                                        // Bits per element
+        4,                                         // Bytes per element
+        2,                                         // Num components
+        false,                                     // isSRGB
+        false,                                     // isBC
+        false,                                     // isSubsampled
+        true,                                      // isLuminance
+        {true, true, false, false},                // Is normalized?
+        {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+        1,                                         // bcWidth
+        1,                                         // bcHeight
      },
  
      // L24X8_UNORM (0xE1)
      {
          "L24X8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 24, 8, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                     // Defaults for missing components
+        {0, 3, 0, 0},                              // Swizzle
+        {24, 8, 0, 0},                             // Bits per component
+        32,                                        // Bits per element
+        4,                                         // Bytes per element
+        2,                                         // Num components
+        false,                                     // isSRGB
+        false,                                     // isBC
+        false,                                     // isSubsampled
+        true,                                      // isLuminance
+        {true, true, false, false},                // Is normalized?
+        {1.0f / 16777215.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+        1,                                         // bcWidth
+        1,                                         // bcHeight
      },
  
      // padding (0xE2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // I32_FLOAT (0xE3)
      {
          "I32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L32_FLOAT (0xE4)
      {
          "L32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // A32_FLOAT (0xE5)
      {
          "A32_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {3, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xE6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xE7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xE8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // B8G8R8X8_UNORM (0xE9)
      {
          "B8G8R8X8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                               // Defaults for missing components
+        {2, 1, 0, 3},                                        // Swizzle
+        {8, 8, 8, 8},                                        // Bits per component
+        32,                                                  // Bits per element
+        4,                                                   // Bytes per element
+        4,                                                   // Num components
+        false,                                               // isSRGB
+        false,                                               // isBC
+        false,                                               // isSubsampled
+        false,                                               // isLuminance
+        {true, true, true, false},                           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+        1,                                                   // bcWidth
+        1,                                                   // bcHeight
      },
  
      // B8G8R8X8_UNORM_SRGB (0xEA)
      {
          "B8G8R8X8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                               // Defaults for missing components
+        {2, 1, 0, 3},                                        // Swizzle
+        {8, 8, 8, 8},                                        // Bits per component
+        32,                                                  // Bits per element
+        4,                                                   // Bytes per element
+        4,                                                   // Num components
+        true,                                                // isSRGB
+        false,                                               // isBC
+        false,                                               // isSubsampled
+        false,                                               // isLuminance
+        {true, true, true, false},                           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+        1,                                                   // bcWidth
+        1,                                                   // bcHeight
      },
  
      // R8G8B8X8_UNORM (0xEB)
      {
          "R8G8B8X8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                               // Defaults for missing components
+        {0, 1, 2, 3},                                        // Swizzle
+        {8, 8, 8, 8},                                        // Bits per component
+        32,                                                  // Bits per element
+        4,                                                   // Bytes per element
+        4,                                                   // Num components
+        false,                                               // isSRGB
+        false,                                               // isBC
+        false,                                               // isSubsampled
+        false,                                               // isLuminance
+        {true, true, true, false},                           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+        1,                                                   // bcWidth
+        1,                                                   // bcHeight
      },
  
      // R8G8B8X8_UNORM_SRGB (0xEC)
      {
          "R8G8B8X8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                               // Defaults for missing components
+        {0, 1, 2, 3},                                        // Swizzle
+        {8, 8, 8, 8},                                        // Bits per component
+        32,                                                  // Bits per element
+        4,                                                   // Bytes per element
+        4,                                                   // Num components
+        true,                                                // isSRGB
+        false,                                               // isBC
+        false,                                               // isSubsampled
+        false,                                               // isLuminance
+        {true, true, true, false},                           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f}, // To float scale factor
+        1,                                                   // bcWidth
+        1,                                                   // bcHeight
      },
  
      // R9G9B9E5_SHAREDEXP (0xED)
      {
          "R9G9B9E5_SHAREDEXP",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 9, 9, 9, 5 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {9, 9, 9, 5},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10X2_UNORM (0xEE)
      {
          "B10G10R10X2_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                                  // Defaults for missing components
+        {2, 1, 0, 3},                                           // Swizzle
+        {10, 10, 10, 2},                                        // Bits per component
+        32,                                                     // Bits per element
+        4,                                                      // Bytes per element
+        4,                                                      // Num components
+        false,                                                  // isSRGB
+        false,                                                  // isBC
+        false,                                                  // isSubsampled
+        false,                                                  // isLuminance
+        {true, true, true, false},                              // Is normalized?
+        {1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f}, // To float scale factor
+        1,                                                      // bcWidth
+        1,                                                      // bcHeight
      },
  
      // padding (0xEF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L16A16_FLOAT (0xF0)
      {
          "L16A16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xF1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xF2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R10G10B10X2_USCALED (0xF3)
      {
          "R10G10B10X2_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8B8A8_SSCALED (0xF4)
      {
          "R8G8B8A8_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8B8A8_USCALED (0xF5)
      {
          "R8G8B8A8_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16_SSCALED (0xF6)
      {
          "R16G16_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16_USCALED (0xF7)
      {
          "R16G16_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 16, 16, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {16, 16, 0, 0},               // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_SSCALED (0xF8)
      {
          "R32_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_USCALED (0xF9)
      {
          "R32_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0xFA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xFB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xFC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xFD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xFE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0xFF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // B5G6R5_UNORM (0x100)
      {
          "B5G6R5_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 0 }, // Swizzle
-        { 5, 6, 5, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                         // Defaults for missing components
+        {2, 1, 0, 0},                                  // Swizzle
+        {5, 6, 5, 0},                                  // Bits per component
+        16,                                            // Bits per element
+        2,                                             // Bytes per element
+        3,                                             // Num components
+        false,                                         // isSRGB
+        false,                                         // isBC
+        false,                                         // isSubsampled
+        false,                                         // isLuminance
+        {true, true, true, false},                     // Is normalized?
+        {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
+        1,                                             // bcWidth
+        1,                                             // bcHeight
      },
  
      // B5G6R5_UNORM_SRGB (0x101)
      {
          "B5G6R5_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 0 }, // Swizzle
-        { 5, 6, 5, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        3, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                         // Defaults for missing components
+        {2, 1, 0, 0},                                  // Swizzle
+        {5, 6, 5, 0},                                  // Bits per component
+        16,                                            // Bits per element
+        2,                                             // Bytes per element
+        3,                                             // Num components
+        true,                                          // isSRGB
+        false,                                         // isBC
+        false,                                         // isSubsampled
+        false,                                         // isLuminance
+        {true, true, true, false},                     // Is normalized?
+        {1.0f / 31.0f, 1.0f / 63.0f, 1.0f / 31.0f, 0}, // To float scale factor
+        1,                                             // bcWidth
+        1,                                             // bcHeight
      },
  
      // B5G5R5A1_UNORM (0x102)
      {
          "B5G5R5A1_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 5, 5, 5, 1 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                   // Defaults for missing components
+        {2, 1, 0, 3},                                            // Swizzle
+        {5, 5, 5, 1},                                            // Bits per component
+        16,                                                      // Bits per element
+        2,                                                       // Bytes per element
+        4,                                                       // Num components
+        false,                                                   // isSRGB
+        false,                                                   // isBC
+        false,                                                   // isSubsampled
+        false,                                                   // isLuminance
+        {true, true, true, true},                                // Is normalized?
+        {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
+        1,                                                       // bcWidth
+        1,                                                       // bcHeight
      },
  
      // B5G5R5A1_UNORM_SRGB (0x103)
      {
          "B5G5R5A1_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 5, 5, 5, 1 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                   // Defaults for missing components
+        {2, 1, 0, 3},                                            // Swizzle
+        {5, 5, 5, 1},                                            // Bits per component
+        16,                                                      // Bits per element
+        2,                                                       // Bytes per element
+        4,                                                       // Num components
+        true,                                                    // isSRGB
+        false,                                                   // isBC
+        false,                                                   // isSubsampled
+        false,                                                   // isLuminance
+        {true, true, true, true},                                // Is normalized?
+        {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 1.0f}, // To float scale factor
+        1,                                                       // bcWidth
+        1,                                                       // bcHeight
      },
  
      // B4G4R4A4_UNORM (0x104)
      {
          "B4G4R4A4_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 4, 4, 4, 4 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                    // Defaults for missing components
+        {2, 1, 0, 3},                                             // Swizzle
+        {4, 4, 4, 4},                                             // Bits per component
+        16,                                                       // Bits per element
+        2,                                                        // Bytes per element
+        4,                                                        // Num components
+        false,                                                    // isSRGB
+        false,                                                    // isBC
+        false,                                                    // isSubsampled
+        false,                                                    // isLuminance
+        {true, true, true, true},                                 // Is normalized?
+        {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+        1,                                                        // bcWidth
+        1,                                                        // bcHeight
      },
  
      // B4G4R4A4_UNORM_SRGB (0x105)
      {
          "B4G4R4A4_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 4, 4, 4, 4 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                    // Defaults for missing components
+        {2, 1, 0, 3},                                             // Swizzle
+        {4, 4, 4, 4},                                             // Bits per component
+        16,                                                       // Bits per element
+        2,                                                        // Bytes per element
+        4,                                                        // Num components
+        true,                                                     // isSRGB
+        false,                                                    // isBC
+        false,                                                    // isSubsampled
+        false,                                                    // isLuminance
+        {true, true, true, true},                                 // Is normalized?
+        {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+        1,                                                        // bcWidth
+        1,                                                        // bcHeight
      },
  
      // R8G8_UNORM (0x106)
      {
          "R8G8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                // Defaults for missing components
+        {0, 1, 0, 0},                         // Swizzle
+        {8, 8, 0, 0},                         // Bits per component
+        16,                                   // Bits per element
+        2,                                    // Bytes per element
+        2,                                    // Num components
+        false,                                // isSRGB
+        false,                                // isBC
+        false,                                // isSubsampled
+        false,                                // isLuminance
+        {true, true, false, false},           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+        1,                                    // bcWidth
+        1,                                    // bcHeight
      },
  
      // R8G8_SNORM (0x107)
      {
          "R8G8_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 127.0f, 1.0f / 127.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                // Defaults for missing components
+        {0, 1, 0, 0},                         // Swizzle
+        {8, 8, 0, 0},                         // Bits per component
+        16,                                   // Bits per element
+        2,                                    // Bytes per element
+        2,                                    // Num components
+        false,                                // isSRGB
+        false,                                // isBC
+        false,                                // isSubsampled
+        false,                                // isLuminance
+        {true, true, false, false},           // Is normalized?
+        {1.0f / 127.0f, 1.0f / 127.0f, 0, 0}, // To float scale factor
+        1,                                    // bcWidth
+        1,                                    // bcHeight
      },
  
      // R8G8_SINT (0x108)
      {
          "R8G8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8_UINT (0x109)
      {
          "R8G8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16_UNORM (0x10A)
      {
          "R16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {16, 0, 0, 0},               // Bits per component
+        16,                          // Bits per element
+        2,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 65535.0f, 0, 0, 0},  // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // R16_SNORM (0x10B)
      {
          "R16_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 32767.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {16, 0, 0, 0},               // Bits per component
+        16,                          // Bits per element
+        2,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 32767.0f, 0, 0, 0},  // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // R16_SINT (0x10C)
      {
          "R16_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16_UINT (0x10D)
      {
          "R16_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16_FLOAT (0x10E)
      {
          "R16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x10F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x110)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // I16_UNORM (0x111)
      {
          "I16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {16, 0, 0, 0},               // Bits per component
+        16,                          // Bits per element
+        2,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        true,                        // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 65535.0f, 0, 0, 0},  // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // L16_UNORM (0x112)
      {
          "L16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {16, 0, 0, 0},               // Bits per component
+        16,                          // Bits per element
+        2,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        true,                        // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 65535.0f, 0, 0, 0},  // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // A16_UNORM (0x113)
      {
          "A16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 65535.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {3, 0, 0, 0},                // Swizzle
+        {16, 0, 0, 0},               // Bits per component
+        16,                          // Bits per element
+        2,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 65535.0f, 0, 0, 0},  // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // L8A8_UNORM (0x114)
      {
          "L8A8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                // Defaults for missing components
+        {0, 3, 0, 0},                         // Swizzle
+        {8, 8, 0, 0},                         // Bits per component
+        16,                                   // Bits per element
+        2,                                    // Bytes per element
+        2,                                    // Num components
+        false,                                // isSRGB
+        false,                                // isBC
+        false,                                // isSubsampled
+        true,                                 // isLuminance
+        {true, true, false, false},           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+        1,                                    // bcWidth
+        1,                                    // bcHeight
      },
  
      // I16_FLOAT (0x115)
      {
          "I16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L16_FLOAT (0x116)
      {
          "L16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // A16_FLOAT (0x117)
      {
          "A16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {3, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L8A8_UNORM_SRGB (0x118)
      {
          "L8A8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, true, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                // Defaults for missing components
+        {0, 3, 0, 0},                         // Swizzle
+        {8, 8, 0, 0},                         // Bits per component
+        16,                                   // Bits per element
+        2,                                    // Bytes per element
+        2,                                    // Num components
+        true,                                 // isSRGB
+        false,                                // isBC
+        false,                                // isSubsampled
+        true,                                 // isLuminance
+        {true, true, false, false},           // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 0, 0}, // To float scale factor
+        1,                                    // bcWidth
+        1,                                    // bcHeight
      },
  
      // padding (0x119)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // B5G5R5X1_UNORM (0x11A)
      {
          "B5G5R5X1_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 5, 5, 5, 1 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                            // Defaults for missing components
+        {2, 1, 0, 3},                                     // Swizzle
+        {5, 5, 5, 1},                                     // Bits per component
+        16,                                               // Bits per element
+        2,                                                // Bytes per element
+        4,                                                // Num components
+        false,                                            // isSRGB
+        false,                                            // isBC
+        false,                                            // isSubsampled
+        false,                                            // isLuminance
+        {true, true, true, false},                        // Is normalized?
+        {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
+        1,                                                // bcWidth
+        1,                                                // bcHeight
      },
  
      // B5G5R5X1_UNORM_SRGB (0x11B)
      {
          "B5G5R5X1_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 5, 5, 5, 1 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNUSED},
+        {0, 0, 0, 0x3f800000},                            // Defaults for missing components
+        {2, 1, 0, 3},                                     // Swizzle
+        {5, 5, 5, 1},                                     // Bits per component
+        16,                                               // Bits per element
+        2,                                                // Bytes per element
+        4,                                                // Num components
+        true,                                             // isSRGB
+        false,                                            // isBC
+        false,                                            // isSubsampled
+        false,                                            // isLuminance
+        {true, true, true, false},                        // Is normalized?
+        {1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f}, // To float scale factor
+        1,                                                // bcWidth
+        1,                                                // bcHeight
      },
  
      // R8G8_SSCALED (0x11C)
      {
          "R8G8_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8_USCALED (0x11D)
      {
          "R8G8_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16_SSCALED (0x11E)
      {
          "R16_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16_USCALED (0x11F)
      {
          "R16_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 16, 0, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {16, 0, 0, 0},                // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x120)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x121)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x122)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x123)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // A1B5G5R5_UNORM (0x124)
      {
          "A1B5G5R5_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 2, 1, 0 }, // Swizzle
-        { 1, 5, 5, 5 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 1.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                   // Defaults for missing components
+        {3, 2, 1, 0},                                            // Swizzle
+        {1, 5, 5, 5},                                            // Bits per component
+        16,                                                      // Bits per element
+        2,                                                       // Bytes per element
+        4,                                                       // Num components
+        false,                                                   // isSRGB
+        false,                                                   // isBC
+        false,                                                   // isSubsampled
+        false,                                                   // isLuminance
+        {true, true, true, true},                                // Is normalized?
+        {1.0f / 1.0f, 1.0f / 31.0f, 1.0f / 31.0f, 1.0f / 31.0f}, // To float scale factor
+        1,                                                       // bcWidth
+        1,                                                       // bcHeight
      },
  
      // A4B4G4R4_UNORM (0x125)
      {
          "A4B4G4R4_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 2, 1, 0 }, // Swizzle
-        { 4, 4, 4, 4 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM},
+        {0, 0, 0, 0x3f800000},                                    // Defaults for missing components
+        {3, 2, 1, 0},                                             // Swizzle
+        {4, 4, 4, 4},                                             // Bits per component
+        16,                                                       // Bits per element
+        2,                                                        // Bytes per element
+        4,                                                        // Num components
+        false,                                                    // isSRGB
+        false,                                                    // isBC
+        false,                                                    // isSubsampled
+        false,                                                    // isLuminance
+        {true, true, true, true},                                 // Is normalized?
+        {1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f, 1.0f / 15.0f}, // To float scale factor
+        1,                                                        // bcWidth
+        1,                                                        // bcHeight
      },
  
      // L8A8_UINT (0x126)
      {
          "L8A8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L8A8_SINT (0x127)
      {
          "L8A8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 3, 0, 0 }, // Swizzle
-        { 8, 8, 0, 0 }, // Bits per component
-        16, // Bits per element
-        2, // Bytes per element
-        2, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 3, 0, 0},                 // Swizzle
+        {8, 8, 0, 0},                 // Bits per component
+        16,                           // Bits per element
+        2,                            // Bytes per element
+        2,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 0, 0},           // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x128)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x129)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x12F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x130)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x131)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x132)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x133)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x134)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x135)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x136)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x137)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x138)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x139)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x13F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8_UNORM (0x140)
      {
          "R8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // R8_SNORM (0x141)
      {
          "R8_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 127.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // R8_SINT (0x142)
      {
          "R8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8_UINT (0x143)
      {
          "R8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // A8_UNORM (0x144)
      {
          "A8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 3, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {3, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // I8_UNORM (0x145)
      {
          "I8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        true,                        // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // L8_UNORM (0x146)
      {
          "L8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        true,                        // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // padding (0x147)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x148)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8_SSCALED (0x149)
      {
          "R8_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8_USCALED (0x14A)
      {
          "R8_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x14B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L8_UNORM_SRGB (0x14C)
      {
          "L8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 0, 0, 0},                // Swizzle
+        {8, 0, 0, 0},                // Bits per component
+        8,                           // Bits per element
+        1,                           // Bytes per element
+        1,                           // Num components
+        true,                        // isSRGB
+        false,                       // isBC
+        false,                       // isSubsampled
+        true,                        // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        1,                           // bcWidth
+        1,                           // bcHeight
      },
  
      // padding (0x14D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x14E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
-    // padding (0x14F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
+    // padding (0x14F)
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x150)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x151)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // L8_UINT (0x152)
      {
          "L8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // L8_SINT (0x153)
      {
          "L8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // I8_UINT (0x154)
      {
          "I8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // I8_SINT (0x155)
      {
          "I8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        true, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        true,                         // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x156)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x157)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x158)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x159)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x15F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x160)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x161)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x162)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x163)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x164)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x165)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x166)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x167)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x168)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x169)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x16F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x170)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x171)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x172)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x173)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x174)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x175)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x176)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x177)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x178)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x179)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17A)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17B)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17C)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17D)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x17F)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // DXT1_RGB_SRGB (0x180)
      {
          "DXT1_RGB_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // padding (0x181)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x182)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // YCRCB_SWAPUVY (0x183)
      {
          "YCRCB_SWAPUVY",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        true, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        2, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        true,                         // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        2,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x184)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x185)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // BC1_UNORM (0x186)
      {
          "BC1_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC2_UNORM (0x187)
      {
          "BC2_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC3_UNORM (0x188)
      {
          "BC3_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC4_UNORM (0x189)
      {
          "BC4_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC5_UNORM (0x18A)
      {
          "BC5_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC1_UNORM_SRGB (0x18B)
      {
          "BC1_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        true, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        true,                        // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC2_UNORM_SRGB (0x18C)
      {
          "BC2_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        true, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        true,                        // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC3_UNORM_SRGB (0x18D)
      {
          "BC3_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        true, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        true,                        // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // padding (0x18E)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // YCRCB_SWAPUV (0x18F)
      {
          "YCRCB_SWAPUV",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        true, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        2, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 8, 8, 8},                 // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        true,                         // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        2,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x190)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // DXT1_RGB (0x191)
      {
          "DXT1_RGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // padding (0x192)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8G8B8_UNORM (0x193)
      {
          "R8G8B8_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                            // Defaults for missing components
+        {0, 1, 2, 0},                                     // Swizzle
+        {8, 8, 8, 0},                                     // Bits per component
+        24,                                               // Bits per element
+        3,                                                // Bytes per element
+        3,                                                // Num components
+        false,                                            // isSRGB
+        false,                                            // isBC
+        false,                                            // isSubsampled
+        false,                                            // isLuminance
+        {true, true, true, false},                        // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
+        1,                                                // bcWidth
+        1,                                                // bcHeight
      },
  
      // R8G8B8_SNORM (0x194)
      {
          "R8G8B8_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                            // Defaults for missing components
+        {0, 1, 2, 0},                                     // Swizzle
+        {8, 8, 8, 0},                                     // Bits per component
+        24,                                               // Bits per element
+        3,                                                // Bytes per element
+        3,                                                // Num components
+        false,                                            // isSRGB
+        false,                                            // isBC
+        false,                                            // isSubsampled
+        false,                                            // isLuminance
+        {true, true, true, false},                        // Is normalized?
+        {1.0f / 127.0f, 1.0f / 127.0f, 1.0f / 127.0f, 0}, // To float scale factor
+        1,                                                // bcWidth
+        1,                                                // bcHeight
      },
  
      // R8G8B8_SSCALED (0x195)
      {
          "R8G8B8_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {8, 8, 8, 0},                 // Bits per component
+        24,                           // Bits per element
+        3,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8B8_USCALED (0x196)
      {
          "R8G8B8_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {8, 8, 8, 0},                 // Bits per component
+        24,                           // Bits per element
+        3,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R64G64B64A64_FLOAT (0x197)
      {
          "R64G64B64A64_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 64, 64, 64, 64 }, // Bits per component
-        256, // Bits per element
-        32, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {64, 64, 64, 64},             // Bits per component
+        256,                          // Bits per element
+        32,                           // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R64G64B64_FLOAT (0x198)
      {
          "R64G64B64_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 64, 64, 64, 0 }, // Bits per component
-        192, // Bits per element
-        24, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {64, 64, 64, 0},              // Bits per component
+        192,                          // Bits per element
+        24,                           // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // BC4_SNORM (0x199)
      {
          "BC4_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        64, // Bits per element
-        8, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        64,                          // Bits per element
+        8,                           // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 127.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC5_SNORM (0x19A)
      {
          "BC5_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 127.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // R16G16B16_FLOAT (0x19B)
      {
          "R16G16B16_FLOAT",
-        { SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_FLOAT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {16, 16, 16, 0},              // Bits per component
+        48,                           // Bits per element
+        6,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16_UNORM (0x19C)
      {
          "R16G16B16_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                                  // Defaults for missing components
+        {0, 1, 2, 0},                                           // Swizzle
+        {16, 16, 16, 0},                                        // Bits per component
+        48,                                                     // Bits per element
+        6,                                                      // Bytes per element
+        3,                                                      // Num components
+        false,                                                  // isSRGB
+        false,                                                  // isBC
+        false,                                                  // isSubsampled
+        false,                                                  // isLuminance
+        {true, true, true, false},                              // Is normalized?
+        {1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / 65535.0f, 0}, // To float scale factor
+        1,                                                      // bcWidth
+        1,                                                      // bcHeight
      },
  
      // R16G16B16_SNORM (0x19D)
      {
          "R16G16B16_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                                  // Defaults for missing components
+        {0, 1, 2, 0},                                           // Swizzle
+        {16, 16, 16, 0},                                        // Bits per component
+        48,                                                     // Bits per element
+        6,                                                      // Bytes per element
+        3,                                                      // Num components
+        false,                                                  // isSRGB
+        false,                                                  // isBC
+        false,                                                  // isSubsampled
+        false,                                                  // isLuminance
+        {true, true, true, false},                              // Is normalized?
+        {1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / 32767.0f, 0}, // To float scale factor
+        1,                                                      // bcWidth
+        1,                                                      // bcHeight
      },
  
      // R16G16B16_SSCALED (0x19E)
      {
          "R16G16B16_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {16, 16, 16, 0},              // Bits per component
+        48,                           // Bits per element
+        6,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16_USCALED (0x19F)
      {
          "R16G16B16_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {16, 16, 16, 0},              // Bits per component
+        48,                           // Bits per element
+        6,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x1A0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // BC6H_SF16 (0x1A1)
      {
          "BC6H_SF16",
-        { SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 127.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 127.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC7_UNORM (0x1A2)
      {
          "BC7_UNORM",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC7_UNORM_SRGB (0x1A3)
      {
          "BC7_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        true, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        true,                        // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // BC6H_UF16 (0x1A4)
      {
          "BC6H_UF16",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 8, 8, 8 }, // Bits per component
-        128, // Bits per element
-        16, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        true, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, false, false, false }, // Is normalized?
-        { 1.0f / 255.0f, 0, 0, 0 }, // To float scale factor
-        4, // bcWidth
-        4, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},       // Defaults for missing components
+        {0, 1, 2, 3},                // Swizzle
+        {8, 8, 8, 8},                // Bits per component
+        128,                         // Bits per element
+        16,                          // Bytes per element
+        1,                           // Num components
+        false,                       // isSRGB
+        true,                        // isBC
+        false,                       // isSubsampled
+        false,                       // isLuminance
+        {true, false, false, false}, // Is normalized?
+        {1.0f / 255.0f, 0, 0, 0},    // To float scale factor
+        4,                           // bcWidth
+        4,                           // bcHeight
      },
  
      // padding (0x1A5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1A6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1A7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8G8B8_UNORM_SRGB (0x1A8)
      {
          "R8G8B8_UNORM_SRGB",
-        { SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        true, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, false }, // Is normalized?
-        { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNORM, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},                            // Defaults for missing components
+        {0, 1, 2, 0},                                     // Swizzle
+        {8, 8, 8, 0},                                     // Bits per component
+        24,                                               // Bits per element
+        3,                                                // Bytes per element
+        3,                                                // Num components
+        true,                                             // isSRGB
+        false,                                            // isBC
+        false,                                            // isSubsampled
+        false,                                            // isLuminance
+        {true, true, true, false},                        // Is normalized?
+        {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 0}, // To float scale factor
+        1,                                                // bcWidth
+        1,                                                // bcHeight
      },
  
      // padding (0x1A9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1AF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R16G16B16_UINT (0x1B0)
      {
          "R16G16B16_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {16, 16, 16, 0},              // Bits per component
+        48,                           // Bits per element
+        6,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R16G16B16_SINT (0x1B1)
      {
          "R16G16B16_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 16, 16, 16, 0 }, // Bits per component
-        48, // Bits per element
-        6, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {16, 16, 16, 0},              // Bits per component
+        48,                           // Bits per element
+        6,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R32_SFIXED (0x1B2)
      {
          "R32_SFIXED",
-        { SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 0, 0, 0 }, // Swizzle
-        { 32, 0, 0, 0 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SFIXED, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 0, 0, 0},                 // Swizzle
+        {32, 0, 0, 0},                // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R10G10B10A2_SNORM (0x1B3)
      {
          "R10G10B10A2_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {0, 1, 2, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
+        1,                                                          // bcWidth
+        1,                                                          // bcHeight
      },
  
      // R10G10B10A2_USCALED (0x1B4)
      {
          "R10G10B10A2_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R10G10B10A2_SSCALED (0x1B5)
      {
          "R10G10B10A2_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R10G10B10A2_SINT (0x1B6)
      {
          "R10G10B10A2_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10A2_SNORM (0x1B7)
      {
          "B10G10R10A2_SNORM",
-        { SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { true, true, true, true }, // Is normalized?
-        { 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM, SWR_TYPE_SNORM},
+        {0, 0, 0, 0x3f800000},    // Defaults for missing components
+        {2, 1, 0, 3},             // Swizzle
+        {10, 10, 10, 2},          // Bits per component
+        32,                       // Bits per element
+        4,                        // Bytes per element
+        4,                        // Num components
+        false,                    // isSRGB
+        false,                    // isBC
+        false,                    // isSubsampled
+        false,                    // isLuminance
+        {true, true, true, true}, // Is normalized?
+        {1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 511.0f, 1.0f / 1.0f}, // To float scale factor
+        1,                                                          // bcWidth
+        1,                                                          // bcHeight
      },
  
      // B10G10R10A2_USCALED (0x1B8)
      {
          "B10G10R10A2_USCALED",
-        { SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED, SWR_TYPE_USCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {2, 1, 0, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10A2_SSCALED (0x1B9)
      {
          "B10G10R10A2_SSCALED",
-        { SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED },
-        { 0, 0, 0, 0x3f800000 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED, SWR_TYPE_SSCALED},
+        {0, 0, 0, 0x3f800000},        // Defaults for missing components
+        {2, 1, 0, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10A2_UINT (0x1BA)
      {
          "B10G10R10A2_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {2, 1, 0, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // B10G10R10A2_SINT (0x1BB)
      {
          "B10G10R10A2_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 2, 1, 0, 3 }, // Swizzle
-        { 10, 10, 10, 2 }, // Bits per component
-        32, // Bits per element
-        4, // Bytes per element
-        4, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 1.0f }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {2, 1, 0, 3},                 // Swizzle
+        {10, 10, 10, 2},              // Bits per component
+        32,                           // Bits per element
+        4,                            // Bytes per element
+        4,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 1.0f},     // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x1BC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1BD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1BE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1BF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1C7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // R8G8B8_UINT (0x1C8)
      {
          "R8G8B8_UINT",
-        { SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {8, 8, 8, 0},                 // Bits per component
+        24,                           // Bits per element
+        3,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // R8G8B8_SINT (0x1C9)
      {
          "R8G8B8_SINT",
-        { SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 0 }, // Swizzle
-        { 8, 8, 8, 0 }, // Bits per component
-        24, // Bits per element
-        3, // Bytes per element
-        3, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 1.0f, 1.0f, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_SINT, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 0},                 // Swizzle
+        {8, 8, 8, 0},                 // Bits per component
+        24,                           // Bits per element
+        3,                            // Bytes per element
+        3,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 1.0f, 1.0f, 0},        // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  
      // padding (0x1CA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1CB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1CC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1CD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1CE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1CF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1D9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1DF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1E9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1EA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1EB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1EC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1ED)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1EE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1EF)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F0)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F1)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F2)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F3)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F4)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F5)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F6)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F7)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F8)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1F9)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1FA)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1FB)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1FC)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1FD)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // padding (0x1FE)
-    {
-        nullptr,
-        { SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0 },{ 0, 0, 0, 0 },{ 0, 0, 0, 0 },
-        0, 0, 0, false, false, false, false,
-        { false, false, false, false },
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        1, 1
-    },
+    {nullptr,
+     {SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     {0, 0, 0, 0},
+     0,
+     0,
+     0,
+     false,
+     false,
+     false,
+     false,
+     {false, false, false, false},
+     {0.0f, 0.0f, 0.0f, 0.0f},
+     1,
+     1},
      // RAW (0x1FF)
      {
          "RAW",
-        { SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN },
-        { 0, 0, 0, 0x1 }, // Defaults for missing components
-        { 0, 1, 2, 3 }, // Swizzle
-        { 8, 0, 0, 0 }, // Bits per component
-        8, // Bits per element
-        1, // Bytes per element
-        1, // Num components
-        false, // isSRGB
-        false, // isBC
-        false, // isSubsampled
-        false, // isLuminance
-        { false, false, false, false }, // Is normalized?
-        { 1.0f, 0, 0, 0 }, // To float scale factor
-        1, // bcWidth
-        1, // bcHeight
+        {SWR_TYPE_UINT, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN, SWR_TYPE_UNKNOWN},
+        {0, 0, 0, 0x1},               // Defaults for missing components
+        {0, 1, 2, 3},                 // Swizzle
+        {8, 0, 0, 0},                 // Bits per component
+        8,                            // Bits per element
+        1,                            // Bytes per element
+        1,                            // Num components
+        false,                        // isSRGB
+        false,                        // isBC
+        false,                        // isSubsampled
+        false,                        // isLuminance
+        {false, false, false, false}, // Is normalized?
+        {1.0f, 0, 0, 0},              // To float scale factor
+        1,                            // bcWidth
+        1,                            // bcHeight
      },
  };
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/formats.h b/src/gallium/drivers/swr/rasterizer/common/formats.h

index f13f338f8b1283ada3bcb1b1f68e48dcc74afd2f..b7a3e533d157574ca0a436c06bd27e783070225e 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/formats.h
+++ b/src/gallium/drivers/swr/rasterizer/common/formats.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file formats.h
-* 
-* @brief auto-generated file
-* 
-* DO NOT EDIT
-* 
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.h
+ *
+ * @brief auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
  
  #pragma once
  
@@ -54,179 +54,179 @@ enum SWR_TYPE
  //////////////////////////////////////////////////////////////////////////
  enum SWR_FORMAT
  {
-    R32G32B32A32_FLOAT          = 0x0,
-    R32G32B32A32_SINT           = 0x1,
-    R32G32B32A32_UINT           = 0x2,
-    R64G64_FLOAT                = 0x5,
-    R32G32B32X32_FLOAT          = 0x6,
-    R32G32B32A32_SSCALED        = 0x7,
-    R32G32B32A32_USCALED        = 0x8,
-    R32G32B32A32_SFIXED         = 0x20,
-    R32G32B32_FLOAT             = 0x40,
-    R32G32B32_SINT              = 0x41,
-    R32G32B32_UINT              = 0x42,
-    R32G32B32_SSCALED           = 0x45,
-    R32G32B32_USCALED           = 0x46,
-    R32G32B32_SFIXED            = 0x50,
-    R16G16B16A16_UNORM          = 0x80,
-    R16G16B16A16_SNORM          = 0x81,
-    R16G16B16A16_SINT           = 0x82,
-    R16G16B16A16_UINT           = 0x83,
-    R16G16B16A16_FLOAT          = 0x84,
-    R32G32_FLOAT                = 0x85,
-    R32G32_SINT                 = 0x86,
-    R32G32_UINT                 = 0x87,
-    R32_FLOAT_X8X24_TYPELESS    = 0x88,
-    X32_TYPELESS_G8X24_UINT     = 0x89,
-    L32A32_FLOAT                = 0x8A,
-    R64_FLOAT                   = 0x8D,
-    R16G16B16X16_UNORM          = 0x8E,
-    R16G16B16X16_FLOAT          = 0x8F,
-    L32X32_FLOAT                = 0x91,
-    I32X32_FLOAT                = 0x92,
-    R16G16B16A16_SSCALED        = 0x93,
-    R16G16B16A16_USCALED        = 0x94,
-    R32G32_SSCALED              = 0x95,
-    R32G32_USCALED              = 0x96,
-    R32G32_SFIXED               = 0xA0,
-    B8G8R8A8_UNORM              = 0xC0,
-    B8G8R8A8_UNORM_SRGB         = 0xC1,
-    R10G10B10A2_UNORM           = 0xC2,
-    R10G10B10A2_UNORM_SRGB      = 0xC3,
-    R10G10B10A2_UINT            = 0xC4,
-    R8G8B8A8_UNORM              = 0xC7,
-    R8G8B8A8_UNORM_SRGB         = 0xC8,
-    R8G8B8A8_SNORM              = 0xC9,
-    R8G8B8A8_SINT               = 0xCA,
-    R8G8B8A8_UINT               = 0xCB,
-    R16G16_UNORM                = 0xCC,
-    R16G16_SNORM                = 0xCD,
-    R16G16_SINT                 = 0xCE,
-    R16G16_UINT                 = 0xCF,
-    R16G16_FLOAT                = 0xD0,
-    B10G10R10A2_UNORM           = 0xD1,
-    B10G10R10A2_UNORM_SRGB      = 0xD2,
-    R11G11B10_FLOAT             = 0xD3,
-    R10G10B10_FLOAT_A2_UNORM    = 0xD5,
-    R32_SINT                    = 0xD6,
-    R32_UINT                    = 0xD7,
-    R32_FLOAT                   = 0xD8,
-    R24_UNORM_X8_TYPELESS       = 0xD9,
-    X24_TYPELESS_G8_UINT        = 0xDA,
-    L32_UNORM                   = 0xDD,
-    L16A16_UNORM                = 0xDF,
-    I24X8_UNORM                 = 0xE0,
-    L24X8_UNORM                 = 0xE1,
-    I32_FLOAT                   = 0xE3,
-    L32_FLOAT                   = 0xE4,
-    A32_FLOAT                   = 0xE5,
-    B8G8R8X8_UNORM              = 0xE9,
-    B8G8R8X8_UNORM_SRGB         = 0xEA,
-    R8G8B8X8_UNORM              = 0xEB,
-    R8G8B8X8_UNORM_SRGB         = 0xEC,
-    R9G9B9E5_SHAREDEXP          = 0xED,
-    B10G10R10X2_UNORM           = 0xEE,
-    L16A16_FLOAT                = 0xF0,
-    R10G10B10X2_USCALED         = 0xF3,
-    R8G8B8A8_SSCALED            = 0xF4,
-    R8G8B8A8_USCALED            = 0xF5,
-    R16G16_SSCALED              = 0xF6,
-    R16G16_USCALED              = 0xF7,
-    R32_SSCALED                 = 0xF8,
-    R32_USCALED                 = 0xF9,
-    B5G6R5_UNORM                = 0x100,
-    B5G6R5_UNORM_SRGB           = 0x101,
-    B5G5R5A1_UNORM              = 0x102,
-    B5G5R5A1_UNORM_SRGB         = 0x103,
-    B4G4R4A4_UNORM              = 0x104,
-    B4G4R4A4_UNORM_SRGB         = 0x105,
-    R8G8_UNORM                  = 0x106,
-    R8G8_SNORM                  = 0x107,
-    R8G8_SINT                   = 0x108,
-    R8G8_UINT                   = 0x109,
-    R16_UNORM                   = 0x10A,
-    R16_SNORM                   = 0x10B,
-    R16_SINT                    = 0x10C,
-    R16_UINT                    = 0x10D,
-    R16_FLOAT                   = 0x10E,
-    I16_UNORM                   = 0x111,
-    L16_UNORM                   = 0x112,
-    A16_UNORM                   = 0x113,
-    L8A8_UNORM                  = 0x114,
-    I16_FLOAT                   = 0x115,
-    L16_FLOAT                   = 0x116,
-    A16_FLOAT                   = 0x117,
-    L8A8_UNORM_SRGB             = 0x118,
-    B5G5R5X1_UNORM              = 0x11A,
-    B5G5R5X1_UNORM_SRGB         = 0x11B,
-    R8G8_SSCALED                = 0x11C,
-    R8G8_USCALED                = 0x11D,
-    R16_SSCALED                 = 0x11E,
-    R16_USCALED                 = 0x11F,
-    A1B5G5R5_UNORM              = 0x124,
-    A4B4G4R4_UNORM              = 0x125,
-    L8A8_UINT                   = 0x126,
-    L8A8_SINT                   = 0x127,
-    R8_UNORM                    = 0x140,
-    R8_SNORM                    = 0x141,
-    R8_SINT                     = 0x142,
-    R8_UINT                     = 0x143,
-    A8_UNORM                    = 0x144,
-    I8_UNORM                    = 0x145,
-    L8_UNORM                    = 0x146,
-    R8_SSCALED                  = 0x149,
-    R8_USCALED                  = 0x14A,
-    L8_UNORM_SRGB               = 0x14C,
-    L8_UINT                     = 0x152,
-    L8_SINT                     = 0x153,
-    I8_UINT                     = 0x154,
-    I8_SINT                     = 0x155,
-    DXT1_RGB_SRGB               = 0x180,
-    YCRCB_SWAPUVY               = 0x183,
-    BC1_UNORM                   = 0x186,
-    BC2_UNORM                   = 0x187,
-    BC3_UNORM                   = 0x188,
-    BC4_UNORM                   = 0x189,
-    BC5_UNORM                   = 0x18A,
-    BC1_UNORM_SRGB              = 0x18B,
-    BC2_UNORM_SRGB              = 0x18C,
-    BC3_UNORM_SRGB              = 0x18D,
-    YCRCB_SWAPUV                = 0x18F,
-    DXT1_RGB                    = 0x191,
-    R8G8B8_UNORM                = 0x193,
-    R8G8B8_SNORM                = 0x194,
-    R8G8B8_SSCALED              = 0x195,
-    R8G8B8_USCALED              = 0x196,
-    R64G64B64A64_FLOAT          = 0x197,
-    R64G64B64_FLOAT             = 0x198,
-    BC4_SNORM                   = 0x199,
-    BC5_SNORM                   = 0x19A,
-    R16G16B16_FLOAT             = 0x19B,
-    R16G16B16_UNORM             = 0x19C,
-    R16G16B16_SNORM             = 0x19D,
-    R16G16B16_SSCALED           = 0x19E,
-    R16G16B16_USCALED           = 0x19F,
-    BC6H_SF16                   = 0x1A1,
-    BC7_UNORM                   = 0x1A2,
-    BC7_UNORM_SRGB              = 0x1A3,
-    BC6H_UF16                   = 0x1A4,
-    R8G8B8_UNORM_SRGB           = 0x1A8,
-    R16G16B16_UINT              = 0x1B0,
-    R16G16B16_SINT              = 0x1B1,
-    R32_SFIXED                  = 0x1B2,
-    R10G10B10A2_SNORM           = 0x1B3,
-    R10G10B10A2_USCALED         = 0x1B4,
-    R10G10B10A2_SSCALED         = 0x1B5,
-    R10G10B10A2_SINT            = 0x1B6,
-    B10G10R10A2_SNORM           = 0x1B7,
-    B10G10R10A2_USCALED         = 0x1B8,
-    B10G10R10A2_SSCALED         = 0x1B9,
-    B10G10R10A2_UINT            = 0x1BA,
-    B10G10R10A2_SINT            = 0x1BB,
-    R8G8B8_UINT                 = 0x1C8,
-    R8G8B8_SINT                 = 0x1C9,
-    RAW                         = 0x1FF,
-    NUM_SWR_FORMATS             = 0x200,
+    R32G32B32A32_FLOAT       = 0x0,
+    R32G32B32A32_SINT        = 0x1,
+    R32G32B32A32_UINT        = 0x2,
+    R64G64_FLOAT             = 0x5,
+    R32G32B32X32_FLOAT       = 0x6,
+    R32G32B32A32_SSCALED     = 0x7,
+    R32G32B32A32_USCALED     = 0x8,
+    R32G32B32A32_SFIXED      = 0x20,
+    R32G32B32_FLOAT          = 0x40,
+    R32G32B32_SINT           = 0x41,
+    R32G32B32_UINT           = 0x42,
+    R32G32B32_SSCALED        = 0x45,
+    R32G32B32_USCALED        = 0x46,
+    R32G32B32_SFIXED         = 0x50,
+    R16G16B16A16_UNORM       = 0x80,
+    R16G16B16A16_SNORM       = 0x81,
+    R16G16B16A16_SINT        = 0x82,
+    R16G16B16A16_UINT        = 0x83,
+    R16G16B16A16_FLOAT       = 0x84,
+    R32G32_FLOAT             = 0x85,
+    R32G32_SINT              = 0x86,
+    R32G32_UINT              = 0x87,
+    R32_FLOAT_X8X24_TYPELESS = 0x88,
+    X32_TYPELESS_G8X24_UINT  = 0x89,
+    L32A32_FLOAT             = 0x8A,
+    R64_FLOAT                = 0x8D,
+    R16G16B16X16_UNORM       = 0x8E,
+    R16G16B16X16_FLOAT       = 0x8F,
+    L32X32_FLOAT             = 0x91,
+    I32X32_FLOAT             = 0x92,
+    R16G16B16A16_SSCALED     = 0x93,
+    R16G16B16A16_USCALED     = 0x94,
+    R32G32_SSCALED           = 0x95,
+    R32G32_USCALED           = 0x96,
+    R32G32_SFIXED            = 0xA0,
+    B8G8R8A8_UNORM           = 0xC0,
+    B8G8R8A8_UNORM_SRGB      = 0xC1,
+    R10G10B10A2_UNORM        = 0xC2,
+    R10G10B10A2_UNORM_SRGB   = 0xC3,
+    R10G10B10A2_UINT         = 0xC4,
+    R8G8B8A8_UNORM           = 0xC7,
+    R8G8B8A8_UNORM_SRGB      = 0xC8,
+    R8G8B8A8_SNORM           = 0xC9,
+    R8G8B8A8_SINT            = 0xCA,
+    R8G8B8A8_UINT            = 0xCB,
+    R16G16_UNORM             = 0xCC,
+    R16G16_SNORM             = 0xCD,
+    R16G16_SINT              = 0xCE,
+    R16G16_UINT              = 0xCF,
+    R16G16_FLOAT             = 0xD0,
+    B10G10R10A2_UNORM        = 0xD1,
+    B10G10R10A2_UNORM_SRGB   = 0xD2,
+    R11G11B10_FLOAT          = 0xD3,
+    R10G10B10_FLOAT_A2_UNORM = 0xD5,
+    R32_SINT                 = 0xD6,
+    R32_UINT                 = 0xD7,
+    R32_FLOAT                = 0xD8,
+    R24_UNORM_X8_TYPELESS    = 0xD9,
+    X24_TYPELESS_G8_UINT     = 0xDA,
+    L32_UNORM                = 0xDD,
+    L16A16_UNORM             = 0xDF,
+    I24X8_UNORM              = 0xE0,
+    L24X8_UNORM              = 0xE1,
+    I32_FLOAT                = 0xE3,
+    L32_FLOAT                = 0xE4,
+    A32_FLOAT                = 0xE5,
+    B8G8R8X8_UNORM           = 0xE9,
+    B8G8R8X8_UNORM_SRGB      = 0xEA,
+    R8G8B8X8_UNORM           = 0xEB,
+    R8G8B8X8_UNORM_SRGB      = 0xEC,
+    R9G9B9E5_SHAREDEXP       = 0xED,
+    B10G10R10X2_UNORM        = 0xEE,
+    L16A16_FLOAT             = 0xF0,
+    R10G10B10X2_USCALED      = 0xF3,
+    R8G8B8A8_SSCALED         = 0xF4,
+    R8G8B8A8_USCALED         = 0xF5,
+    R16G16_SSCALED           = 0xF6,
+    R16G16_USCALED           = 0xF7,
+    R32_SSCALED              = 0xF8,
+    R32_USCALED              = 0xF9,
+    B5G6R5_UNORM             = 0x100,
+    B5G6R5_UNORM_SRGB        = 0x101,
+    B5G5R5A1_UNORM           = 0x102,
+    B5G5R5A1_UNORM_SRGB      = 0x103,
+    B4G4R4A4_UNORM           = 0x104,
+    B4G4R4A4_UNORM_SRGB      = 0x105,
+    R8G8_UNORM               = 0x106,
+    R8G8_SNORM               = 0x107,
+    R8G8_SINT                = 0x108,
+    R8G8_UINT                = 0x109,
+    R16_UNORM                = 0x10A,
+    R16_SNORM                = 0x10B,
+    R16_SINT                 = 0x10C,
+    R16_UINT                 = 0x10D,
+    R16_FLOAT                = 0x10E,
+    I16_UNORM                = 0x111,
+    L16_UNORM                = 0x112,
+    A16_UNORM                = 0x113,
+    L8A8_UNORM               = 0x114,
+    I16_FLOAT                = 0x115,
+    L16_FLOAT                = 0x116,
+    A16_FLOAT                = 0x117,
+    L8A8_UNORM_SRGB          = 0x118,
+    B5G5R5X1_UNORM           = 0x11A,
+    B5G5R5X1_UNORM_SRGB      = 0x11B,
+    R8G8_SSCALED             = 0x11C,
+    R8G8_USCALED             = 0x11D,
+    R16_SSCALED              = 0x11E,
+    R16_USCALED              = 0x11F,
+    A1B5G5R5_UNORM           = 0x124,
+    A4B4G4R4_UNORM           = 0x125,
+    L8A8_UINT                = 0x126,
+    L8A8_SINT                = 0x127,
+    R8_UNORM                 = 0x140,
+    R8_SNORM                 = 0x141,
+    R8_SINT                  = 0x142,
+    R8_UINT                  = 0x143,
+    A8_UNORM                 = 0x144,
+    I8_UNORM                 = 0x145,
+    L8_UNORM                 = 0x146,
+    R8_SSCALED               = 0x149,
+    R8_USCALED               = 0x14A,
+    L8_UNORM_SRGB            = 0x14C,
+    L8_UINT                  = 0x152,
+    L8_SINT                  = 0x153,
+    I8_UINT                  = 0x154,
+    I8_SINT                  = 0x155,
+    DXT1_RGB_SRGB            = 0x180,
+    YCRCB_SWAPUVY            = 0x183,
+    BC1_UNORM                = 0x186,
+    BC2_UNORM                = 0x187,
+    BC3_UNORM                = 0x188,
+    BC4_UNORM                = 0x189,
+    BC5_UNORM                = 0x18A,
+    BC1_UNORM_SRGB           = 0x18B,
+    BC2_UNORM_SRGB           = 0x18C,
+    BC3_UNORM_SRGB           = 0x18D,
+    YCRCB_SWAPUV             = 0x18F,
+    DXT1_RGB                 = 0x191,
+    R8G8B8_UNORM             = 0x193,
+    R8G8B8_SNORM             = 0x194,
+    R8G8B8_SSCALED           = 0x195,
+    R8G8B8_USCALED           = 0x196,
+    R64G64B64A64_FLOAT       = 0x197,
+    R64G64B64_FLOAT          = 0x198,
+    BC4_SNORM                = 0x199,
+    BC5_SNORM                = 0x19A,
+    R16G16B16_FLOAT          = 0x19B,
+    R16G16B16_UNORM          = 0x19C,
+    R16G16B16_SNORM          = 0x19D,
+    R16G16B16_SSCALED        = 0x19E,
+    R16G16B16_USCALED        = 0x19F,
+    BC6H_SF16                = 0x1A1,
+    BC7_UNORM                = 0x1A2,
+    BC7_UNORM_SRGB           = 0x1A3,
+    BC6H_UF16                = 0x1A4,
+    R8G8B8_UNORM_SRGB        = 0x1A8,
+    R16G16B16_UINT           = 0x1B0,
+    R16G16B16_SINT           = 0x1B1,
+    R32_SFIXED               = 0x1B2,
+    R10G10B10A2_SNORM        = 0x1B3,
+    R10G10B10A2_USCALED      = 0x1B4,
+    R10G10B10A2_SSCALED      = 0x1B5,
+    R10G10B10A2_SINT         = 0x1B6,
+    B10G10R10A2_SNORM        = 0x1B7,
+    B10G10R10A2_USCALED      = 0x1B8,
+    B10G10R10A2_SSCALED      = 0x1B9,
+    B10G10R10A2_UINT         = 0x1BA,
+    B10G10R10A2_SINT         = 0x1BB,
+    R8G8B8_UINT              = 0x1C8,
+    R8G8B8_SINT              = 0x1C9,
+    RAW                      = 0x1FF,
+    NUM_SWR_FORMATS          = 0x200,
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -266,4 +266,3 @@ INLINE const SWR_FORMAT_INFO& GetFormatInfo(SWR_FORMAT format)
  
  // lookup table for unorm8 srgb -> float conversion
  extern const uint32_t srgb8Table[256];
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/intrin.h b/src/gallium/drivers/swr/rasterizer/common/intrin.h

index 59d66bc60a8b801ff4d0c2dab5e9aae4df31f01b..4c413caf44110baf79448e2f5152a888b1747329 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/intrin.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #ifndef __SWR_INTRIN_H__
  #define __SWR_INTRIN_H__
@@ -28,34 +28,34 @@
  
  #if !defined(SIMD_ARCH)
  #define SIMD_ARCH KNOB_ARCH
-#endif 
+#endif
  
  #include "simdlib_types.hpp"
  
-typedef SIMDImpl::SIMD128Impl::Float                      simd4scalar;
-typedef SIMDImpl::SIMD128Impl::Double                     simd4scalard;
-typedef SIMDImpl::SIMD128Impl::Integer                    simd4scalari;
-typedef SIMDImpl::SIMD128Impl::Vec4                       simd4vector;
-typedef SIMDImpl::SIMD128Impl::Mask                       simd4mask;
-
-typedef SIMDImpl::SIMD256Impl::Float                      simd8scalar;
-typedef SIMDImpl::SIMD256Impl::Double                     simd8scalard;
-typedef SIMDImpl::SIMD256Impl::Integer                    simd8scalari;
-typedef SIMDImpl::SIMD256Impl::Vec4                       simd8vector;
-typedef SIMDImpl::SIMD256Impl::Mask                       simd8mask;
-
-typedef SIMDImpl::SIMD512Impl::Float                      simd16scalar;
-typedef SIMDImpl::SIMD512Impl::Double                     simd16scalard;
-typedef SIMDImpl::SIMD512Impl::Integer                    simd16scalari;
-typedef SIMDImpl::SIMD512Impl::Vec4                       simd16vector;
-typedef SIMDImpl::SIMD512Impl::Mask                       simd16mask;
-
-#if KNOB_SIMD_WIDTH == 8 
-typedef simd8scalar     simdscalar;
-typedef simd8scalard    simdscalard;
-typedef simd8scalari    simdscalari;
-typedef simd8vector     simdvector;
-typedef simd8mask       simdmask;
+typedef SIMDImpl::SIMD128Impl::Float   simd4scalar;
+typedef SIMDImpl::SIMD128Impl::Double  simd4scalard;
+typedef SIMDImpl::SIMD128Impl::Integer simd4scalari;
+typedef SIMDImpl::SIMD128Impl::Vec4    simd4vector;
+typedef SIMDImpl::SIMD128Impl::Mask    simd4mask;
+
+typedef SIMDImpl::SIMD256Impl::Float   simd8scalar;
+typedef SIMDImpl::SIMD256Impl::Double  simd8scalard;
+typedef SIMDImpl::SIMD256Impl::Integer simd8scalari;
+typedef SIMDImpl::SIMD256Impl::Vec4    simd8vector;
+typedef SIMDImpl::SIMD256Impl::Mask    simd8mask;
+
+typedef SIMDImpl::SIMD512Impl::Float   simd16scalar;
+typedef SIMDImpl::SIMD512Impl::Double  simd16scalard;
+typedef SIMDImpl::SIMD512Impl::Integer simd16scalari;
+typedef SIMDImpl::SIMD512Impl::Vec4    simd16vector;
+typedef SIMDImpl::SIMD512Impl::Mask    simd16mask;
+
+#if KNOB_SIMD_WIDTH == 8
+typedef simd8scalar  simdscalar;
+typedef simd8scalard simdscalard;
+typedef simd8scalari simdscalari;
+typedef simd8vector  simdvector;
+typedef simd8mask    simdmask;
  #else
  #error Unsupported vector width
  #endif
@@ -68,7 +68,7 @@ UINT pdep_u32(UINT a, UINT mask)
  #else
      UINT result = 0;
  
-    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html 
+    // copied from http://wm.ite.pl/articles/pdep-soft-emu.html
      // using bsf instead of funky loop
      DWORD maskIndex;
      while (_BitScanForward(&maskIndex, mask))
@@ -99,8 +99,8 @@ UINT pext_u32(UINT a, UINT mask)
  #if KNOB_ARCH >= KNOB_ARCH_AVX2
      return _pext_u32(a, mask);
  #else
-    UINT result = 0;
-    DWORD maskIndex;
+    UINT     result = 0;
+    DWORD    maskIndex;
      uint32_t currentBit = 0;
      while (_BitScanForward(&maskIndex, mask))
      {
@@ -117,4 +117,4 @@ UINT pext_u32(UINT a, UINT mask)
  #endif
  }
  
-#endif//__SWR_INTRIN_H__
+#endif //__SWR_INTRIN_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/isa.hpp b/src/gallium/drivers/swr/rasterizer/common/isa.hpp

index a62350f2b6037cfa9fa36694b5bbfba81413faf5..aea5740bb664b94eb2167b71e48e3d29f76bcfd6 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/isa.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/isa.hpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #pragma once
  
@@ -44,7 +44,7 @@
  class InstructionSet
  {
  public:
-    InstructionSet() : CPU_Rep() {};
+    InstructionSet() : CPU_Rep(){};
  
      // getters
      std::string Vendor(void) { return CPU_Rep.vendor_; }
@@ -113,21 +113,11 @@ private:
      class InstructionSet_Internal
      {
      public:
-        InstructionSet_Internal()
-            : nIds_{ 0 },
-            nExIds_{ 0 },
-            isIntel_{ false },
-            isAMD_{ false },
-            f_1_ECX_{ 0 },
-            f_1_EDX_{ 0 },
-            f_7_EBX_{ 0 },
-            f_7_ECX_{ 0 },
-            f_81_ECX_{ 0 },
-            f_81_EDX_{ 0 },
-            data_{},
-            extdata_{}
+        InstructionSet_Internal() :
+            nIds_{0}, nExIds_{0}, isIntel_{false}, isAMD_{false}, f_1_ECX_{0}, f_1_EDX_{0},
+            f_7_EBX_{0}, f_7_ECX_{0}, f_81_ECX_{0}, f_81_EDX_{0}, data_{}, extdata_{}
          {
-            //int cpuInfo[4] = {-1};
+            // int cpuInfo[4] = {-1};
              std::array<int, 4> cpui;
  
              // Calling __cpuid with 0x0 as the function_id argument
@@ -144,7 +134,7 @@ private:
  #if defined(_MSC_VER) && !defined(__clang__)
                  __cpuidex(cpui.data(), i, 0);
  #else
-                int *data = cpui.data();
+                int* data = cpui.data();
                  __cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
  #endif
                  data_.push_back(cpui);
@@ -153,10 +143,10 @@ private:
              // Capture vendor string
              char vendor[0x20];
              memset(vendor, 0, sizeof(vendor));
-            *reinterpret_cast<int*>(vendor) = data_[0][1];
+            *reinterpret_cast<int*>(vendor)     = data_[0][1];
              *reinterpret_cast<int*>(vendor + 4) = data_[0][3];
              *reinterpret_cast<int*>(vendor + 8) = data_[0][2];
-            vendor_ = vendor;
+            vendor_                             = vendor;
              if (vendor_ == "GenuineIntel")
              {
                  isIntel_ = true;
@@ -197,7 +187,7 @@ private:
  #if defined(_MSC_VER) && !defined(__clang__)
                  __cpuidex(cpui.data(), i, 0);
  #else
-                int *data = cpui.data();
+                int* data = cpui.data();
                  __cpuid_count(i, 0, data[0], data[1], data[2], data[3]);
  #endif
                  extdata_.push_back(cpui);
@@ -220,18 +210,18 @@ private:
              }
          };
  
-        int nIds_;
-        unsigned nExIds_;
-        std::string vendor_;
-        std::string brand_;
-        bool isIntel_;
-        bool isAMD_;
-        std::bitset<32> f_1_ECX_;
-        std::bitset<32> f_1_EDX_;
-        std::bitset<32> f_7_EBX_;
-        std::bitset<32> f_7_ECX_;
-        std::bitset<32> f_81_ECX_;
-        std::bitset<32> f_81_EDX_;
+        int                             nIds_;
+        unsigned                        nExIds_;
+        std::string                     vendor_;
+        std::string                     brand_;
+        bool                            isIntel_;
+        bool                            isAMD_;
+        std::bitset<32>                 f_1_ECX_;
+        std::bitset<32>                 f_1_EDX_;
+        std::bitset<32>                 f_7_EBX_;
+        std::bitset<32>                 f_7_ECX_;
+        std::bitset<32>                 f_81_ECX_;
+        std::bitset<32>                 f_81_EDX_;
          std::vector<std::array<int, 4>> data_;
          std::vector<std::array<int, 4>> extdata_;
      };
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.cpp b/src/gallium/drivers/swr/rasterizer/common/os.cpp

index 2d97270b997532e96c668ce87c03c45889a353e2..aa817d451b4a19c8e6c40f134fa276f6d5a805cd 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/os.cpp
+++ b/src/gallium/drivers/swr/rasterizer/common/os.cpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #include "common/os.h"
  #include <vector>
@@ -34,28 +34,26 @@
  #include <pthread.h>
  #endif // Linux
  
-
-
  #if defined(_WIN32)
  static const DWORD MS_VC_EXCEPTION = 0x406D1388;
  
-#pragma pack(push,8)  
+#pragma pack(push, 8)
  typedef struct tagTHREADNAME_INFO
  {
-    DWORD dwType; // Must be 0x1000.  
-    LPCSTR szName; // Pointer to name (in user addr space).  
-    DWORD dwThreadID; // Thread ID (-1=caller thread).  
-    DWORD dwFlags; // Reserved for future use, must be zero.  
+    DWORD  dwType;     // Must be 0x1000.
+    LPCSTR szName;     // Pointer to name (in user addr space).
+    DWORD  dwThreadID; // Thread ID (-1=caller thread).
+    DWORD  dwFlags;    // Reserved for future use, must be zero.
  } THREADNAME_INFO;
  #pragma pack(pop)
  
  void LegacySetThreadName(const char* pThreadName)
  {
      THREADNAME_INFO info;
-    info.dwType = 0x1000;
-    info.szName = pThreadName;
+    info.dwType     = 0x1000;
+    info.szName     = pThreadName;
      info.dwThreadID = GetCurrentThreadId();
-    info.dwFlags = 0;
+    info.dwFlags    = 0;
  
      if (!IsDebuggerPresent())
      {
@@ -63,14 +61,16 @@ void LegacySetThreadName(const char* pThreadName)
          return;
      }
  
-#pragma warning(push)  
-#pragma warning(disable: 6320 6322)  
-    __try {
+#pragma warning(push)
+#pragma warning(disable : 6320 6322)
+    __try
+    {
          RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info);
      }
-    __except (EXCEPTION_EXECUTE_HANDLER) {
+    __except (EXCEPTION_EXECUTE_HANDLER)
+    {
      }
-#pragma warning(pop)  
+#pragma warning(pop)
  }
  #endif // _WIN32
  
@@ -78,23 +78,21 @@ void SWR_API SetCurrentThreadName(const char* pThreadName)
  {
  #if defined(_WIN32)
      // The SetThreadDescription API was introduced in version 1607 of Windows 10.
-    typedef HRESULT(WINAPI* PFNSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
+    typedef HRESULT(WINAPI * PFNSetThreadDescription)(HANDLE hThread, PCWSTR lpThreadDescription);
      // The SetThreadDescription API works even if no debugger is attached.
-    auto pfnSetThreadDescription =
-        reinterpret_cast<PFNSetThreadDescription>(
-            GetProcAddress(GetModuleHandleA("Kernel32.dll"), "SetThreadDescription"));
+    auto pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
+        GetProcAddress(GetModuleHandleA("Kernel32.dll"), "SetThreadDescription"));
  
      if (!pfnSetThreadDescription)
      {
          // try KernelBase.dll
-        pfnSetThreadDescription =
-            reinterpret_cast<PFNSetThreadDescription>(
-                GetProcAddress(GetModuleHandleA("KernelBase.dll"), "SetThreadDescription"));
+        pfnSetThreadDescription = reinterpret_cast<PFNSetThreadDescription>(
+            GetProcAddress(GetModuleHandleA("KernelBase.dll"), "SetThreadDescription"));
      }
  
      if (pfnSetThreadDescription)
      {
-        std::string utf8Name = pThreadName;
+        std::string  utf8Name = pThreadName;
          std::wstring wideName;
          wideName.resize(utf8Name.size() + 1);
          swprintf_s(&(wideName.front()), wideName.size(), L"%S", utf8Name.c_str());
@@ -113,12 +111,13 @@ void SWR_API SetCurrentThreadName(const char* pThreadName)
  #endif // Linux
  }
  
-static void SplitString(std::vector<std::string>& out_segments, const std::string& input, char splitToken)
+static void
+SplitString(std::vector<std::string>& out_segments, const std::string& input, char splitToken)
  {
      out_segments.clear();
  
      std::istringstream f(input);
-    std::string s;
+    std::string        s;
      while (std::getline(f, s, splitToken))
      {
          if (s.size())
@@ -155,12 +154,11 @@ void SWR_API CreateDirectoryPath(const std::string& path)
  
  /// Execute Command (block until finished)
  /// @returns process exit value
-int SWR_API  ExecCmd(
-    const std::string&  cmd,            ///< (In) Command line string
-    const char*         pOptEnvStrings, ///< (Optional In) Environment block for new process
-    std::string*        pOptStdOut,     ///< (Optional Out) Standard Output text
-    std::string*        pOptStdErr,     ///< (Optional Out) Standard Error text
-    const std::string*  pOptStdIn)      ///< (Optional In) Standard Input text
+int SWR_API ExecCmd(const std::string& cmd,     ///< (In) Command line string
+                    const char* pOptEnvStrings, ///< (Optional In) Environment block for new process
+                    std::string*       pOptStdOut, ///< (Optional Out) Standard Output text
+                    std::string*       pOptStdErr, ///< (Optional Out) Standard Error text
+                    const std::string* pOptStdIn)  ///< (Optional In) Standard Input text
  {
      int rvalue = -1;
  
@@ -172,8 +170,8 @@ int SWR_API  ExecCmd(
      };
      std::array<WinPipe, 3> hPipes = {};
  
-    SECURITY_ATTRIBUTES saAttr = { sizeof(SECURITY_ATTRIBUTES) };
-    saAttr.bInheritHandle = TRUE;   //Pipe handles are inherited by child process.
+    SECURITY_ATTRIBUTES saAttr  = {sizeof(SECURITY_ATTRIBUTES)};
+    saAttr.bInheritHandle       = TRUE; // Pipe handles are inherited by child process.
      saAttr.lpSecurityDescriptor = NULL;
  
      {
@@ -198,7 +196,7 @@ int SWR_API  ExecCmd(
      }
  
      STARTUPINFOA StartupInfo{};
-    StartupInfo.cb = sizeof(STARTUPINFOA);
+    StartupInfo.cb      = sizeof(STARTUPINFOA);
      StartupInfo.dwFlags = STARTF_USESTDHANDLES;
      StartupInfo.dwFlags |= STARTF_USESHOWWINDOW;
      StartupInfo.wShowWindow = SW_HIDE;
@@ -207,30 +205,28 @@ int SWR_API  ExecCmd(
          StartupInfo.hStdInput = hPipes[0].hRead;
      }
      StartupInfo.hStdOutput = hPipes[1].hWrite;
-    StartupInfo.hStdError = hPipes[2].hWrite;
+    StartupInfo.hStdError  = hPipes[2].hWrite;
      PROCESS_INFORMATION procInfo{};
  
      // CreateProcess can modify the string
      std::string local_cmd = cmd;
  
-    BOOL ProcessValue = CreateProcessA(
-        NULL,
-        (LPSTR)local_cmd.c_str(),
-        NULL,
-        NULL,
-        TRUE,
-        0,
-        (LPVOID)pOptEnvStrings,
-        NULL,
-        &StartupInfo,
-        &procInfo);
+    BOOL ProcessValue = CreateProcessA(NULL,
+                                       (LPSTR)local_cmd.c_str(),
+                                       NULL,
+                                       NULL,
+                                       TRUE,
+                                       0,
+                                       (LPVOID)pOptEnvStrings,
+                                       NULL,
+                                       &StartupInfo,
+                                       &procInfo);
  
      if (ProcessValue && procInfo.hProcess)
      {
-        auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr)
-        {
-            char buf[1024];
-            DWORD dwRead = 0;
+        auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr) {
+            char  buf[1024];
+            DWORD dwRead  = 0;
              DWORD dwAvail = 0;
              while (true)
              {
@@ -244,7 +240,12 @@ int SWR_API  ExecCmd(
                      break;
                  }
  
-                if (!::ReadFile(hPipe, buf, std::min<size_t>(sizeof(buf) - 1, size_t(dwAvail)), &dwRead, NULL) || !dwRead)
+                if (!::ReadFile(hPipe,
+                                buf,
+                                std::min<size_t>(sizeof(buf) - 1, size_t(dwAvail)),
+                                &dwRead,
+                                NULL) ||
+                    !dwRead)
                  {
                      // error, the child process might have ended
                      break;
@@ -257,17 +258,18 @@ int SWR_API  ExecCmd(
                  }
              }
          };
-        bool bProcessEnded = false;
-        size_t bytesWritten = 0;
+        bool   bProcessEnded = false;
+        size_t bytesWritten  = 0;
          do
          {
              if (pOptStdIn && (pOptStdIn->size() > bytesWritten))
              {
                  DWORD bytesToWrite = static_cast<DWORD>(pOptStdIn->size()) - bytesWritten;
-                if (!::WriteFile(
-                    hPipes[0].hWrite,
-                    pOptStdIn->data() + bytesWritten,
-                    bytesToWrite, &bytesToWrite, nullptr))
+                if (!::WriteFile(hPipes[0].hWrite,
+                                 pOptStdIn->data() + bytesWritten,
+                                 bytesToWrite,
+                                 &bytesToWrite,
+                                 nullptr))
                  {
                      // Failed to write to pipe
                      break;
@@ -280,8 +282,7 @@ int SWR_API  ExecCmd(
  
              ReadFromPipe(hPipes[1].hRead, pOptStdOut);
              ReadFromPipe(hPipes[2].hRead, pOptStdErr);
-        }
-        while (!bProcessEnded);
+        } while (!bProcessEnded);
  
          DWORD exitVal = 0;
          if (!GetExitCodeProcess(procInfo.hProcess, &exitVal))
diff --git a/src/gallium/drivers/swr/rasterizer/common/os.h b/src/gallium/drivers/swr/rasterizer/common/os.h

index e779562225eab8bf0a1237e01d9659165648a7b7..d33c8735d119093d0b47f65d3905d1a53b3766b3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/os.h
+++ b/src/gallium/drivers/swr/rasterizer/common/os.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #ifndef __SWR_OS_H__
  #define __SWR_OS_H__
@@ -30,7 +30,7 @@
  #if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
  
  #define SWR_API __cdecl
-#define SWR_VISIBLE  __declspec(dllexport)
+#define SWR_VISIBLE __declspec(dllexport)
  
  #ifndef NOMINMAX
  #define NOMINMAX
@@ -64,12 +64,12 @@
  #define DEBUGBREAK __debugbreak()
  
  #define PRAGMA_WARNING_PUSH_DISABLE(...) \
-    __pragma(warning(push));\
-    __pragma(warning(disable:__VA_ARGS__));
+    __pragma(warning(push));             \
+    __pragma(warning(disable : __VA_ARGS__));
  
  #define PRAGMA_WARNING_POP() __pragma(warning(pop))
  
-static inline void *AlignedMalloc(size_t _Size, size_t _Alignment)
+static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
  { // Thin wrapper: returns whatever _aligned_malloc(_Size, _Alignment) returns.
      return _aligned_malloc(_Size, _Alignment);
  }
@@ -104,13 +104,13 @@ static inline void AlignedFree(void* p)
  #include <stdio.h>
  #include <limits.h>
  
-typedef void            VOID;
-typedef void*           LPVOID;
-typedef int             INT;
-typedef unsigned int    UINT;
-typedef void*           HANDLE;
-typedef int             LONG;
-typedef unsigned int    DWORD;
+typedef void         VOID;
+typedef void*        LPVOID;
+typedef int          INT;
+typedef unsigned int UINT;
+typedef void*        HANDLE;
+typedef int          LONG;
+typedef unsigned int DWORD;
  
  #undef FALSE
  #define FALSE 0
@@ -124,7 +124,7 @@ typedef unsigned int    DWORD;
  #ifndef INLINE
  #define INLINE __inline
  #endif
-#define DEBUGBREAK asm ("int $3")
+#define DEBUGBREAK asm("int $3")
  
  #if !defined(__CYGWIN__)
  
@@ -136,28 +136,25 @@ typedef unsigned int    DWORD;
  #endif
  
  #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
-    #define __declspec(x)           __declspec_##x
-    #define __declspec_align(y)     __attribute__((aligned(y)))
-    #define __declspec_deprecated   __attribute__((deprecated))
-    #define __declspec_dllexport
-    #define __declspec_dllimport
-    #define __declspec_noinline     __attribute__((__noinline__))
-    #define __declspec_nothrow      __attribute__((nothrow))
-    #define __declspec_novtable
-    #define __declspec_thread       __thread
+#define __declspec(x) __declspec_##x
+#define __declspec_align(y) __attribute__((aligned(y)))
+#define __declspec_deprecated __attribute__((deprecated))
+#define __declspec_dllexport
+#define __declspec_dllimport
+#define __declspec_noinline __attribute__((__noinline__))
+#define __declspec_nothrow __attribute__((nothrow))
+#define __declspec_novtable
+#define __declspec_thread __thread
  #else
-    #define __declspec(X)
+#define __declspec(X)
  #endif
  
  #endif
  
-#define GCC_VERSION (__GNUC__ * 10000 \
-                     + __GNUC_MINOR__ * 100 \
-                     + __GNUC_PATCHLEVEL__)
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
  
  #if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
-inline
-uint64_t __rdtsc()
+inline uint64_t      __rdtsc()
  {
      long low, high;
      asm volatile("rdtsc" : "=a"(low), "=d"(high));
@@ -165,10 +162,9 @@ uint64_t __rdtsc()
  }
  #endif
  
-#if !defined( __clang__) && !defined(__INTEL_COMPILER)
+#if !defined(__clang__) && !defined(__INTEL_COMPILER)
  // Intrinsic not defined in gcc
-static INLINE
-void _mm256_storeu2_m128i(__m128i *hi, __m128i *lo, __m256i a)
+static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
  {
      _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
      _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
@@ -181,29 +177,25 @@ void _mm256_storeu2_m128i(__m128i *hi, __m128i *lo, __m256i a)
  #endif
  #endif
  
-inline
-unsigned char _BitScanForward(unsigned long *Index, unsigned long Mask)
+inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
  { // Lowest set bit index of Mask -> *Index; returns nonzero iff Mask has a set bit.
      *Index = Mask ? __builtin_ctzl(Mask) : 0; // ctzl scans the full unsigned long; ctz(0) is UB
      return (Mask != 0);
  }
  
-inline
-unsigned char _BitScanForward(unsigned int *Index, unsigned int Mask)
+inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
  { // Lowest set bit index of Mask -> *Index; returns nonzero iff Mask has a set bit.
      *Index = Mask ? __builtin_ctz(Mask) : 0; // __builtin_ctz(0) is undefined, so guard it
      return (Mask != 0);
  }
  
-inline
-unsigned char _BitScanReverse(unsigned long *Index, unsigned long Mask)
+inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
  { // Highest set bit index of Mask -> *Index (as the MSVC intrinsic does); nonzero iff Mask != 0.
      *Index = Mask ? (8 * sizeof(Mask) - 1) - __builtin_clzl(Mask) : 0; // index, not clz count
      return (Mask != 0);
  }
  
-inline
-unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
+inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
  {
      *Index = __builtin_clz(Mask);
      return (Mask != 0);
@@ -212,10 +204,9 @@ unsigned char _BitScanReverse(unsigned int *Index, unsigned int Mask)
  #define _BitScanForward64 _BitScanForward
  #define _BitScanReverse64 _BitScanReverse
  
-inline
-void *AlignedMalloc(size_t size, size_t alignment)
+inline void* AlignedMalloc(size_t size, size_t alignment)
  {
-    void *ret;
+    void* ret;
      if (posix_memalign(&ret, alignment, size))
      {
          return NULL;
@@ -223,19 +214,19 @@ void *AlignedMalloc(size_t size, size_t alignment)
      return ret;
  }
  
-static inline
-void AlignedFree(void* p)
+static inline void AlignedFree(void* p)
  { // Releases p by passing it straight to free().
      free(p);
  }
  
-#define _countof(a) (sizeof(a)/sizeof(*(a)))
+#define _countof(a) (sizeof(a) / sizeof(*(a)))
  
  #define sprintf_s sprintf
-#define strcpy_s(dst,size,src) strncpy(dst,src,size)
+#define strcpy_s(dst, size, src) strncpy(dst, src, size)
  #define GetCurrentProcessId getpid
  
-#define InterlockedCompareExchange(Dest, Exchange, Comparand) __sync_val_compare_and_swap(Dest, Comparand, Exchange)
+#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
+    __sync_val_compare_and_swap(Dest, Comparand, Exchange)
  #define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
  #define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
  #define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
@@ -257,9 +248,9 @@ void AlignedFree(void* p)
  #define THREAD thread_local
  
  // Universal types
-typedef uint8_t     KILOBYTE[1024];
-typedef KILOBYTE    MEGABYTE[1024];
-typedef MEGABYTE    GIGABYTE[1024];
+typedef uint8_t  KILOBYTE[1024];
+typedef KILOBYTE MEGABYTE[1024];
+typedef MEGABYTE GIGABYTE[1024];
  
  #define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
  #define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
@@ -275,9 +266,9 @@ typedef MEGABYTE    GIGABYTE[1024];
  #define ATTR_UNUSED
  #endif
  
-#define SWR_FUNC(_retType, _funcName, /* args */...)   \
-   typedef _retType (SWR_API * PFN##_funcName)(__VA_ARGS__); \
-  _retType SWR_API _funcName(__VA_ARGS__);
+#define SWR_FUNC(_retType, _funcName, /* args */...)        \
+    typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
+    _retType SWR_API _funcName(__VA_ARGS__);
  
  // Defined in os.cpp
  void SWR_API SetCurrentThreadName(const char* pThreadName);
@@ -285,11 +276,11 @@ void SWR_API CreateDirectoryPath(const std::string& path);
  
  /// Execute Command (block until finished)
  /// @returns process exit value
-int SWR_API  ExecCmd(
-    const std::string&  cmd,                        ///< (In) Command line string
-    const char*         pOptEnvStrings = nullptr,   ///< (Optional In) Environment block for new process
-    std::string*        pOptStdOut = nullptr,       ///< (Optional Out) Standard Output text
-    std::string*        pOptStdErr = nullptr,       ///< (Optional Out) Standard Error text
-    const std::string*  pOptStdIn = nullptr);       ///< (Optional In) Standard Input text
-
-#endif//__SWR_OS_H__
+int SWR_API
+    ExecCmd(const std::string& cmd,                ///< (In) Command line string
+            const char*  pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
+            std::string* pOptStdOut     = nullptr,   ///< (Optional Out) Standard Output text
+            std::string* pOptStdErr     = nullptr,   ///< (Optional Out) Standard Error text
+            const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text
+
+#endif //__SWR_OS_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp

index 79e82c4e6b373c2b7533ceec3b16aff14dbcbfb3..e19a2d110458a5e0757f2b0b6d459ddcf4ac5310 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp
+++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file rdtsc_buckets.cpp
-* 
-* @brief implementation of rdtsc buckets.
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets.cpp
+ *
+ * @brief implementation of rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "rdtsc_buckets.h"
  #include <inttypes.h>
  
@@ -50,16 +50,16 @@ void BucketManager::RegisterThread(const std::string& name)
      BUCKET_THREAD newThread;
      newThread.name = name;
      newThread.root.children.reserve(mBuckets.size());
-    newThread.root.id = 0;
+    newThread.root.id      = 0;
      newThread.root.pParent = nullptr;
-    newThread.pCurrent = &newThread.root;
+    newThread.pCurrent     = &newThread.root;
  
      mThreadMutex.lock();
  
      // assign unique thread id for this thread
-    size_t id = mThreads.size();
+    size_t id    = mThreads.size();
      newThread.id = (UINT)id;
-    tlsThreadId = (UINT)id;
+    tlsThreadId  = (UINT)id;
  
      // store new thread
      mThreads.push_back(newThread);
@@ -76,9 +76,10 @@ UINT BucketManager::RegisterBucket(const BUCKET_DESC& desc)
      return (UINT)id;
  }
  
-void BucketManager::PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket)
+void BucketManager::PrintBucket(
+    FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket)
  {
-    const char *arrows[] = {
+    const char* arrows[] = {
          "",
          "|-> ",
          "    |-> ",
@@ -99,7 +100,7 @@ void BucketManager::PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint
      // compute average cycle count per invocation
      uint64_t CPE = bucket.elapsed / bucket.count;
  
-    BUCKET_DESC &desc = mBuckets[bucket.id];
+    BUCKET_DESC& desc = mBuckets[bucket.id];
  
      // construct hierarchy visualization
      char hier[80];
@@ -107,16 +108,16 @@ void BucketManager::PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint
      strcat(hier, desc.name.c_str());
  
      // print out
-    fprintf(f, "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n", 
-        percentTotal, 
-        percentParent, 
-        bucket.elapsed, 
-        CPE, 
-        bucket.count, 
-        (unsigned long)0, 
-        (uint32_t)0, 
-        hier
-    );
+    fprintf(f,
+            "%6.2f %6.2f %-10" PRIu64 " %-10" PRIu64 " %-10u %-10lu %-10u %s\n",
+            percentTotal,
+            percentParent,
+            bucket.elapsed,
+            CPE,
+            bucket.count,
+            (unsigned long)0,
+            (uint32_t)0,
+            hier);
  
      // dump all children of this bucket
      for (const BUCKET& child : bucket.children)
@@ -135,8 +136,8 @@ void BucketManager::PrintThread(FILE* f, const BUCKET_THREAD& thread)
      fprintf(f, " %%Tot   %%Par  Cycles     CPE        NumEvent   CPE2       NumEvent2  Bucket\n");
  
      // compute thread level total cycle counts across all buckets from root
-    const BUCKET& root = thread.root;
-    uint64_t totalCycles = 0;
+    const BUCKET& root        = thread.root;
+    uint64_t      totalCycles = 0;
      for (const BUCKET& child : root.children)
      {
          totalCycles += child.elapsed;
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h

index 48042ac223394cd1f9f1774929e98b636189e36d..bbc9538b86dd1de2e1837b956e939e88538d2e9a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
+++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file rdtsc_buckets.h
-* 
-* @brief declaration for rdtsc buckets.
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets.h
+ *
+ * @brief declaration for rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "os.h"
@@ -48,7 +48,7 @@ extern THREAD UINT tlsThreadId;
  class BucketManager
  {
  public:
-    BucketManager() { }
+    BucketManager() {}
      ~BucketManager();
  
      // removes all registered thread data
@@ -112,7 +112,8 @@ public:
      // @param id generated by RegisterBucket
      INLINE void StartBucket(UINT id)
      {
-        if (!mCapturing) return;
+        if (!mCapturing)
+            return;
  
          SWR_ASSERT(tlsThreadId < mThreads.size());
  
@@ -125,10 +126,10 @@ public:
              {
                  bt.pCurrent->children.resize(mBuckets.size());
              }
-            BUCKET &child = bt.pCurrent->children[id];
+            BUCKET& child = bt.pCurrent->children[id];
              child.pParent = bt.pCurrent;
-            child.id = id;
-            child.start = tsc;
+            child.id      = id;
+            child.start   = tsc;
  
              // update thread's currently executing bucket
              bt.pCurrent = &child;
@@ -142,7 +143,7 @@ public:
      INLINE void StopBucket(UINT id)
      {
          SWR_ASSERT(tlsThreadId < mThreads.size());
-        BUCKET_THREAD &bt = mThreads[tlsThreadId];
+        BUCKET_THREAD& bt = mThreads[tlsThreadId];
  
          if (bt.level == 0)
          {
@@ -152,7 +153,8 @@ public:
          uint64_t tsc = __rdtsc();
  
          {
-            if (bt.pCurrent->start == 0) return;
+            if (bt.pCurrent->start == 0)
+                return;
              SWR_ASSERT(bt.pCurrent->id == id, "Mismatched buckets detected");
  
              bt.pCurrent->elapsed += (tsc - bt.pCurrent->start);
@@ -167,7 +169,8 @@ public:
  
      INLINE void AddEvent(uint32_t id, uint32_t count)
      {
-        if (!mCapturing) return;
+        if (!mCapturing)
+            return;
  
          SWR_ASSERT(tlsThreadId < mThreads.size());
  
@@ -179,15 +182,16 @@ public:
              {
                  bt.pCurrent->children.resize(mBuckets.size());
              }
-            BUCKET &child = bt.pCurrent->children[id];
+            BUCKET& child = bt.pCurrent->children[id];
              child.pParent = bt.pCurrent;
-            child.id = id;
+            child.id      = id;
              child.count += count;
          }
      }
  
  private:
-    void PrintBucket(FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket);
+    void PrintBucket(
+        FILE* f, UINT level, uint64_t threadCycles, uint64_t parentCycles, const BUCKET& bucket);
      void PrintThread(FILE* f, const BUCKET_THREAD& thread);
  
      // list of active threads that have registered with this manager
@@ -197,10 +201,10 @@ private:
      std::vector<BUCKET_DESC> mBuckets;
  
      // is capturing currently enabled
-    volatile bool mCapturing{ false };
+    volatile bool mCapturing{false};
  
      // has capturing completed
-    volatile bool mDoneCapturing{ false };
+    volatile bool mDoneCapturing{false};
  
      std::mutex mThreadMutex;
  
@@ -208,7 +212,6 @@ private:
  
  };
  
-
  // C helpers for jitter
  void BucketManager_StartBucket(BucketManager* pBucketMgr, uint32_t id);
  void BucketManager_StopBucket(BucketManager* pBucketMgr, uint32_t id);
diff --git a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h

index f6e75cda89d23eab1613b90ab8ab314ffb43076b..fd3b1df746aee333a4e34114dba00a62071fc1a9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h
+++ b/src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file rdtsc_buckets.h
-* 
-* @brief declaration for rdtsc buckets.
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rdtsc_buckets_shared.h
+ *
+ * @brief declaration for rdtsc buckets.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <vector>
@@ -34,12 +34,12 @@
  
  struct BUCKET
  {
-    uint32_t id{ 0 };
-    uint64_t start{ 0 };
-    uint64_t elapsed{ 0 };
-    uint32_t count{ 0 };
+    uint32_t id{0};
+    uint64_t start{0};
+    uint64_t elapsed{0};
+    uint32_t count{0};
  
-    BUCKET* pParent{ nullptr };
+    BUCKET*             pParent{nullptr};
      std::vector<BUCKET> children;
  };
  
@@ -65,29 +65,29 @@ struct BUCKET_THREAD
      std::string name;
  
      // id for this thread, assigned by the thread manager
-    uint32_t id{ 0 };
+    uint32_t id{0};
  
      // root of the bucket hierarchy for this thread
      BUCKET root;
  
      // currently executing bucket somewhere in the hierarchy
-    BUCKET* pCurrent{ nullptr };
+    BUCKET* pCurrent{nullptr};
  
      // currently executing hierarchy level
-    uint32_t level{ 0 };
+    uint32_t level{0};
  
      // threadviz file object
-    FILE* vizFile{ nullptr };
+    FILE* vizFile{nullptr};
  
  
      BUCKET_THREAD() {}
      BUCKET_THREAD(const BUCKET_THREAD& that)
      {
-        name = that.name;
-        id = that.id;
-        root = that.root;
+        name     = that.name;
+        id       = that.id;
+        root     = that.root;
          pCurrent = &root;
-        vizFile = that.vizFile;
+        vizFile  = that.vizFile;
      }
  };
  
@@ -100,14 +100,14 @@ enum VIZ_TYPE
  
  struct VIZ_START_DATA
  {
-    uint8_t type;
+    uint8_t  type;
      uint32_t bucketId;
      uint64_t timestamp;
  };
  
  struct VIZ_STOP_DATA
  {
-    uint8_t type;
+    uint8_t  type;
      uint64_t timestamp;
  };
  
@@ -144,7 +144,7 @@ inline void Serialize(FILE* f, const std::string& string)
  
  inline void Deserialize(FILE* f, std::string& string)
  {
-    char cstr[256];
+    char    cstr[256];
      uint8_t length;
      fread(&length, sizeof(length), 1, f);
      fread(cstr, length, 1, f);
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h

index 98a8b9b2f9f2f880d8cbabca764409b1710ca8c3..b08fb2eaaeac62c8385c5441ee2da93b26f146db 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #ifndef __SWR_SIMD16INTRIN_H__
  #define __SWR_SIMD16INTRIN_H__
@@ -27,144 +27,146 @@
  #if ENABLE_AVX512_SIMD16
  
  #if KNOB_SIMD16_WIDTH == 16
-typedef SIMD512                             SIMD16;
+typedef SIMD512 SIMD16;
  #else
  #error Unsupported vector width
-#endif//KNOB_SIMD16_WIDTH == 16
+#endif // KNOB_SIMD16_WIDTH == 16
  
-#define _simd16_setzero_ps                  SIMD16::setzero_ps
-#define _simd16_setzero_si                  SIMD16::setzero_si
-#define _simd16_set1_ps                     SIMD16::set1_ps
-#define _simd16_set1_epi8                   SIMD16::set1_epi8
-#define _simd16_set1_epi32                  SIMD16::set1_epi32
-#define _simd16_set_ps                      SIMD16::set_ps
-#define _simd16_set_epi32                   SIMD16::set_epi32
-#define _simd16_load_ps                     SIMD16::load_ps
-#define _simd16_loadu_ps                    SIMD16::loadu_ps
-#if 1                                       
-#define _simd16_load1_ps                    SIMD16::broadcast_ss
-#endif                                      
-#define _simd16_load_si                     SIMD16::load_si
-#define _simd16_loadu_si                    SIMD16::loadu_si
-#define _simd16_broadcast_ss(m)             SIMD16::broadcast_ss((float const*)m)
-#define _simd16_store_ps                    SIMD16::store_ps
-#define _simd16_store_si                    SIMD16::store_si
-#define _simd16_extract_ps(a, imm8)         SIMD16::extract_ps<imm8>(a)
-#define _simd16_extract_si(a, imm8)         SIMD16::extract_si<imm8>(a)
-#define _simd16_insert_ps(a, b, imm8)       SIMD16::insert_ps<imm8>(a, b)
-#define _simd16_insert_si(a, b, imm8)       SIMD16::insert_si<imm8>(a, b)
-#define _simd16_maskstore_ps                SIMD16::maskstore_ps
-#define _simd16_blend_ps(a, b, mask)        SIMD16::blend_ps<mask>(a, b)
-#define _simd16_blendv_ps                   SIMD16::blendv_ps
-#define _simd16_blendv_epi32                SIMD16::blendv_epi32
-#define _simd16_mul_ps                      SIMD16::mul_ps
-#define _simd16_div_ps                      SIMD16::div_ps
-#define _simd16_add_ps                      SIMD16::add_ps
-#define _simd16_sub_ps                      SIMD16::sub_ps
-#define _simd16_rsqrt_ps                    SIMD16::rsqrt_ps
-#define _simd16_min_ps                      SIMD16::min_ps
-#define _simd16_max_ps                      SIMD16::max_ps
-#define _simd16_movemask_ps                 SIMD16::movemask_ps
-#define _simd16_movemask_pd                 SIMD16::movemask_pd
-#define _simd16_cvtps_epi32                 SIMD16::cvtps_epi32
-#define _simd16_cvttps_epi32                SIMD16::cvttps_epi32
-#define _simd16_cvtepi32_ps                 SIMD16::cvtepi32_ps
-#define _simd16_cmp_ps(a, b, comp)          SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
-#define _simd16_cmplt_ps                    SIMD16::cmplt_ps
-#define _simd16_cmpgt_ps                    SIMD16::cmpgt_ps
-#define _simd16_cmpneq_ps                   SIMD16::cmpneq_ps
-#define _simd16_cmpeq_ps                    SIMD16::cmpeq_ps
-#define _simd16_cmpge_ps                    SIMD16::cmpge_ps
-#define _simd16_cmple_ps                    SIMD16::cmple_ps
-#define _simd16_castsi_ps                   SIMD16::castsi_ps
-#define _simd16_castps_si                   SIMD16::castps_si
-#define _simd16_castsi_pd                   SIMD16::castsi_pd
-#define _simd16_castpd_si                   SIMD16::castpd_si
-#define _simd16_castpd_ps                   SIMD16::castpd_ps
-#define _simd16_castps_pd                   SIMD16::castps_pd
-#define _simd16_and_ps                      SIMD16::and_ps
-#define _simd16_andnot_ps                   SIMD16::andnot_ps
-#define _simd16_or_ps                       SIMD16::or_ps
-#define _simd16_xor_ps                      SIMD16::xor_ps
-#define _simd16_round_ps(a, mode)           SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
-#define _simd16_mul_epi32                   SIMD16::mul_epi32
-#define _simd16_mullo_epi32                 SIMD16::mullo_epi32
-#define _simd16_sub_epi32                   SIMD16::sub_epi32
-#define _simd16_sub_epi64                   SIMD16::sub_epi64
-#define _simd16_min_epi32                   SIMD16::min_epi32
-#define _simd16_max_epi32                   SIMD16::max_epi32
-#define _simd16_min_epu32                   SIMD16::min_epu32
-#define _simd16_max_epu32                   SIMD16::max_epu32
-#define _simd16_add_epi32                   SIMD16::add_epi32
-#define _simd16_and_si                      SIMD16::and_si
-#define _simd16_andnot_si                   SIMD16::andnot_si
-#define _simd16_or_si                       SIMD16::or_si
-#define _simd16_xor_si                      SIMD16::xor_si
-#define _simd16_cmpeq_epi32                 SIMD16::cmpeq_epi32
-#define _simd16_cmpgt_epi32                 SIMD16::cmpgt_epi32
-#define _simd16_cmplt_epi32                 SIMD16::cmplt_epi32
-#define _simd16_testz_ps                    SIMD16::testz_ps
-#define _simd16_unpacklo_ps                 SIMD16::unpacklo_ps
-#define _simd16_unpackhi_ps                 SIMD16::unpackhi_ps
-#define _simd16_unpacklo_pd                 SIMD16::unpacklo_pd
-#define _simd16_unpackhi_pd                 SIMD16::unpackhi_pd
-#define _simd16_unpacklo_epi8               SIMD16::unpacklo_epi8
-#define _simd16_unpackhi_epi8               SIMD16::unpackhi_epi8
-#define _simd16_unpacklo_epi16              SIMD16::unpacklo_epi16
-#define _simd16_unpackhi_epi16              SIMD16::unpackhi_epi16
-#define _simd16_unpacklo_epi32              SIMD16::unpacklo_epi32
-#define _simd16_unpackhi_epi32              SIMD16::unpackhi_epi32
-#define _simd16_unpacklo_epi64              SIMD16::unpacklo_epi64
-#define _simd16_unpackhi_epi64              SIMD16::unpackhi_epi64
-#define _simd16_slli_epi32(a, i)            SIMD16::slli_epi32<i>(a)
-#define _simd16_srli_epi32(a, i)            SIMD16::srli_epi32<i>(a)
-#define _simd16_srai_epi32(a, i)            SIMD16::srai_epi32<i>(a)
-#define _simd16_fmadd_ps                    SIMD16::fmadd_ps
-#define _simd16_fmsub_ps                    SIMD16::fmsub_ps
-#define _simd16_adds_epu8                   SIMD16::adds_epu8
-#define _simd16_subs_epu8                   SIMD16::subs_epu8
-#define _simd16_add_epi8                    SIMD16::add_epi8
-#define _simd16_shuffle_epi8                SIMD16::shuffle_epi8
+#define _simd16_setzero_ps SIMD16::setzero_ps
+#define _simd16_setzero_si SIMD16::setzero_si
+#define _simd16_set1_ps SIMD16::set1_ps
+#define _simd16_set1_epi8 SIMD16::set1_epi8
+#define _simd16_set1_epi32 SIMD16::set1_epi32
+#define _simd16_set_ps SIMD16::set_ps
+#define _simd16_set_epi32 SIMD16::set_epi32
+#define _simd16_load_ps SIMD16::load_ps
+#define _simd16_loadu_ps SIMD16::loadu_ps
+#if 1
+#define _simd16_load1_ps SIMD16::broadcast_ss
+#endif
+#define _simd16_load_si SIMD16::load_si
+#define _simd16_loadu_si SIMD16::loadu_si
+#define _simd16_broadcast_ss(m) SIMD16::broadcast_ss((float const*)m)
+#define _simd16_store_ps SIMD16::store_ps
+#define _simd16_store_si SIMD16::store_si
+#define _simd16_extract_ps(a, imm8) SIMD16::extract_ps<imm8>(a)
+#define _simd16_extract_si(a, imm8) SIMD16::extract_si<imm8>(a)
+#define _simd16_insert_ps(a, b, imm8) SIMD16::insert_ps<imm8>(a, b)
+#define _simd16_insert_si(a, b, imm8) SIMD16::insert_si<imm8>(a, b)
+#define _simd16_maskstore_ps SIMD16::maskstore_ps
+#define _simd16_blend_ps(a, b, mask) SIMD16::blend_ps<mask>(a, b)
+#define _simd16_blendv_ps SIMD16::blendv_ps
+#define _simd16_blendv_epi32 SIMD16::blendv_epi32
+#define _simd16_mul_ps SIMD16::mul_ps
+#define _simd16_div_ps SIMD16::div_ps
+#define _simd16_add_ps SIMD16::add_ps
+#define _simd16_sub_ps SIMD16::sub_ps
+#define _simd16_rsqrt_ps SIMD16::rsqrt_ps
+#define _simd16_min_ps SIMD16::min_ps
+#define _simd16_max_ps SIMD16::max_ps
+#define _simd16_movemask_ps SIMD16::movemask_ps
+#define _simd16_movemask_pd SIMD16::movemask_pd
+#define _simd16_cvtps_epi32 SIMD16::cvtps_epi32
+#define _simd16_cvttps_epi32 SIMD16::cvttps_epi32
+#define _simd16_cvtepi32_ps SIMD16::cvtepi32_ps
+#define _simd16_cmp_ps(a, b, comp) SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
+#define _simd16_cmplt_ps SIMD16::cmplt_ps
+#define _simd16_cmpgt_ps SIMD16::cmpgt_ps
+#define _simd16_cmpneq_ps SIMD16::cmpneq_ps
+#define _simd16_cmpeq_ps SIMD16::cmpeq_ps
+#define _simd16_cmpge_ps SIMD16::cmpge_ps
+#define _simd16_cmple_ps SIMD16::cmple_ps
+#define _simd16_castsi_ps SIMD16::castsi_ps
+#define _simd16_castps_si SIMD16::castps_si
+#define _simd16_castsi_pd SIMD16::castsi_pd
+#define _simd16_castpd_si SIMD16::castpd_si
+#define _simd16_castpd_ps SIMD16::castpd_ps
+#define _simd16_castps_pd SIMD16::castps_pd
+#define _simd16_and_ps SIMD16::and_ps
+#define _simd16_andnot_ps SIMD16::andnot_ps
+#define _simd16_or_ps SIMD16::or_ps
+#define _simd16_xor_ps SIMD16::xor_ps
+#define _simd16_round_ps(a, mode) SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
+#define _simd16_mul_epi32 SIMD16::mul_epi32
+#define _simd16_mullo_epi32 SIMD16::mullo_epi32
+#define _simd16_sub_epi32 SIMD16::sub_epi32
+#define _simd16_sub_epi64 SIMD16::sub_epi64
+#define _simd16_min_epi32 SIMD16::min_epi32
+#define _simd16_max_epi32 SIMD16::max_epi32
+#define _simd16_min_epu32 SIMD16::min_epu32
+#define _simd16_max_epu32 SIMD16::max_epu32
+#define _simd16_add_epi32 SIMD16::add_epi32
+#define _simd16_and_si SIMD16::and_si
+#define _simd16_andnot_si SIMD16::andnot_si
+#define _simd16_or_si SIMD16::or_si
+#define _simd16_xor_si SIMD16::xor_si
+#define _simd16_cmpeq_epi32 SIMD16::cmpeq_epi32
+#define _simd16_cmpgt_epi32 SIMD16::cmpgt_epi32
+#define _simd16_cmplt_epi32 SIMD16::cmplt_epi32
+#define _simd16_testz_ps SIMD16::testz_ps
+#define _simd16_unpacklo_ps SIMD16::unpacklo_ps
+#define _simd16_unpackhi_ps SIMD16::unpackhi_ps
+#define _simd16_unpacklo_pd SIMD16::unpacklo_pd
+#define _simd16_unpackhi_pd SIMD16::unpackhi_pd
+#define _simd16_unpacklo_epi8 SIMD16::unpacklo_epi8
+#define _simd16_unpackhi_epi8 SIMD16::unpackhi_epi8
+#define _simd16_unpacklo_epi16 SIMD16::unpacklo_epi16
+#define _simd16_unpackhi_epi16 SIMD16::unpackhi_epi16
+#define _simd16_unpacklo_epi32 SIMD16::unpacklo_epi32
+#define _simd16_unpackhi_epi32 SIMD16::unpackhi_epi32
+#define _simd16_unpacklo_epi64 SIMD16::unpacklo_epi64
+#define _simd16_unpackhi_epi64 SIMD16::unpackhi_epi64
+#define _simd16_slli_epi32(a, i) SIMD16::slli_epi32<i>(a)
+#define _simd16_srli_epi32(a, i) SIMD16::srli_epi32<i>(a)
+#define _simd16_srai_epi32(a, i) SIMD16::srai_epi32<i>(a)
+#define _simd16_fmadd_ps SIMD16::fmadd_ps
+#define _simd16_fmsub_ps SIMD16::fmsub_ps
+#define _simd16_adds_epu8 SIMD16::adds_epu8
+#define _simd16_subs_epu8 SIMD16::subs_epu8
+#define _simd16_add_epi8 SIMD16::add_epi8
+#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
  
-#define _simd16_i32gather_ps(m, index, scale)               SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
-#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
+#define _simd16_i32gather_ps(m, index, scale) \
+    SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
+#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) \
+    SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
  
-#define _simd16_abs_epi32                   SIMD16::abs_epi32
+#define _simd16_abs_epi32 SIMD16::abs_epi32
  
-#define _simd16_cmpeq_epi64                 SIMD16::cmpeq_epi64
-#define _simd16_cmpgt_epi64                 SIMD16::cmpgt_epi64
-#define _simd16_cmpeq_epi16                 SIMD16::cmpeq_epi16
-#define _simd16_cmpgt_epi16                 SIMD16::cmpgt_epi16
-#define _simd16_cmpeq_epi8                  SIMD16::cmpeq_epi8
-#define _simd16_cmpgt_epi8                  SIMD16::cmpgt_epi8
+#define _simd16_cmpeq_epi64 SIMD16::cmpeq_epi64
+#define _simd16_cmpgt_epi64 SIMD16::cmpgt_epi64
+#define _simd16_cmpeq_epi16 SIMD16::cmpeq_epi16
+#define _simd16_cmpgt_epi16 SIMD16::cmpgt_epi16
+#define _simd16_cmpeq_epi8 SIMD16::cmpeq_epi8
+#define _simd16_cmpgt_epi8 SIMD16::cmpgt_epi8
  
-#define _simd16_permute_ps_i(a, i)          SIMD16::permute_ps<i>(a)
-#define _simd16_permute_ps                  SIMD16::permute_ps
-#define _simd16_permute_epi32               SIMD16::permute_epi32
-#define _simd16_sllv_epi32                  SIMD16::sllv_epi32
-#define _simd16_srlv_epi32                  SIMD16::sllv_epi32
-#define _simd16_permute2f128_ps(a, b, i)    SIMD16::permute2f128_ps<i>(a, b)
-#define _simd16_permute2f128_pd(a, b, i)    SIMD16::permute2f128_pd<i>(a, b)
-#define _simd16_permute2f128_si(a, b, i)    SIMD16::permute2f128_si<i>(a, b)
-#define _simd16_shuffle_ps(a, b, i)         SIMD16::shuffle_ps<i>(a, b)
-#define _simd16_shuffle_pd(a, b, i)         SIMD16::shuffle_pd<i>(a, b)
-#define _simd16_shuffle_epi32(a, b, imm8)   SIMD16::shuffle_epi32<imm8>(a, b)
-#define _simd16_shuffle_epi64(a, b, imm8)   SIMD16::shuffle_epi64<imm8>(a, b)
-#define _simd16_cvtepu8_epi16               SIMD16::cvtepu8_epi16
-#define _simd16_cvtepu8_epi32               SIMD16::cvtepu8_epi32
-#define _simd16_cvtepu16_epi32              SIMD16::cvtepu16_epi32
-#define _simd16_cvtepu16_epi64              SIMD16::cvtepu16_epi64
-#define _simd16_cvtepu32_epi64              SIMD16::cvtepu32_epi64
-#define _simd16_packus_epi16                SIMD16::packus_epi16
-#define _simd16_packs_epi16                 SIMD16::packs_epi16
-#define _simd16_packus_epi32                SIMD16::packus_epi32
-#define _simd16_packs_epi32                 SIMD16::packs_epi32
-#define _simd16_cmplt_ps_mask               SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
-#define _simd16_cmpeq_ps_mask               SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
-#define _simd16_int2mask(mask)              simd16mask(mask)
-#define _simd16_mask2int(mask)              int(mask)
-#define _simd16_vmask_ps                    SIMD16::vmask_ps
+#define _simd16_permute_ps_i(a, i) SIMD16::permute_ps<i>(a)
+#define _simd16_permute_ps SIMD16::permute_ps
+#define _simd16_permute_epi32 SIMD16::permute_epi32
+#define _simd16_sllv_epi32 SIMD16::sllv_epi32
+#define _simd16_srlv_epi32 SIMD16::srlv_epi32 // variable logical right shift (was aliased to sllv)
+#define _simd16_permute2f128_ps(a, b, i) SIMD16::permute2f128_ps<i>(a, b)
+#define _simd16_permute2f128_pd(a, b, i) SIMD16::permute2f128_pd<i>(a, b)
+#define _simd16_permute2f128_si(a, b, i) SIMD16::permute2f128_si<i>(a, b)
+#define _simd16_shuffle_ps(a, b, i) SIMD16::shuffle_ps<i>(a, b)
+#define _simd16_shuffle_pd(a, b, i) SIMD16::shuffle_pd<i>(a, b)
+#define _simd16_shuffle_epi32(a, b, imm8) SIMD16::shuffle_epi32<imm8>(a, b)
+#define _simd16_shuffle_epi64(a, b, imm8) SIMD16::shuffle_epi64<imm8>(a, b)
+#define _simd16_cvtepu8_epi16 SIMD16::cvtepu8_epi16
+#define _simd16_cvtepu8_epi32 SIMD16::cvtepu8_epi32
+#define _simd16_cvtepu16_epi32 SIMD16::cvtepu16_epi32
+#define _simd16_cvtepu16_epi64 SIMD16::cvtepu16_epi64
+#define _simd16_cvtepu32_epi64 SIMD16::cvtepu32_epi64
+#define _simd16_packus_epi16 SIMD16::packus_epi16
+#define _simd16_packs_epi16 SIMD16::packs_epi16
+#define _simd16_packus_epi32 SIMD16::packus_epi32
+#define _simd16_packs_epi32 SIMD16::packs_epi32
+#define _simd16_cmplt_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
+#define _simd16_cmpeq_ps_mask SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
+#define _simd16_int2mask(mask) simd16mask(mask)
+#define _simd16_mask2int(mask) int(mask)
+#define _simd16_vmask_ps SIMD16::vmask_ps
  
-#endif//ENABLE_AVX512_SIMD16
+#endif // ENABLE_AVX512_SIMD16
  
-#endif//__SWR_SIMD16INTRIN_H_
+#endif //__SWR_SIMD16INTRIN_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h

index b1471a97250dbfd8f1027960761e41662e7a3a01..8ffda3f845894bc3a1c220e4caf7636cc271dc50 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #ifndef __SWR_SIMDINTRIN_H__
  #define __SWR_SIMDINTRIN_H__
@@ -28,176 +28,177 @@
  #include "common/simdlib.hpp"
  
  #if KNOB_SIMD_WIDTH == 8
-typedef SIMD256                             SIMD;
+typedef SIMD256 SIMD;
  #else
  #error Unsupported vector width
-#endif//KNOB_SIMD16_WIDTH == 16
-
-
-#define _simd128_maskstore_ps               SIMD128::maskstore_ps
-#define _simd128_fmadd_ps                   SIMD128::fmadd_ps
-
-#define _simd_load_ps                       SIMD::load_ps
-#define _simd_load1_ps                      SIMD::broadcast_ss
-#define _simd_loadu_ps                      SIMD::loadu_ps
-#define _simd_setzero_ps                    SIMD::setzero_ps
-#define _simd_set1_ps                       SIMD::set1_ps
-#define _simd_blend_ps(a, b, i)             SIMD::blend_ps<i>(a, b)
-#define _simd_blend_epi32(a, b, i)          SIMD::blend_epi32<i>(a, b)
-#define _simd_blendv_ps                     SIMD::blendv_ps
-#define _simd_store_ps                      SIMD::store_ps
-#define _simd_mul_ps                        SIMD::mul_ps
-#define _simd_add_ps                        SIMD::add_ps
-#define _simd_sub_ps                        SIMD::sub_ps
-#define _simd_rsqrt_ps                      SIMD::rsqrt_ps
-#define _simd_min_ps                        SIMD::min_ps
-#define _simd_max_ps                        SIMD::max_ps
-#define _simd_movemask_ps                   SIMD::movemask_ps
-#define _simd_cvtps_epi32                   SIMD::cvtps_epi32
-#define _simd_cvttps_epi32                  SIMD::cvttps_epi32
-#define _simd_cvtepi32_ps                   SIMD::cvtepi32_ps
-#define _simd_cmplt_ps                      SIMD::cmplt_ps
-#define _simd_cmpgt_ps                      SIMD::cmpgt_ps
-#define _simd_cmpneq_ps                     SIMD::cmpneq_ps
-#define _simd_cmpeq_ps                      SIMD::cmpeq_ps
-#define _simd_cmpge_ps                      SIMD::cmpge_ps
-#define _simd_cmple_ps                      SIMD::cmple_ps
-#define _simd_cmp_ps(a, b, imm)             SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
-#define _simd_and_ps                        SIMD::and_ps
-#define _simd_or_ps                         SIMD::or_ps
-#define _simd_rcp_ps                        SIMD::rcp_ps
-#define _simd_div_ps                        SIMD::div_ps
-#define _simd_castsi_ps                     SIMD::castsi_ps
-#define _simd_castps_pd                     SIMD::castps_pd
-#define _simd_castpd_ps                     SIMD::castpd_ps
-#define _simd_andnot_ps                     SIMD::andnot_ps
-#define _simd_round_ps(a, i)                SIMD::round_ps<SIMD::RoundMode(i)>(a)
-#define _simd_castpd_ps                     SIMD::castpd_ps
-#define _simd_broadcast_ps(a)               SIMD::broadcast_ps((SIMD128::Float const *)(a))
-#define _simd_stream_ps                     SIMD::stream_ps
-
-#define _simd_movemask_pd                   SIMD::movemask_pd
-#define _simd_castsi_pd                     SIMD::castsi_pd
-
-#define _simd_mul_epi32                     SIMD::mul_epi32
-#define _simd_mullo_epi32                   SIMD::mullo_epi32
-#define _simd_sub_epi32                     SIMD::sub_epi32
-#define _simd_sub_epi64                     SIMD::sub_epi64
-#define _simd_min_epi32                     SIMD::min_epi32
-#define _simd_min_epu32                     SIMD::min_epu32
-#define _simd_max_epi32                     SIMD::max_epi32
-#define _simd_max_epu32                     SIMD::max_epu32
-#define _simd_add_epi32                     SIMD::add_epi32
-#define _simd_and_si                        SIMD::and_si
-#define _simd_andnot_si                     SIMD::andnot_si
-#define _simd_cmpeq_epi32                   SIMD::cmpeq_epi32
-#define _simd_cmplt_epi32                   SIMD::cmplt_epi32
-#define _simd_cmpgt_epi32                   SIMD::cmpgt_epi32
-#define _simd_or_si                         SIMD::or_si
-#define _simd_xor_si                        SIMD::xor_si
-#define _simd_castps_si                     SIMD::castps_si
-#define _simd_adds_epu8                     SIMD::adds_epu8
-#define _simd_subs_epu8                     SIMD::subs_epu8
-#define _simd_add_epi8                      SIMD::add_epi8
-#define _simd_cmpeq_epi64                   SIMD::cmpeq_epi64
-#define _simd_cmpgt_epi64                   SIMD::cmpgt_epi64
-#define _simd_cmpgt_epi8                    SIMD::cmpgt_epi8
-#define _simd_cmpeq_epi8                    SIMD::cmpeq_epi8
-#define _simd_cmpgt_epi16                   SIMD::cmpgt_epi16
-#define _simd_cmpeq_epi16                   SIMD::cmpeq_epi16
-#define _simd_movemask_epi8                 SIMD::movemask_epi8
-#define _simd_permute_ps_i(a, i)            SIMD::permute_ps<i>(a)
-#define _simd_permute_ps                    SIMD::permute_ps
-#define _simd_permute_epi32                 SIMD::permute_epi32
-#define _simd_srlv_epi32                    SIMD::srlv_epi32
-#define _simd_sllv_epi32                    SIMD::sllv_epi32
-
-#define _simd_unpacklo_epi8                 SIMD::unpacklo_epi8
-#define _simd_unpackhi_epi8                 SIMD::unpackhi_epi8
-#define _simd_unpacklo_epi16                SIMD::unpacklo_epi16
-#define _simd_unpackhi_epi16                SIMD::unpackhi_epi16
-#define _simd_unpacklo_epi32                SIMD::unpacklo_epi32
-#define _simd_unpackhi_epi32                SIMD::unpackhi_epi32
-#define _simd_unpacklo_epi64                SIMD::unpacklo_epi64
-#define _simd_unpackhi_epi64                SIMD::unpackhi_epi64
-
-#define _simd_slli_epi32(a,i)               SIMD::slli_epi32<i>(a)
-#define _simd_srai_epi32(a,i)               SIMD::srai_epi32<i>(a)
-#define _simd_srli_epi32(a,i)               SIMD::srli_epi32<i>(a)
-#define _simd_srlisi_ps(a,i)                SIMD::srlisi_ps<i>(a)
-
-#define _simd_fmadd_ps                      SIMD::fmadd_ps
-#define _simd_fmsub_ps                      SIMD::fmsub_ps
-#define _simd_shuffle_epi8                  SIMD::shuffle_epi8
-
-#define _simd_i32gather_ps(p, o, s)         SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
-#define _simd_mask_i32gather_ps(r, p, o, m, s) SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
-#define _simd_abs_epi32                     SIMD::abs_epi32
-
-#define _simd_cvtepu8_epi16                 SIMD::cvtepu8_epi16
-#define _simd_cvtepu8_epi32                 SIMD::cvtepu8_epi32
-#define _simd_cvtepu16_epi32                SIMD::cvtepu16_epi32
-#define _simd_cvtepu16_epi64                SIMD::cvtepu16_epi64
-#define _simd_cvtepu32_epi64                SIMD::cvtepu32_epi64
-
-#define _simd_packus_epi16                  SIMD::packus_epi16
-#define _simd_packs_epi16                   SIMD::packs_epi16
-#define _simd_packus_epi32                  SIMD::packus_epi32
-#define _simd_packs_epi32                   SIMD::packs_epi32
-
-#define _simd_unpacklo_ps                   SIMD::unpacklo_ps
-#define _simd_unpackhi_ps                   SIMD::unpackhi_ps
-#define _simd_unpacklo_pd                   SIMD::unpacklo_pd
-#define _simd_unpackhi_pd                   SIMD::unpackhi_pd
-#define _simd_insertf128_ps                 SIMD::insertf128_ps
-#define _simd_insertf128_pd                 SIMD::insertf128_pd
-#define _simd_insertf128_si(a, b, i)        SIMD::insertf128_si<i>(a, b)
-#define _simd_extractf128_ps(a, i)          SIMD::extractf128_ps<i>(a)
-#define _simd_extractf128_pd(a, i)          SIMD::extractf128_pd<i>(a)
-#define _simd_extractf128_si(a, i)          SIMD::extractf128_si<i>(a)
-#define _simd_permute2f128_ps(a, b, i)      SIMD::permute2f128_ps<i>(a, b)
-#define _simd_permute2f128_pd(a, b, i)      SIMD::permute2f128_pd<i>(a, b)
-#define _simd_permute2f128_si(a, b, i)      SIMD::permute2f128_si<i>(a, b)
-#define _simd_shuffle_ps(a, b, i)           SIMD::shuffle_ps<i>(a, b)
-#define _simd_shuffle_pd(a, b, i)           SIMD::shuffle_pd<i>(a, b)
-#define _simd_shuffle_epi32(a, b, imm8)     SIMD::shuffle_epi32<imm8>(a, b)
-#define _simd_shuffle_epi64(a, b, imm8)     SIMD::shuffle_epi64<imm8>(a, b)
-#define _simd_set1_epi32                    SIMD::set1_epi32
-#define _simd_set_epi32                     SIMD::set_epi32
-#define _simd_set_ps                        SIMD::set_ps
-#define _simd_set1_epi8                     SIMD::set1_epi8
-#define _simd_setzero_si                    SIMD::setzero_si
-#define _simd_cvttps_epi32                  SIMD::cvttps_epi32
-#define _simd_store_si                      SIMD::store_si
-#define _simd_broadcast_ss                  SIMD::broadcast_ss
-#define _simd_maskstore_ps                  SIMD::maskstore_ps
-#define _simd_load_si                       SIMD::load_si
-#define _simd_loadu_si                      SIMD::loadu_si
-#define _simd_sub_ps                        SIMD::sub_ps
-#define _simd_testz_ps                      SIMD::testz_ps
-#define _simd_testz_si                      SIMD::testz_si
-#define _simd_xor_ps                        SIMD::xor_ps
-
-#define _simd_loadu2_si                     SIMD::loadu2_si
-#define _simd_storeu2_si                    SIMD::storeu2_si
-
-#define _simd_blendv_epi32                  SIMD::blendv_epi32
-#define _simd_vmask_ps                      SIMD::vmask_ps
-
-template<int mask> SIMDINLINE
-SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const &a, SIMD128::Integer const &b)
+#endif // KNOB_SIMD_WIDTH == 8
+
+#define _simd128_maskstore_ps SIMD128::maskstore_ps
+#define _simd128_fmadd_ps SIMD128::fmadd_ps
+
+#define _simd_load_ps SIMD::load_ps
+#define _simd_load1_ps SIMD::broadcast_ss
+#define _simd_loadu_ps SIMD::loadu_ps
+#define _simd_setzero_ps SIMD::setzero_ps
+#define _simd_set1_ps SIMD::set1_ps
+#define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)
+#define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)
+#define _simd_blendv_ps SIMD::blendv_ps
+#define _simd_store_ps SIMD::store_ps
+#define _simd_mul_ps SIMD::mul_ps
+#define _simd_add_ps SIMD::add_ps
+#define _simd_sub_ps SIMD::sub_ps
+#define _simd_rsqrt_ps SIMD::rsqrt_ps
+#define _simd_min_ps SIMD::min_ps
+#define _simd_max_ps SIMD::max_ps
+#define _simd_movemask_ps SIMD::movemask_ps
+#define _simd_cvtps_epi32 SIMD::cvtps_epi32
+#define _simd_cvttps_epi32 SIMD::cvttps_epi32
+#define _simd_cvtepi32_ps SIMD::cvtepi32_ps
+#define _simd_cmplt_ps SIMD::cmplt_ps
+#define _simd_cmpgt_ps SIMD::cmpgt_ps
+#define _simd_cmpneq_ps SIMD::cmpneq_ps
+#define _simd_cmpeq_ps SIMD::cmpeq_ps
+#define _simd_cmpge_ps SIMD::cmpge_ps
+#define _simd_cmple_ps SIMD::cmple_ps
+#define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
+#define _simd_and_ps SIMD::and_ps
+#define _simd_or_ps SIMD::or_ps
+#define _simd_rcp_ps SIMD::rcp_ps
+#define _simd_div_ps SIMD::div_ps
+#define _simd_castsi_ps SIMD::castsi_ps
+#define _simd_castps_pd SIMD::castps_pd
+#define _simd_castpd_ps SIMD::castpd_ps
+#define _simd_andnot_ps SIMD::andnot_ps
+#define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)
+#define _simd_castpd_ps SIMD::castpd_ps
+#define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const*)(a))
+#define _simd_stream_ps SIMD::stream_ps
+
+#define _simd_movemask_pd SIMD::movemask_pd
+#define _simd_castsi_pd SIMD::castsi_pd
+
+#define _simd_mul_epi32 SIMD::mul_epi32
+#define _simd_mullo_epi32 SIMD::mullo_epi32
+#define _simd_sub_epi32 SIMD::sub_epi32
+#define _simd_sub_epi64 SIMD::sub_epi64
+#define _simd_min_epi32 SIMD::min_epi32
+#define _simd_min_epu32 SIMD::min_epu32
+#define _simd_max_epi32 SIMD::max_epi32
+#define _simd_max_epu32 SIMD::max_epu32
+#define _simd_add_epi32 SIMD::add_epi32
+#define _simd_and_si SIMD::and_si
+#define _simd_andnot_si SIMD::andnot_si
+#define _simd_cmpeq_epi32 SIMD::cmpeq_epi32
+#define _simd_cmplt_epi32 SIMD::cmplt_epi32
+#define _simd_cmpgt_epi32 SIMD::cmpgt_epi32
+#define _simd_or_si SIMD::or_si
+#define _simd_xor_si SIMD::xor_si
+#define _simd_castps_si SIMD::castps_si
+#define _simd_adds_epu8 SIMD::adds_epu8
+#define _simd_subs_epu8 SIMD::subs_epu8
+#define _simd_add_epi8 SIMD::add_epi8
+#define _simd_cmpeq_epi64 SIMD::cmpeq_epi64
+#define _simd_cmpgt_epi64 SIMD::cmpgt_epi64
+#define _simd_cmpgt_epi8 SIMD::cmpgt_epi8
+#define _simd_cmpeq_epi8 SIMD::cmpeq_epi8
+#define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
+#define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
+#define _simd_movemask_epi8 SIMD::movemask_epi8
+#define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
+#define _simd_permute_ps SIMD::permute_ps
+#define _simd_permute_epi32 SIMD::permute_epi32
+#define _simd_srlv_epi32 SIMD::srlv_epi32
+#define _simd_sllv_epi32 SIMD::sllv_epi32
+
+#define _simd_unpacklo_epi8 SIMD::unpacklo_epi8
+#define _simd_unpackhi_epi8 SIMD::unpackhi_epi8
+#define _simd_unpacklo_epi16 SIMD::unpacklo_epi16
+#define _simd_unpackhi_epi16 SIMD::unpackhi_epi16
+#define _simd_unpacklo_epi32 SIMD::unpacklo_epi32
+#define _simd_unpackhi_epi32 SIMD::unpackhi_epi32
+#define _simd_unpacklo_epi64 SIMD::unpacklo_epi64
+#define _simd_unpackhi_epi64 SIMD::unpackhi_epi64
+
+#define _simd_slli_epi32(a, i) SIMD::slli_epi32<i>(a)
+#define _simd_srai_epi32(a, i) SIMD::srai_epi32<i>(a)
+#define _simd_srli_epi32(a, i) SIMD::srli_epi32<i>(a)
+#define _simd_srlisi_ps(a, i) SIMD::srlisi_ps<i>(a)
+
+#define _simd_fmadd_ps SIMD::fmadd_ps
+#define _simd_fmsub_ps SIMD::fmsub_ps
+#define _simd_shuffle_epi8 SIMD::shuffle_epi8
+
+#define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
+#define _simd_mask_i32gather_ps(r, p, o, m, s) \
+    SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
+#define _simd_abs_epi32 SIMD::abs_epi32
+
+#define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16
+#define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32
+#define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32
+#define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64
+#define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64
+
+#define _simd_packus_epi16 SIMD::packus_epi16
+#define _simd_packs_epi16 SIMD::packs_epi16
+#define _simd_packus_epi32 SIMD::packus_epi32
+#define _simd_packs_epi32 SIMD::packs_epi32
+
+#define _simd_unpacklo_ps SIMD::unpacklo_ps
+#define _simd_unpackhi_ps SIMD::unpackhi_ps
+#define _simd_unpacklo_pd SIMD::unpacklo_pd
+#define _simd_unpackhi_pd SIMD::unpackhi_pd
+#define _simd_insertf128_ps SIMD::insertf128_ps
+#define _simd_insertf128_pd SIMD::insertf128_pd
+#define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)
+#define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)
+#define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)
+#define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)
+#define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)
+#define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)
+#define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)
+#define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)
+#define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)
+#define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
+#define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)
+#define _simd_set1_epi32 SIMD::set1_epi32
+#define _simd_set_epi32 SIMD::set_epi32
+#define _simd_set_ps SIMD::set_ps
+#define _simd_set1_epi8 SIMD::set1_epi8
+#define _simd_setzero_si SIMD::setzero_si
+#define _simd_cvttps_epi32 SIMD::cvttps_epi32
+#define _simd_store_si SIMD::store_si
+#define _simd_broadcast_ss SIMD::broadcast_ss
+#define _simd_maskstore_ps SIMD::maskstore_ps
+#define _simd_load_si SIMD::load_si
+#define _simd_loadu_si SIMD::loadu_si
+#define _simd_sub_ps SIMD::sub_ps
+#define _simd_testz_ps SIMD::testz_ps
+#define _simd_testz_si SIMD::testz_si
+#define _simd_xor_ps SIMD::xor_ps
+
+#define _simd_loadu2_si SIMD::loadu2_si
+#define _simd_storeu2_si SIMD::storeu2_si
+
+#define _simd_blendv_epi32 SIMD::blendv_epi32
+#define _simd_vmask_ps SIMD::vmask_ps
+
+template <int mask>
+SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)
  {
-    return SIMD128::castps_si(SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
+    return SIMD128::castps_si(
+        SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
  }
  
  SIMDINLINE
-void _simd_mov(simdscalar &r, unsigned int rlane, simdscalar& s, unsigned int slane)
+void _simd_mov(simdscalar& r, unsigned int rlane, simdscalar& s, unsigned int slane)
  {
      OSALIGNSIMD(float) rArray[KNOB_SIMD_WIDTH], sArray[KNOB_SIMD_WIDTH];
      SIMD256::store_ps(rArray, r);
      SIMD256::store_ps(sArray, s);
      rArray[rlane] = sArray[slane];
-    r = SIMD256::load_ps(rArray);
+    r             = SIMD256::load_ps(rArray);
  }
  
  // Populates a simdvector from a vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
@@ -228,34 +229,42 @@ void _simdvec_mov(simdvector &r, unsigned int rlane, simdvector& s, unsigned int
  
  #endif
  
-#define _simdvec_dp3_ps                 SIMD::vec4_dp3_ps
-#define _simdvec_dp4_ps                 SIMD::vec4_dp4_ps
-#define _simdvec_rcp_length_ps          SIMD::vec4_rcp_length_ps
-#define _simdvec_normalize_ps           SIMD::vec4_normalize_ps
-#define _simdvec_mul_ps                 SIMD::vec4_mul_ps
-#define _simdvec_add_ps                 SIMD::vec4_add_ps
-#define _simdvec_min_ps                 SIMD::vec4_min_ps
-#define _simdvec_max_ps                 SIMD::vec4_max_ps
-#define _simd_mat4x4_vec4_multiply      SIMD::mat4x4_vec4_multiply
-#define _simd_mat3x3_vec3_w0_multiply   SIMD::mat3x3_vec3_w0_multiply
-#define _simd_mat4x4_vec3_w1_multiply   SIMD::mat4x4_vec3_w1_multiply
-#define _simd_mat4x3_vec3_w1_multiply   SIMD::mat4x3_vec3_w1_multiply
+#define _simdvec_dp3_ps SIMD::vec4_dp3_ps
+#define _simdvec_dp4_ps SIMD::vec4_dp4_ps
+#define _simdvec_rcp_length_ps SIMD::vec4_rcp_length_ps
+#define _simdvec_normalize_ps SIMD::vec4_normalize_ps
+#define _simdvec_mul_ps SIMD::vec4_mul_ps
+#define _simdvec_add_ps SIMD::vec4_add_ps
+#define _simdvec_min_ps SIMD::vec4_min_ps
+#define _simdvec_max_ps SIMD::vec4_max_ps
+#define _simd_mat4x4_vec4_multiply SIMD::mat4x4_vec4_multiply
+#define _simd_mat3x3_vec3_w0_multiply SIMD::mat3x3_vec3_w0_multiply
+#define _simd_mat4x4_vec3_w1_multiply SIMD::mat4x4_vec3_w1_multiply
+#define _simd_mat4x3_vec3_w1_multiply SIMD::mat4x3_vec3_w1_multiply
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Compute plane equation vA * vX + vB * vY + vC
-SIMDINLINE simdscalar vplaneps(simdscalar const &vA, simdscalar const &vB, simdscalar const &vC, simdscalar const &vX, simdscalar const &vY)
+SIMDINLINE simdscalar vplaneps(simdscalar const& vA,
+                               simdscalar const& vB,
+                               simdscalar const& vC,
+                               simdscalar const& vX,
+                               simdscalar const& vY)
  {
      simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);
-    vOut = _simd_fmadd_ps(vB, vY, vOut);
+    vOut            = _simd_fmadd_ps(vB, vY, vOut);
      return vOut;
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Compute plane equation vA * vX + vB * vY + vC
-SIMDINLINE simd4scalar vplaneps(simd4scalar const &vA, simd4scalar const &vB, simd4scalar const &vC, simd4scalar const &vX, simd4scalar const &vY)
+SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,
+                                simd4scalar const& vB,
+                                simd4scalar const& vC,
+                                simd4scalar const& vX,
+                                simd4scalar const& vY)
  {
      simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC);
-    vOut = _simd128_fmadd_ps(vB, vY, vOut);
+    vOut             = _simd128_fmadd_ps(vB, vY, vOut);
      return vOut;
  }
  
@@ -264,30 +273,32 @@ SIMDINLINE simd4scalar vplaneps(simd4scalar const &vA, simd4scalar const &vB, si
  /// @param vI - barycentric I
  /// @param vJ - barycentric J
  /// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponent(simdscalar const &vI, simdscalar const &vJ, const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,
+                                                  simdscalar const& vJ,
+                                                  const float*      pInterpBuffer)
  {
-    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
-    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
-    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
+    const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+    const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
  
      simdscalar vA = _simd_broadcast_ss(pInterpA);
      simdscalar vB = _simd_broadcast_ss(pInterpB);
      simdscalar vC = _simd_broadcast_ss(pInterpC);
  
      simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
-    vC = _simd_mul_ps(vk, vC);
-    
+    vC            = _simd_mul_ps(vk, vC);
+
      return vplaneps(vA, vB, vC, vI, vJ);
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Interpolates a single component (flat shade).
  /// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simdscalar InterpolateComponentFlat(const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)
  {
-    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
  
      simdscalar vA = _simd_broadcast_ss(pInterpA);
  
@@ -299,38 +310,39 @@ static SIMDINLINE simdscalar InterpolateComponentFlat(const float *pInterpBuffer
  /// @param vI - barycentric I
  /// @param vJ - barycentric J
  /// @param pInterpBuffer - pointer to attribute barycentric coeffs
-template<UINT Attrib, UINT Comp, UINT numComponents = 4>
-static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const &vI, simd4scalar const &vJ, const float *pInterpBuffer)
+template <UINT Attrib, UINT Comp, UINT numComponents = 4>
+static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,
+                                                   simd4scalar const& vJ,
+                                                   const float*       pInterpBuffer)
  {
-    const float *pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
-    const float *pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
-    const float *pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
+    const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
+    const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
+    const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
  
      simd4scalar vA = SIMD128::broadcast_ss(pInterpA);
      simd4scalar vB = SIMD128::broadcast_ss(pInterpB);
      simd4scalar vC = SIMD128::broadcast_ss(pInterpC);
  
      simd4scalar vk = SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);
-    vC = SIMD128::mul_ps(vk, vC);
+    vC             = SIMD128::mul_ps(vk, vC);
  
      return vplaneps(vA, vB, vC, vI, vJ);
  }
  
-static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const &a)
+static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)
  {
      simd4scalari ai = SIMD128::castps_si(a);
      return SIMD128::castsi_ps(SIMD128::and_si(ai, SIMD128::set1_epi32(0x7fffffff)));
  }
  
-static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const &a)
+static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)
  {
      simdscalari ai = _simd_castps_si(a);
      return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));
  }
  
-
  #if ENABLE_AVX512_SIMD16
  #include "simd16intrin.h"
-#endif//ENABLE_AVX512_SIMD16
+#endif // ENABLE_AVX512_SIMD16
  
-#endif//__SWR_SIMDINTRIN_H__
+#endif //__SWR_SIMDINTRIN_H__
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp

index 24cf27d4dbcd61b50b52dd018d7b90a733cbbc50..bd48fb2aae7cc8aa48d1e3bd7db6b981d466308c 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #pragma once
  
  #include "simdlib_types.hpp"
@@ -38,8 +38,7 @@ namespace SIMDImpl
  #include "simdlib_128_avx.inl"
  #undef __SIMD_LIB_AVX_HPP__
          }; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX
  
  #if SIMD_ARCH >= SIMD_ARCH_AVX2
          struct AVX2Impl : AVXImpl
@@ -48,7 +47,7 @@ namespace SIMDImpl
  #include "simdlib_128_avx2.inl"
  #undef __SIMD_LIB_AVX2_HPP__
          }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX2
  
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
          struct AVX512Impl : AVX2Impl
@@ -62,9 +61,9 @@ namespace SIMDImpl
  #include "simdlib_128_avx512_core.inl"
  #endif // defined(SIMD_ARCH_KNIGHTS)
  #undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_128_AVX512
+#endif     // SIMD_OPT_128_AVX512
          }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX512
  
          struct Traits : SIMDImpl::Traits
          {
@@ -78,13 +77,13 @@ namespace SIMDImpl
  #error Invalid value for SIMD_ARCH
  #endif
  
-            using Float     = SIMD128Impl::Float;
-            using Double    = SIMD128Impl::Double;
-            using Integer   = SIMD128Impl::Integer;
-            using Vec4      = SIMD128Impl::Vec4;
-            using Mask      = SIMD128Impl::Mask;
+            using Float   = SIMD128Impl::Float;
+            using Double  = SIMD128Impl::Double;
+            using Integer = SIMD128Impl::Integer;
+            using Vec4    = SIMD128Impl::Vec4;
+            using Mask    = SIMD128Impl::Mask;
          };
-    } // ns SIMD128Impl
+    } // namespace SIMD128Impl
  
      namespace SIMD256Impl
      {
@@ -95,8 +94,7 @@ namespace SIMDImpl
  #include "simdlib_256_avx.inl"
  #undef __SIMD_LIB_AVX_HPP__
          }; // struct AVXImpl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
-
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX
  
  #if SIMD_ARCH >= SIMD_ARCH_AVX2
          struct AVX2Impl : AVXImpl
@@ -105,7 +103,7 @@ namespace SIMDImpl
  #include "simdlib_256_avx2.inl"
  #undef __SIMD_LIB_AVX2_HPP__
          }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX2
  
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
          struct AVX512Impl : AVX2Impl
@@ -119,9 +117,9 @@ namespace SIMDImpl
  #include "simdlib_256_avx512_core.inl"
  #endif // defined(SIMD_ARCH_KNIGHTS)
  #undef __SIMD_LIB_AVX512_HPP__
-#endif // SIMD_OPT_256_AVX512
+#endif     // SIMD_OPT_256_AVX512
          }; // struct AVX2Impl
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX512
  
          struct Traits : SIMDImpl::Traits
          {
@@ -135,18 +133,18 @@ namespace SIMDImpl
  #error Invalid value for SIMD_ARCH
  #endif
  
-            using Float     = SIMD256Impl::Float;
-            using Double    = SIMD256Impl::Double;
-            using Integer   = SIMD256Impl::Integer;
-            using Vec4      = SIMD256Impl::Vec4;
-            using Mask      = SIMD256Impl::Mask;
+            using Float   = SIMD256Impl::Float;
+            using Double  = SIMD256Impl::Double;
+            using Integer = SIMD256Impl::Integer;
+            using Vec4    = SIMD256Impl::Vec4;
+            using Mask    = SIMD256Impl::Mask;
          };
-    } // ns SIMD256Impl
+    } // namespace SIMD256Impl
  
      namespace SIMD512Impl
      {
  #if SIMD_ARCH >= SIMD_ARCH_AVX
-        template<typename SIMD256T>
+        template <typename SIMD256T>
          struct AVXImplBase
          {
  #define __SIMD_LIB_AVX_HPP__
@@ -157,12 +155,10 @@ namespace SIMDImpl
          using AVXImpl = AVXImplBase<SIMD256Impl::AVXImpl>;
  #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX
  
-
  #if SIMD_ARCH >= SIMD_ARCH_AVX2
          using AVX2Impl = AVXImplBase<SIMD256Impl::AVX2Impl>;
  #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX2
  
-
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
          struct AVX512Impl : AVXImplBase<SIMD256Impl::AVX512Impl>
          {
@@ -178,7 +174,7 @@ namespace SIMDImpl
  #endif // defined(SIMD_ARCH_KNIGHTS)
  #undef __SIMD_LIB_AVX512_HPP__
          }; // struct AVX512ImplBase
-#endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512
+#endif     // #if SIMD_ARCH >= SIMD_ARCH_AVX512
  
          struct Traits : SIMDImpl::Traits
          {
@@ -192,33 +188,32 @@ namespace SIMDImpl
  #error Invalid value for SIMD_ARCH
  #endif
  
-            using Float     = SIMD512Impl::Float;
-            using Double    = SIMD512Impl::Double;
-            using Integer   = SIMD512Impl::Integer;
-            using Vec4      = SIMD512Impl::Vec4;
-            using Mask      = SIMD512Impl::Mask;
+            using Float   = SIMD512Impl::Float;
+            using Double  = SIMD512Impl::Double;
+            using Integer = SIMD512Impl::Integer;
+            using Vec4    = SIMD512Impl::Vec4;
+            using Mask    = SIMD512Impl::Mask;
          };
-    } // ns SIMD512Impl
-} // ns SIMDImpl
+    } // namespace SIMD512Impl
+} // namespace SIMDImpl
  
  template <typename Traits>
  struct SIMDBase : Traits::IsaImpl
  {
-    using CompareType   = typename Traits::CompareType;
-    using ScaleFactor   = typename Traits::ScaleFactor;
-    using RoundMode     = typename Traits::RoundMode;
-    using SIMD          = typename Traits::IsaImpl;
-    using Float         = typename Traits::Float;
-    using Double        = typename Traits::Double;
-    using Integer       = typename Traits::Integer;
-    using Vec4          = typename Traits::Vec4;
-    using Mask          = typename Traits::Mask;
+    using CompareType = typename Traits::CompareType;
+    using ScaleFactor = typename Traits::ScaleFactor;
+    using RoundMode   = typename Traits::RoundMode;
+    using SIMD        = typename Traits::IsaImpl;
+    using Float       = typename Traits::Float;
+    using Double      = typename Traits::Double;
+    using Integer     = typename Traits::Integer;
+    using Vec4        = typename Traits::Vec4;
+    using Mask        = typename Traits::Mask;
  
      static const size_t VECTOR_BYTES = sizeof(Float);
  
      // Populates a SIMD Vec4 from a non-simd vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
-    static SIMDINLINE
-    void vec4_load1_ps(Vec4& r, const float *p)
+    static SIMDINLINE void vec4_load1_ps(Vec4& r, const float* p)
      {
          r[0] = SIMD::set1_ps(p[0]);
          r[1] = SIMD::set1_ps(p[1]);
@@ -226,8 +221,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::set1_ps(p[3]);
      }
  
-    static SIMDINLINE
-    void vec4_set1_vps(Vec4& r, Float const &s)
+    static SIMDINLINE void vec4_set1_vps(Vec4& r, Float const& s)
      {
          r[0] = s;
          r[1] = s;
@@ -235,48 +229,44 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = s;
      }
  
-    static SIMDINLINE
-    Float vec4_dp3_ps(const Vec4& v0, const Vec4& v1)
+    static SIMDINLINE Float vec4_dp3_ps(const Vec4& v0, const Vec4& v1)
      {
          Float tmp, r;
-        r   = SIMD::mul_ps(v0[0], v1[0]);     // (v0.x*v1.x)
+        r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
  
-        tmp = SIMD::mul_ps(v0[1], v1[1]);     // (v0.y*v1.y)
-        r   = SIMD::add_ps(r, tmp);           // (v0.x*v1.x) + (v0.y*v1.y)
+        tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + (v0.y*v1.y)
  
-        tmp = SIMD::mul_ps(v0[2], v1[2]);     // (v0.z*v1.z)
-        r   = SIMD::add_ps(r, tmp);           // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+        tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
  
          return r;
      }
  
-    static SIMDINLINE
-    Float vec4_dp4_ps(const Vec4& v0, const Vec4& v1)
+    static SIMDINLINE Float vec4_dp4_ps(const Vec4& v0, const Vec4& v1)
      {
          Float tmp, r;
-        r   = SIMD::mul_ps(v0[0], v1[0]);     // (v0.x*v1.x)
+        r = SIMD::mul_ps(v0[0], v1[0]); // (v0.x*v1.x)
  
-        tmp = SIMD::mul_ps(v0[1], v1[1]);     // (v0.y*v1.y)
-        r   = SIMD::add_ps(r, tmp);           // (v0.x*v1.x) + (v0.y*v1.y)
+        tmp = SIMD::mul_ps(v0[1], v1[1]); // (v0.y*v1.y)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + (v0.y*v1.y)
  
-        tmp = SIMD::mul_ps(v0[2], v1[2]);     // (v0.z*v1.z)
-        r   = SIMD::add_ps(r, tmp);           // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+        tmp = SIMD::mul_ps(v0[2], v1[2]); // (v0.z*v1.z)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
  
-        tmp = SIMD::mul_ps(v0[3], v1[3]);     // (v0.w*v1.w)
-        r   = SIMD::add_ps(r, tmp);           // (v0.x*v1.x) + (v0.y*v1.y) + (v0.z*v1.z)
+        tmp = SIMD::mul_ps(v0[3], v1[3]); // (v0.w*v1.w)
+        r   = SIMD::add_ps(r, tmp);       // (v0.x*v1.x) + ... + (v0.w*v1.w)
  
          return r;
      }
  
-    static SIMDINLINE
-    Float vec4_rcp_length_ps(const Vec4& v)
+    static SIMDINLINE Float vec4_rcp_length_ps(const Vec4& v)
      {
          Float length = vec4_dp4_ps(v, v);
          return SIMD::rsqrt_ps(length);
      }
  
-    static SIMDINLINE
-    void vec4_normalize_ps(Vec4& r, const Vec4& v)
+    static SIMDINLINE void vec4_normalize_ps(Vec4& r, const Vec4& v)
      {
          Float rcpLength = vec4_rcp_length_ps(v);
  
@@ -286,8 +276,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::mul_ps(v[3], rcpLength);
      }
  
-    static SIMDINLINE
-    void vec4_mul_ps(Vec4& r, const Vec4& v, Float const &s)
+    static SIMDINLINE void vec4_mul_ps(Vec4& r, const Vec4& v, Float const& s)
      {
          r[0] = SIMD::mul_ps(v[0], s);
          r[1] = SIMD::mul_ps(v[1], s);
@@ -295,8 +284,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::mul_ps(v[3], s);
      }
  
-    static SIMDINLINE
-    void vec4_mul_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
+    static SIMDINLINE void vec4_mul_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
      {
          r[0] = SIMD::mul_ps(v0[0], v1[0]);
          r[1] = SIMD::mul_ps(v0[1], v1[1]);
@@ -304,8 +292,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::mul_ps(v0[3], v1[3]);
      }
  
-    static SIMDINLINE
-    void vec4_add_ps(Vec4& r, const Vec4& v0, Float const &s)
+    static SIMDINLINE void vec4_add_ps(Vec4& r, const Vec4& v0, Float const& s)
      {
          r[0] = SIMD::add_ps(v0[0], s);
          r[1] = SIMD::add_ps(v0[1], s);
@@ -313,8 +300,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::add_ps(v0[3], s);
      }
  
-    static SIMDINLINE
-    void vec4_add_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
+    static SIMDINLINE void vec4_add_ps(Vec4& r, const Vec4& v0, const Vec4& v1)
      {
          r[0] = SIMD::add_ps(v0[0], v1[0]);
          r[1] = SIMD::add_ps(v0[1], v1[1]);
@@ -322,8 +308,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::add_ps(v0[3], v1[3]);
      }
  
-    static SIMDINLINE
-    void vec4_min_ps(Vec4& r, const Vec4& v0, Float const &s)
+    static SIMDINLINE void vec4_min_ps(Vec4& r, const Vec4& v0, Float const& s)
      {
          r[0] = SIMD::min_ps(v0[0], s);
          r[1] = SIMD::min_ps(v0[1], s);
@@ -331,8 +316,7 @@ struct SIMDBase : Traits::IsaImpl
          r[3] = SIMD::min_ps(v0[3], s);
      }
  
-    static SIMDINLINE
-    void vec4_max_ps(Vec4& r, const Vec4& v0, Float const &s)
+    static SIMDINLINE void vec4_max_ps(Vec4& r, const Vec4& v0, Float const& s)
      {
          r[0] = SIMD::max_ps(v0[0], s);
          r[1] = SIMD::max_ps(v0[1], s);
@@ -345,66 +329,64 @@ struct SIMDBase : Traits::IsaImpl
      //   outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * v.w)
      //   outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * v.w)
      //   outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * v.w)
-    static SIMDINLINE
-    void SIMDCALL mat4x4_vec4_multiply(
-        Vec4& result,
-        const float *pMatrix,
-        const Vec4& v)
+    static SIMDINLINE void SIMDCALL mat4x4_vec4_multiply(Vec4&        result,
+                                                         const float* pMatrix,
+                                                         const Vec4&  v)
      {
          Float m;
          Float r0;
          Float r1;
  
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 3);  // m[row][3]
-        r1  = SIMD::mul_ps(m, v[3]);              // (m3 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r1        = SIMD::mul_ps(m, v[3]);               // (m3 * v.w)
+        r0        = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
          result[0] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 3);  // m[row][3]
-        r1  = SIMD::mul_ps(m, v[3]);              // (m3 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r1        = SIMD::mul_ps(m, v[3]);               // (m3 * v.w)
+        r0        = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
          result[1] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 3);  // m[row][3]
-        r1  = SIMD::mul_ps(m, v[3]);              // (m3 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r1        = SIMD::mul_ps(m, v[3]);               // (m3 * v.w)
+        r0        = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
          result[2] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 3);  // m[row][3]
-        r1  = SIMD::mul_ps(m, v[3]);              // (m3 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * v.w)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 3); // m[row][3]
+        r1        = SIMD::mul_ps(m, v[3]);               // (m3 * v.w)
+        r0        = SIMD::add_ps(r0, r1); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * v.w)
          result[3] = r0;
      }
  
@@ -413,44 +395,42 @@ struct SIMDBase : Traits::IsaImpl
      //   outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 0)
      //   outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 0)
      //   outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 0)
-    static SIMDINLINE
-    void SIMDCALL mat3x3_vec3_w0_multiply(
-        Vec4& result,
-        const float *pMatrix,
-        const Vec4& v)
+    static SIMDINLINE void SIMDCALL mat3x3_vec3_w0_multiply(Vec4&        result,
+                                                            const float* pMatrix,
+                                                            const Vec4&  v)
      {
          Float m;
          Float r0;
          Float r1;
  
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m00 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
          result[0] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m00 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
          result[1] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m00 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
          result[2] = r0;
  
          result[3] = SIMD::setzero_ps();
@@ -461,108 +441,104 @@ struct SIMDBase : Traits::IsaImpl
      //   outVec.y = (m10 * v.x) + (m11 * v.y) + (m12 * v.z) + (m13 * 1)
      //   outVec.z = (m20 * v.x) + (m21 * v.y) + (m22 * v.z) + (m23 * 1)
      //   outVec.w = (m30 * v.x) + (m31 * v.y) + (m32 * v.z) + (m33 * 1)
-    static SIMDINLINE
-    void SIMDCALL mat4x4_vec3_w1_multiply(
-        Vec4& result,
-        const float *pMatrix,
-        const Vec4& v)
+    static SIMDINLINE void SIMDCALL mat4x4_vec3_w1_multiply(Vec4&        result,
+                                                            const float* pMatrix,
+                                                            const Vec4&  v)
      {
          Float m;
          Float r0;
          Float r1;
  
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[0] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[1] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[2] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 3*4 + 3);  // m[row][3]
-        result[3] = SIMD::add_ps(r0, m);        // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 3 * 4 + 3); // m[row][3]
+        result[3] = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
      }
  
-    static SIMDINLINE
-    void SIMDCALL mat4x3_vec3_w1_multiply(
-        Vec4& result,
-        const float *pMatrix,
-        const Vec4& v)
+    static SIMDINLINE void SIMDCALL mat4x3_vec3_w1_multiply(Vec4&        result,
+                                                            const float* pMatrix,
+                                                            const Vec4&  v)
      {
          Float m;
          Float r0;
          Float r1;
  
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 0*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 0 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[0] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 1*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 1 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[1] = r0;
  
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 0);  // m[row][0]
-        r0  = SIMD::mul_ps(m, v[0]);              // (m00 * v.x)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 1);  // m[row][1]
-        r1  = SIMD::mul_ps(m, v[1]);              // (m1 * v.y)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 2);  // m[row][2]
-        r1  = SIMD::mul_ps(m, v[2]);              // (m2 * v.z)
-        r0  = SIMD::add_ps(r0, r1);               // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
-        m   = SIMD::load1_ps(pMatrix + 2*4 + 3);  // m[row][3]
-        r0  = SIMD::add_ps(r0, m);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m2 * 1)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 0); // m[row][0]
+        r0        = SIMD::mul_ps(m, v[0]);               // (m0 * v.x)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 1); // m[row][1]
+        r1        = SIMD::mul_ps(m, v[1]);               // (m1 * v.y)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 2); // m[row][2]
+        r1        = SIMD::mul_ps(m, v[2]);               // (m2 * v.z)
+        r0        = SIMD::add_ps(r0, r1);                // (m0 * v.x) + (m1 * v.y) + (m2 * v.z)
+        m         = SIMD::load1_ps(pMatrix + 2 * 4 + 3); // m[row][3]
+        r0        = SIMD::add_ps(r0, m); // (m0 * v.x) + (m1 * v.y) + (m2 * v.z) + (m3 * 1)
          result[2] = r0;
          result[3] = SIMD::set1_ps(1.0f);
      }
@@ -572,30 +548,38 @@ using SIMD128 = SIMDBase<SIMDImpl::SIMD128Impl::Traits>;
  using SIMD256 = SIMDBase<SIMDImpl::SIMD256Impl::Traits>;
  using SIMD512 = SIMDBase<SIMDImpl::SIMD512Impl::Traits>;
  
-template <typename SIMD_T> using CompareType    = typename SIMD_T::CompareType;
-template <typename SIMD_T> using ScaleFactor    = typename SIMD_T::ScaleFactor;
-template <typename SIMD_T> using RoundMode      = typename SIMD_T::RoundMode;
-template <typename SIMD_T> using Float          = typename SIMD_T::Float;
-template <typename SIMD_T> using Double         = typename SIMD_T::Double;
-template <typename SIMD_T> using Integer        = typename SIMD_T::Integer;
-template <typename SIMD_T> using Vec4           = typename SIMD_T::Vec4;
-template <typename SIMD_T> using Mask           = typename SIMD_T::Mask;
+template <typename SIMD_T>
+using CompareType = typename SIMD_T::CompareType;
+template <typename SIMD_T>
+using ScaleFactor = typename SIMD_T::ScaleFactor;
+template <typename SIMD_T>
+using RoundMode = typename SIMD_T::RoundMode;
+template <typename SIMD_T>
+using Float = typename SIMD_T::Float;
+template <typename SIMD_T>
+using Double = typename SIMD_T::Double;
+template <typename SIMD_T>
+using Integer = typename SIMD_T::Integer;
+template <typename SIMD_T>
+using Vec4 = typename SIMD_T::Vec4;
+template <typename SIMD_T>
+using Mask = typename SIMD_T::Mask;
  
  template <typename SIMD_T>
  struct SIMDVecEqual
  {
-    INLINE bool operator () (Integer<SIMD_T> a, Integer<SIMD_T> b) const
+    INLINE bool operator()(Integer<SIMD_T> a, Integer<SIMD_T> b) const
      {
          Integer<SIMD_T> c = SIMD_T::xor_si(a, b);
          return SIMD_T::testz_si(c, c);
      }
  
-    INLINE bool operator () (Float<SIMD_T> a, Float<SIMD_T> b) const
+    INLINE bool operator()(Float<SIMD_T> a, Float<SIMD_T> b) const
      {
          return this->operator()(SIMD_T::castps_si(a), SIMD_T::castps_si(b));
      }
  
-    INLINE bool operator () (Double<SIMD_T> a, Double<SIMD_T> b) const
+    INLINE bool operator()(Double<SIMD_T> a, Double<SIMD_T> b) const
      {
          return this->operator()(SIMD_T::castpd_si(a), SIMD_T::castpd_si(b));
      }
@@ -604,13 +588,13 @@ struct SIMDVecEqual
  template <typename SIMD_T>
  struct SIMDVecHash
  {
-    INLINE uint32_t operator ()(Integer<SIMD_T> val) const
+    INLINE uint32_t operator()(Integer<SIMD_T> val) const
      {
  #if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is always 64-bit
          static_assert(sizeof(void*) == 8, "This path only meant for 64-bit code");
  
-        uint64_t crc32 = 0;
-        const uint64_t *pData = reinterpret_cast<const uint64_t*>(&val);
+        uint64_t              crc32          = 0;
+        const uint64_t*       pData          = reinterpret_cast<const uint64_t*>(&val);
          static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
          static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
  
@@ -624,7 +608,7 @@ struct SIMDVecHash
          static_assert(sizeof(void*) == 4, "This path only meant for 32-bit code");
  
          uint32_t crc32 = 0;
-        const uint32_t *pData = reinterpret_cast<const uint32_t*>(&val);
+        const uint32_t* pData = reinterpret_cast<const uint32_t*>(&val);
          static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
          static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
  
@@ -637,11 +621,11 @@ struct SIMDVecHash
  #endif
      };
  
-    INLINE uint32_t operator ()(Float<SIMD_T> val) const
+    INLINE uint32_t operator()(Float<SIMD_T> val) const
      {
          return operator()(SIMD_T::castps_si(val));
      };
-    INLINE uint32_t operator ()(Double<SIMD_T> val) const
+    INLINE uint32_t operator()(Double<SIMD_T> val) const
      {
          return operator()(SIMD_T::castpd_si(val));
      }
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl

index b1511c6c0e2b313eb2eec36df06d72b0271870f8..0c5795cf1369a9981b43525c509078cc62dd0f70 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -28,100 +28,79 @@
  // SIMD128 AVX (1) implementation
  //============================================================================
  
-#define SIMD_WRAPPER_1(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return _mm_##op(a);\
-    }
+#define SIMD_WRAPPER_1(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a) { return _mm_##op(a); }
  
-#define SIMD_WRAPPER_2(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm_##op(a, b);\
-    }
+#define SIMD_WRAPPER_2(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm_##op(a, b); }
  
-#define SIMD_DWRAPPER_2(op)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm_##op(a, b);\
-    }
+#define SIMD_DWRAPPER_2(op) \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm_##op(a, b); }
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm_##op(a, b, ImmT);\
+#define SIMD_WRAPPER_2I(op)                               \
+    template <int ImmT>                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+    {                                                     \
+        return _mm_##op(a, b, ImmT);                      \
      }
  
-#define SIMD_DWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm_##op(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I(op)                                 \
+    template <int ImmT>                                      \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+    {                                                        \
+        return _mm_##op(a, b, ImmT);                         \
      }
  
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return _mm_##op(a, b, c);\
-    }
+#define SIMD_WRAPPER_3(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); }
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return _mm_##op(a);\
-    }
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm_##op(a); }
  
-#define SIMD_IWRAPPER_1I_(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin)                \
+    template <int ImmT>                              \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) \
+    {                                                \
+        return intrin(a, ImmT);                      \
      }
  #define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm_##op)
  
-#define SIMD_IWRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return intrin(a, b);\
-    }
+#define SIMD_IWRAPPER_2_(op, intrin) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return intrin(a, b); }
  
-#define SIMD_IWRAPPER_2(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm_##op(a, b);\
-    }
+#define SIMD_IWRAPPER_2(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm_##op(a, b); }
  
-#define SIMD_IFWRAPPER_2(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return castps_si( intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin)                            \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+    {                                                           \
+        return castps_si(intrin(castsi_ps(a), castsi_ps(b)));   \
      }
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op)                                    \
+    template <int ImmT>                                         \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+    {                                                           \
+        return _mm_##op(a, b, ImmT);                            \
      }
  
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
-SIMD_WRAPPER_1(rcp_ps);     // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps);   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_2(add_ps);   // return a + b
+SIMD_WRAPPER_2(div_ps);   // return a / b
+SIMD_WRAPPER_2(max_ps);   // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);   // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);   // return a * b
+SIMD_WRAPPER_1(rcp_ps);   // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);   // return a - b
  
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c)    // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float a, Float b, Float c) // return (a * b) + c
  {
      return add_ps(mul_ps(a, b), c);
  }
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c)    // return (a * b) - c
+static SIMDINLINE Float SIMDCALL fmsub_ps(Float a, Float b, Float c) // return (a * b) - c
  {
      return sub_ps(mul_ps(a, b), c);
  }
@@ -132,8 +111,14 @@ static SIMDINLINE Float SIMDCALL round_ps(Float a)
      return _mm_round_ps(a, static_cast<int>(RMT));
  }
  
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
+{
+    return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float a)
+{
+    return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
@@ -141,7 +126,7 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::
  SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
  SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -160,41 +145,40 @@ SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
  //-----------------------------------------------------------------------
  // Logical operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(and_ps);                             // return a & b       (float treated as int)
-SIMD_IWRAPPER_2_(and_si, _mm_and_si128);        // return a & b       (int)
-SIMD_WRAPPER_2(andnot_ps);                          // return (~a) & b    (float treated as int)
-SIMD_IWRAPPER_2_(andnot_si, _mm_andnot_si128);  // return (~a) & b    (int)
-SIMD_WRAPPER_2(or_ps);                              // return a | b       (float treated as int)
-SIMD_IWRAPPER_2_(or_si, _mm_or_si128);          // return a | b       (int)
-SIMD_WRAPPER_2(xor_ps);                             // return a ^ b       (float treated as int)
-SIMD_IWRAPPER_2_(xor_si, _mm_xor_si128);        // return a ^ b       (int)
-
+SIMD_WRAPPER_2(and_ps);                        // return a & b       (float treated as int)
+SIMD_IWRAPPER_2_(and_si, _mm_and_si128);       // return a & b       (int)
+SIMD_WRAPPER_2(andnot_ps);                     // return (~a) & b    (float treated as int)
+SIMD_IWRAPPER_2_(andnot_si, _mm_andnot_si128); // return (~a) & b    (int)
+SIMD_WRAPPER_2(or_ps);                         // return a | b       (float treated as int)
+SIMD_IWRAPPER_2_(or_si, _mm_or_si128);         // return a | b       (int)
+SIMD_WRAPPER_2(xor_ps);                        // return a ^ b       (float treated as int)
+SIMD_IWRAPPER_2_(xor_si, _mm_xor_si128);       // return a ^ b       (int)
  
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32);               // return a << ImmT
-SIMD_IWRAPPER_1I(slli_epi64);               // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi64); // return a << ImmT
  
  static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB) // return a << b      (uint32)
  {
      int32_t a, count;
-    a = _mm_extract_epi32(vA, 0);
+    a     = _mm_extract_epi32(vA, 0);
      count = _mm_extract_epi32(vB, 0);
      a <<= count;
      vA = _mm_insert_epi32(vA, a, 0);
  
-    a = _mm_extract_epi32(vA, 1);
+    a     = _mm_extract_epi32(vA, 1);
      count = _mm_extract_epi32(vB, 1);
      a <<= count;
      vA = _mm_insert_epi32(vA, a, 1);
  
-    a = _mm_extract_epi32(vA, 2);
+    a     = _mm_extract_epi32(vA, 2);
      count = _mm_extract_epi32(vB, 2);
      a <<= count;
      vA = _mm_insert_epi32(vA, a, 2);
  
-    a = _mm_extract_epi32(vA, 3);
+    a     = _mm_extract_epi32(vA, 3);
      count = _mm_extract_epi32(vB, 3);
      a <<= count;
      vA = _mm_insert_epi32(vA, a, 3);
@@ -211,7 +195,7 @@ static SIMDINLINE Integer SIMDCALL srl_epi64(Integer a, Integer n)
      return _mm_srl_epi64(a, n);
  }
  
-template<int ImmT>                              // same as srli_si, but with Float cast to int
+template <int ImmT> // same as srli_si, but with Float cast to int
  static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
  {
      return castsi_ps(srli_si<ImmT>(castps_si(a)));
@@ -220,22 +204,22 @@ static SIMDINLINE Float SIMDCALL srlisi_ps(Float a)
  static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB) // return a >> b      (uint32)
  {
      int32_t a, count;
-    a = _mm_extract_epi32(vA, 0);
+    a     = _mm_extract_epi32(vA, 0);
      count = _mm_extract_epi32(vB, 0);
      a >>= count;
      vA = _mm_insert_epi32(vA, a, 0);
  
-    a = _mm_extract_epi32(vA, 1);
+    a     = _mm_extract_epi32(vA, 1);
      count = _mm_extract_epi32(vB, 1);
      a >>= count;
      vA = _mm_insert_epi32(vA, a, 1);
  
-    a = _mm_extract_epi32(vA, 2);
+    a     = _mm_extract_epi32(vA, 2);
      count = _mm_extract_epi32(vB, 2);
      a >>= count;
      vA = _mm_insert_epi32(vA, a, 2);
  
-    a = _mm_extract_epi32(vA, 3);
+    a     = _mm_extract_epi32(vA, 3);
      count = _mm_extract_epi32(vB, 3);
      a >>= count;
      vA = _mm_insert_epi32(vA, a, 3);
@@ -243,32 +227,30 @@ static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB) // return
      return vA;
  }
  
-
-
  //-----------------------------------------------------------------------
  // Conversion operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
  {
      return _mm_castpd_ps(a);
  }
  
-static SIMDINLINE Integer SIMDCALL castps_si(Float a)   // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
  {
      return _mm_castps_si128(a);
  }
  
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
  {
      return _mm_castsi128_pd(a);
  }
  
-static SIMDINLINE Double SIMDCALL castps_pd(Float a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
  {
      return _mm_castps_pd(a);
  }
  
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
  {
      return _mm_castsi128_ps(a);
  }
@@ -288,18 +270,19 @@ static SIMDINLINE Integer SIMDCALL cvtsi32_si128(int32_t n) // return a[0] = n,
      return _mm_cvtsi32_si128(n);
  }
  
-SIMD_IWRAPPER_1(cvtepu8_epi16);     // return (int16)a    (uint8 --> int16)
-SIMD_IWRAPPER_1(cvtepu8_epi32);     // return (int32)a    (uint8 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi32);    // return (int32)a    (uint16 --> int32)
-SIMD_IWRAPPER_1(cvtepu16_epi64);    // return (int64)a    (uint16 --> int64)
-SIMD_IWRAPPER_1(cvtepu32_epi64);    // return (int64)a    (uint32 --> int64)
+SIMD_IWRAPPER_1(cvtepu8_epi16);  // return (int16)a    (uint8 --> int16)
+SIMD_IWRAPPER_1(cvtepu8_epi32);  // return (int32)a    (uint8 --> int32)
+SIMD_IWRAPPER_1(cvtepu16_epi32); // return (int32)a    (uint16 --> int32)
+SIMD_IWRAPPER_1(cvtepu16_epi64); // return (int64)a    (uint16 --> int64)
+SIMD_IWRAPPER_1(cvtepu32_epi64); // return (int64)a    (uint32 --> int64)
  
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a)            // return (int32)a    (float --> int32)
+static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a    (float --> int32)
  {
      return _mm_cvtps_epi32(a);
  }
  
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a)           // return (int32)a    (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvttps_epi32(Float a) // return (int32)a    (rnd_to_zero(float) --> int32)
  {
      return _mm_cvttps_epi32(a);
  }
@@ -307,77 +290,104 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a)           // return (in
  //-----------------------------------------------------------------------
  // Comparison operations
  //-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
  static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
  {
      return _mm_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
  }
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::LE_OQ>(a, b);
+}
  
-SIMD_IWRAPPER_2(cmpeq_epi8);    // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16);   // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32);   // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64);   // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8);    // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16);   // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32);   // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64);   // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32);   // return a < b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi8);  // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8);  // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
  
-static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b)  // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL testz_ps(Float a,
+                                         Float b) // return all_lane_sign_bits_zero(a & b) ? 1 : 0 (float)
  {
-    return  0 != _mm_testz_ps(a, b);
+    return 0 != _mm_testz_ps(a, b);
  }
  
-static SIMDINLINE bool SIMDCALL testz_si(Integer a, Integer b)  // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer a,
+                                         Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
  {
-    return  0 != _mm_testz_si128(a, b);
+    return 0 != _mm_testz_si128(a, b);
  }
  
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps);  // return ImmT ? b : a  (float)
-SIMD_WRAPPER_3(blendv_ps);  // return mask ? b : a  (float)
+SIMD_WRAPPER_2I(blend_ps); // return ImmT ? b : a  (float)
+SIMD_WRAPPER_3(blendv_ps); // return mask ? b : a  (float)
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+                                                Integer b,
+                                                Float   mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
  }
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+                                                Integer b,
+                                                Integer mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
  }
  
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p)  // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+                        broadcast_ss(float const* p) // return *p (all elements in vector get same value)
  {
      return _mm_broadcast_ss(p);
  }
  
-SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm_packus_epi32 and _mm512_packus_epi32
  
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Integer SIMDCALL
+                          permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
  {
      return castps_si(_mm_permutevar_ps(castsi_ps(a), swiz));
  }
  
-static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+                        permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
  {
      return _mm_permutevar_ps(a, swiz);
  }
  
  SIMD_IWRAPPER_1I(shuffle_epi32);
  
-template<int ImmT>
+template <int ImmT>
  static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b) = delete;
  
  SIMD_IWRAPPER_2(shuffle_epi8);
@@ -385,7 +395,7 @@ SIMD_DWRAPPER_2I(shuffle_pd);
  SIMD_WRAPPER_2I(shuffle_ps);
  SIMD_IWRAPPER_2(unpackhi_epi16);
  
-//SIMD_IFWRAPPER_2(unpackhi_epi32, _mm_unpackhi_ps);
+// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm_unpackhi_ps);
  static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
  {
      return castps_si(_mm_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
@@ -405,68 +415,74 @@ SIMD_WRAPPER_2(unpacklo_ps);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
-    uint32_t *pOffsets = (uint32_t*)&idx;
-    Float vResult;
-    float* pResult = (float*)&vResult;
+    uint32_t* pOffsets = (uint32_t*)&idx;
+    Float     vResult;
+    float*    pResult = (float*)&vResult;
      for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
      {
          uint32_t offset = pOffsets[i];
-        offset = offset * static_cast<uint32_t>(ScaleT);
-        pResult[i] = *(float const*)(((uint8_t const*)p + offset));
+        offset          = offset * static_cast<uint32_t>(ScaleT);
+        pResult[i]      = *(float const*)(((uint8_t const*)p + offset));
      }
  
      return vResult;
  }
  
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p)  // return *p    (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+                        load1_ps(float const* p) // return *p    (broadcast 1 value to all elements)
  {
      return broadcast_ss(p);
  }
  
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
      return _mm_load_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
      return _mm_load_si128(&p->v);
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
      return _mm_loadu_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
      return _mm_lddqu_si128(&p->v);
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
-{
-    uint32_t *pOffsets = (uint32_t*)&idx;
-    Float vResult = old;
-    float* pResult = (float*)&vResult;
-    DWORD index;
-    uint32_t umask = movemask_ps(mask);
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+{
+    uint32_t* pOffsets = (uint32_t*)&idx;
+    Float     vResult  = old;
+    float*    pResult  = (float*)&vResult;
+    DWORD     index;
+    uint32_t  umask = movemask_ps(mask);
      while (_BitScanForward(&index, umask))
      {
          umask &= ~(1 << index);
          uint32_t offset = pOffsets[index];
-        offset = offset * static_cast<uint32_t>(ScaleT);
-        pResult[index] = *(float const *)(((uint8_t const *)p + offset));
+        offset          = offset * static_cast<uint32_t>(ScaleT);
+        pResult[index]  = *(float const*)(((uint8_t const*)p + offset));
      }
  
      return vResult;
  }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
  {
      _mm_maskstore_ps(p, mask, src);
  }
@@ -495,37 +511,40 @@ static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements a
      return _mm_set1_epi8(i);
  }
  
-static SIMDINLINE Float SIMDCALL set1_ps(float f)  // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
  {
      return _mm_set1_ps(f);
  }
  
-static SIMDINLINE Float SIMDCALL setzero_ps()      // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
  {
      return _mm_setzero_ps();
  }
  
-static SIMDINLINE Integer SIMDCALL setzero_si()      // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
  {
      return _mm_setzero_si128();
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float a) // *p = a   (stores all elements contiguously in memory)
  {
      _mm_store_ps(p, a);
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
  {
      _mm_store_si128(&p->v, a);
  }
  
-static SIMDINLINE void SIMDCALL storeu_si(Integer *p, Integer a) // *p = a    (same as store_si but allows for unaligned mem)
+static SIMDINLINE void SIMDCALL
+                       storeu_si(Integer* p, Integer a) // *p = a    (same as store_si but allows for unaligned mem)
  {
      _mm_storeu_si128(&p->v, a);
  }
  
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a)   // *p = a   (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+                       stream_ps(float* p, Float a) // *p = a   (same as store_ps, but doesn't keep memory in cache)
  {
      _mm_stream_ps(p, a);
  }
@@ -549,11 +568,10 @@ static SIMDINLINE float SIMDCALL extract_ps(Float a)
  
  static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
  {
-    Integer vec = set1_epi32(mask);
-    const Integer bit = set_epi32(
-        0x08, 0x04, 0x02, 0x01);
-    vec = and_si(vec, bit);
-    vec = cmplt_epi32(setzero_si(), vec);
+    Integer       vec = set1_epi32(mask);
+    const Integer bit = set_epi32(0x08, 0x04, 0x02, 0x01);
+    vec               = and_si(vec, bit);
+    vec               = cmplt_epi32(setzero_si(), vec);
      return castsi_ps(vec);
  }
  
@@ -573,4 +591,3 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
  #undef SIMD_IWRAPPER_2
  #undef SIMD_IWRAPPER_2_
  #undef SIMD_IWRAPPER_2I
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl

index e8ee0b4d87bcbcfece530d461c81d829a1a0ff05..35f9175ea46ba3be493c15312facb5152273f829 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX2_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -32,14 +32,11 @@
  // Only 2 shifts and 2 gathers were introduced with AVX 2
  // Also, add native support for FMA operations
  //============================================================================
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return _mm_##op(a, b, c);\
-    }
+#define SIMD_WRAPPER_3(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm_##op(a, b, c); }
  
-SIMD_WRAPPER_3(fmadd_ps);   // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps);   // return (a * b) - c
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
  
  static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer vA, Integer vB) // return a << b      (uint32)
  {
@@ -51,18 +48,19 @@ static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer vA, Integer vB) // return
      return _mm_srlv_epi32(vA, vB);
  }
  
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
      return _mm_i32gather_ps(p, idx, static_cast<const int>(ScaleT));
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
  {
      return _mm_mask_i32gather_ps(old, p, idx, mask, static_cast<const int>(ScaleT));
  }
  
  #undef SIMD_WRAPPER_3
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl

index b70a7691e2b734f5fdf858b69b520525b16376a5..2ce3caa582fd91fde64d3a423fcb6a951cc2fb73 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -34,120 +34,138 @@
  //============================================================================
  
  private:
-    static SIMDINLINE __m512  __conv(Float r) { return _mm512_castps128_ps512(r.v); }
-    static SIMDINLINE __m512d __conv(Double r) { return _mm512_castpd128_pd512(r.v); }
-    static SIMDINLINE __m512i __conv(Integer r) { return _mm512_castsi128_si512(r.v); }
-    static SIMDINLINE Float   __conv(__m512 r) { return _mm512_castps512_ps128(r); }
-    static SIMDINLINE Double  __conv(__m512d r) { return _mm512_castpd512_pd128(r); }
-    static SIMDINLINE Integer __conv(__m512i r) { return _mm512_castsi512_si128(r); }
-public:
+static SIMDINLINE __m512 __conv(Float r)
+{
+    return _mm512_castps128_ps512(r.v);
+}
+static SIMDINLINE __m512d __conv(Double r)
+{
+    return _mm512_castpd128_pd512(r.v);
+}
+static SIMDINLINE __m512i __conv(Integer r)
+{
+    return _mm512_castsi128_si512(r.v);
+}
+static SIMDINLINE Float __conv(__m512 r)
+{
+    return _mm512_castps512_ps128(r);
+}
+static SIMDINLINE Double __conv(__m512d r)
+{
+    return _mm512_castpd512_pd128(r);
+}
+static SIMDINLINE Integer __conv(__m512i r)
+{
+    return _mm512_castsi512_si128(r);
+}
  
-#define SIMD_WRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+public:
+#define SIMD_WRAPPER_1_(op, intrin, mask)                        \
+    static SIMDINLINE Float SIMDCALL op(Float a)                 \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_WRAPPER_1(op)  SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask)                             \
+    template <int ImmT>                                                \
+    static SIMDINLINE Float SIMDCALL op(Float a)                       \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_WRAPPER_1I(op)  SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask)                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                   \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_WRAPPER_2(op)  SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op)                                                \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                  \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_WRAPPER_3_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask)                                              \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)                     \
+    {                                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
      }
-#define SIMD_WRAPPER_3(op)  SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
  
-#define SIMD_DWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b)               \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_IWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)             \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_IWRAPPER_1_32(op)  SIMD_IWRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xf))
  
-#define SIMD_IWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)                   \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_IWRAPPER_1I_32(op)  SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf))
  
-#define SIMD_IWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)             \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_IWRAPPER_2_32(op)  SIMD_IWRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xf))
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)            \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
      }
  
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
-SIMD_WRAPPER_3(fmadd_ps);   // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps);   // return (a * b) - c
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
+SIMD_WRAPPER_2(add_ps);                                // return a + b
+SIMD_WRAPPER_2(div_ps);                                // return a / b
+SIMD_WRAPPER_3(fmadd_ps);                              // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps);                              // return (a * b) - c
+SIMD_WRAPPER_2(max_ps);                                // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);                                // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);                                // return a * b
  SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xf));     // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xf));   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xf)); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);                                // return a - b
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32);  // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32);  // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32);  // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32);  // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32);  // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32);  // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32);  // return a * b (int32)
+SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
+SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
+SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
  
  // SIMD_IWRAPPER_2_8(add_epi8);    // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+// SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  
  // return (a * b) & 0xFFFFFFFF
  //
  // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
  // and store the low 32 bits of the intermediate integers in dst.
  SIMD_IWRAPPER_2_32(mullo_epi32);
-SIMD_IWRAPPER_2_32(sub_epi32);  // return a - b (int32)
+SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
  
  // SIMD_IWRAPPER_2_64(sub_epi64);  // return a - b (int64)
  // SIMD_IWRAPPER_2_8(subs_epu8);   // return (b > a) ? 0 : (a - b) (uint8)
@@ -155,23 +173,22 @@ SIMD_IWRAPPER_2_32(sub_epi32);  // return a - b (int32)
  //-----------------------------------------------------------------------
  // Logical operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si,    and_epi32, __mmask16(0xf));    // return a & b       (int)
+SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xf));       // return a & b       (int)
  SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xf)); // return (~a) & b    (int)
-SIMD_IWRAPPER_2_(or_si,     or_epi32, __mmask16(0xf));     // return a | b       (int)
-SIMD_IWRAPPER_2_(xor_si,    xor_epi32, __mmask16(0xf));    // return a ^ b       (int)
-
+SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xf));         // return a | b       (int)
+SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xf));       // return a ^ b       (int)
  
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32);               // return a << ImmT
-SIMD_IWRAPPER_2_32(sllv_epi32);                // return a << b      (uint32)
-SIMD_IWRAPPER_1I_32(srai_epi32);               // return a >> ImmT   (int32)
-SIMD_IWRAPPER_1I_32(srli_epi32);               // return a >> ImmT   (uint32)
-SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
+SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_2_32(sllv_epi32);  // return a << b      (uint32)
+SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT   (int32)
+SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT   (uint32)
+SIMD_IWRAPPER_2_32(srlv_epi32);  // return a >> b      (uint32)
  
  // use AVX2 version
-//SIMD_IWRAPPER_1I_(srli_si, srli_si256);     // return a >> (ImmT*8) (uint)
+// SIMD_IWRAPPER_1I_(srli_si, srli_si256);     // return a >> (ImmT*8) (uint)
  
  //-----------------------------------------------------------------------
  // Conversion operations (Use AVX2 versions)
@@ -185,16 +202,16 @@ SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
  //-----------------------------------------------------------------------
  // Comparison operations (Use AVX2 versions
  //-----------------------------------------------------------------------
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8);    // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16);   // return a == b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi32);   // return a == b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi64);   // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,);   // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16);   // return a > b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi32);   // return a > b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi64);   // return a > b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8);    // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16);   // return a == b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi32);   // return a == b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi64);   // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8,);   // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16);   // return a > b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi32);   // return a > b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi64);   // return a > b (int64)
  //
-//static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b)   // return a < b (int32)
+// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b)   // return a < b (int32)
  //{
  //    return cmpgt_epi32(b, a);
  //}
@@ -202,24 +219,27 @@ SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16);     // int16 --> int8    See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2_16(packs_epi32);    // int32 --> int16   See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2_8(packus_epi16);    // uint16 --> uint8  See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2_16(packus_epi32);   // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
-// SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
-
-//static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+// SIMD_IWRAPPER_2_8(packs_epi16);     // int16 --> int8    See documentation for _mm256_packs_epi16
+// and _mm512_packs_epi16 SIMD_IWRAPPER_2_16(packs_epi32);    // int32 --> int16   See documentation
+// for _mm256_packs_epi32 and _mm512_packs_epi32 SIMD_IWRAPPER_2_8(packus_epi16);    // uint16 -->
+// uint8  See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+// SIMD_IWRAPPER_2_16(packus_epi32);   // uint32 --> uint16 See documentation for
+// _mm256_packus_epi32 and _mm512_packus_epi32 SIMD_IWRAPPER_2_(permute_epi32,
+// permutevar8x32_epi32);
+
+// static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)    // return a[swiz[i]] for
+// each 32-bit lane i (float)
  //{
  //    return _mm256_permutevar8x32_ps(a, swiz);
  //}
  
  SIMD_IWRAPPER_1I_32(shuffle_epi32);
-//template<int ImmT>
-//static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
+// template<int ImmT>
+// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
  //{
  //    return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
  //}
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
  SIMD_IWRAPPER_2_32(unpackhi_epi32);
  SIMD_IWRAPPER_2_32(unpacklo_epi32);
  
@@ -233,50 +253,47 @@ SIMD_IWRAPPER_2_32(unpacklo_epi32);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
      return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
      return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
      return __conv(_mm512_maskz_loadu_ps(__mmask16(0xf), p));
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
      return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xf), p));
  }
  
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
      return __conv(_mm512_mask_i32gather_ps(
-                    _mm512_setzero_ps(),
-                    __mmask16(0xf),
-                    __conv(idx),
-                    p,
-                    static_cast<int>(ScaleT)));
+        _mm512_setzero_ps(), __mmask16(0xf), __conv(idx), p, static_cast<int>(ScaleT)));
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
  {
      __mmask16 m = 0xf;
-    m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
-                                _mm512_set1_epi32(0x80000000));
-    return __conv(_mm512_mask_i32gather_ps(
-                    __conv(old),
-                    m,
-                    __conv(idx),
-                    p,
-                    static_cast<int>(ScaleT)));
+    m           = _mm512_mask_test_epi32_mask(
+        m, _mm512_castps_si512(__conv(mask)), _mm512_set1_epi32(0x80000000));
+    return __conv(
+        _mm512_mask_i32gather_ps(__conv(old), m, __conv(idx), p, static_cast<int>(ScaleT)));
  }
  
  // static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
@@ -286,19 +303,20 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In
  //         _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
  // }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
  {
      __mmask16 m = 0xf;
-    m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
+    m           = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
      _mm512_mask_storeu_ps(p, m, __conv(src));
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float a) // *p = a   (stores all elements contiguously in memory)
  {
      _mm512_mask_storeu_ps(p, __mmask16(0xf), __conv(a));
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
  {
      _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a));
  }
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl

index a4ecd09f16418290351bcf868098813baa70d55c..16e59c4decb0d71838e661d3514d2673bf88bcb3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -33,114 +33,118 @@
  // register set.
  //============================================================================
  
-#define SIMD_WRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_WRAPPER_1_(op, intrin, mask)                        \
+    static SIMDINLINE Float SIMDCALL op(Float a)                 \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_WRAPPER_1(op)  SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask)                             \
+    template <int ImmT>                                                \
+    static SIMDINLINE Float SIMDCALL op(Float a)                       \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_WRAPPER_1I(op)  SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask)                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                   \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_WRAPPER_2(op)  SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf))
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op)                                                \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                  \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_WRAPPER_3_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask)                                              \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)                     \
+    {                                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
      }
-#define SIMD_WRAPPER_3(op)  SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf))
  
-#define SIMD_DWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Double SIMDCALL op(Double a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_DWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Double SIMDCALL op(Double a)               \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_DWRAPPER_1(op)  SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3))
  
-#define SIMD_DWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Double SIMDCALL op(Double a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_DWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Double SIMDCALL op(Double a)                     \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_DWRAPPER_1I(op)  SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3))
  
-#define SIMD_DWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_DWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b)                \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_DWRAPPER_2(op)  SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3))
  
-#define SIMD_DWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b)               \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_IWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)             \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_IWRAPPER_1_8(op)   SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_1_16(op)  SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_1_64(op)  SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)                   \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_IWRAPPER_1I_8(op)   SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_1I_16(op)  SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_1I_64(op)  SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)             \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_IWRAPPER_2_8(op)   SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
-#define SIMD_IWRAPPER_2_16(op)  SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
-#define SIMD_IWRAPPER_2_64(op)  SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
-
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3))
+
+#define SIMD_IWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)            \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
      }
  
-SIMD_IWRAPPER_2_8(add_epi8);    // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
-SIMD_IWRAPPER_2_64(sub_epi64);  // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8);   // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_IWRAPPER_2_8(packs_epi16);     // int16 --> int8    See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32);    // int32 --> int16   See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16);    // uint16 --> uint8  See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32);   // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_8(add_epi8);      // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8);     // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64);    // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8);     // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8    See documentation for _mm256_packs_epi16 and
+                                  // _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16   See documentation for _mm256_packs_epi32 and
+                                  // _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8  See documentation for _mm256_packus_epi16 and
+                                  // _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
+                                  // _mm512_packus_epi32
  SIMD_IWRAPPER_2_16(unpackhi_epi16);
  SIMD_IWRAPPER_2_64(unpackhi_epi64);
  SIMD_IWRAPPER_2_8(unpackhi_epi8);
@@ -151,8 +155,7 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
  static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
  {
      __mmask64 m = 0xffffull;
-    return static_cast<uint32_t>(
-        _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+    return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
  }
  
  #undef SIMD_WRAPPER_1_
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl

index b0cae5034197d256393501581eef98e727bb28e2..1b6592e200354ed637b7c1546048fd98b1602af9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -32,4 +32,3 @@
  // These use native AVX512 instructions with masking to enable a larger
  // register set.
  //============================================================================
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl

index 00c094a425a18225cf465003ffbfca25bda9159c..4ac0f95a468b91725b19149427af8b59e3b6aaf4 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -30,178 +30,172 @@ using SIMD128T = SIMD128Impl::AVXImpl;
  // SIMD256 AVX (1) implementation
  //============================================================================
  
-#define SIMD_WRAPPER_1(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float const &a)   \
-    {\
-        return _mm256_##op(a);\
-    }
+#define SIMD_WRAPPER_1(op) \
+    static SIMDINLINE Float SIMDCALL op(Float const& a) { return _mm256_##op(a); }
  
-#define SIMD_WRAPPER_2(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b)   \
-    {\
-        return _mm256_##op(a, b);\
+#define SIMD_WRAPPER_2(op)                                              \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+    {                                                                   \
+        return _mm256_##op(a, b);                                       \
      }
  
-#define SIMD_DWRAPPER_2(op)  \
-    static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b)   \
-    {\
-        return _mm256_##op(a, b);\
+#define SIMD_DWRAPPER_2(op)                                                \
+    static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
+    {                                                                      \
+        return _mm256_##op(a, b);                                          \
      }
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b)   \
-    {\
-        return  _mm256_##op(a, b, ImmT);\
+#define SIMD_WRAPPER_2I(op)                                             \
+    template <int ImmT>                                                 \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+    {                                                                   \
+        return _mm256_##op(a, b, ImmT);                                 \
      }
  
-#define SIMD_DWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double const &a, Double const &b)   \
-    {\
-        return _mm256_##op(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Double SIMDCALL op(Double const& a, Double const& b) \
+    {                                                                      \
+        return _mm256_##op(a, b, ImmT);                                    \
      }
  
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c)   \
-    {\
-        return _mm256_##op(a, b, c);\
+#define SIMD_WRAPPER_3(op)                                                              \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
+    {                                                                                   \
+        return _mm256_##op(a, b, c);                                                    \
      }
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return _mm256_##op(a);\
-    }
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
  
-#define SIMD_IWRAPPER_2(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##op(a, b);\
+#define SIMD_IWRAPPER_2(op)                                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##op(a, b);                                             \
      }
  
-#define SIMD_IFWRAPPER_2(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return castps_si( intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin)                                          \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return castps_si(intrin(castsi_ps(a), castsi_ps(b)));                 \
      }
  
-#define SIMD_IFWRAPPER_2I(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return castps_si( intrin(castsi_ps(a), castsi_ps(b), ImmT) );\
+#define SIMD_IFWRAPPER_2I(op, intrin)                                         \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return castps_si(intrin(castsi_ps(a), castsi_ps(b), ImmT));           \
      }
  
-#define SIMD_IWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin)                                         \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##intrin(a, b, ImmT);                                   \
      }
-#define SIMD_IWRAPPER_2I(op)  SIMD_IWRAPPER_2I_(op, op)
+#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
-#define SIMD_IWRAPPER_3(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c)   \
-    {\
-        return _mm256_##op(a, b, c);\
+#define SIMD_IWRAPPER_3(op)                                                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
+    {                                                                                           \
+        return _mm256_##op(a, b, c);                                                            \
      }
  
  // emulated integer simd
-#define SIMD_EMU_IWRAPPER_1(op) \
-    static SIMDINLINE \
-    Integer SIMDCALL op(Integer const &a)\
-    {\
-        return Integer\
-        {\
-            SIMD128T::op(a.v4[0]),\
-            SIMD128T::op(a.v4[1]),\
-        };\
+#define SIMD_EMU_IWRAPPER_1(op)                             \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return Integer{                                     \
+            SIMD128T::op(a.v4[0]),                          \
+            SIMD128T::op(a.v4[1]),                          \
+        };                                                  \
      }
-#define SIMD_EMU_IWRAPPER_1L(op, shift) \
-    static SIMDINLINE \
-    Integer SIMDCALL op(Integer const &a)\
-    {\
-        return Integer \
-        {\
-            SIMD128T::op(a.v4[0]), \
-            SIMD128T::op(SIMD128T::template srli_si<shift>(a.v4[0])), \
-        };\
-    }\
-    static SIMDINLINE \
-    Integer SIMDCALL op(SIMD128Impl::Integer const &a)\
-    {\
-        return Integer \
-        {\
-            SIMD128T::op(a), \
-            SIMD128T::op(SIMD128T::template srli_si<shift>(a)), \
-        };\
+#define SIMD_EMU_IWRAPPER_1L(op, shift)                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a)              \
+    {                                                                    \
+        return Integer{                                                  \
+            SIMD128T::op(a.v4[0]),                                       \
+            SIMD128T::op(SIMD128T::template srli_si<shift>(a.v4[0])),    \
+        };                                                               \
+    }                                                                    \
+    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer const& a) \
+    {                                                                    \
+        return Integer{                                                  \
+            SIMD128T::op(a),                                             \
+            SIMD128T::op(SIMD128T::template srli_si<shift>(a)),          \
+        };                                                               \
      }
  
-#define SIMD_EMU_IWRAPPER_1I(op) \
-    template <int ImmT> static SIMDINLINE \
-    Integer SIMDCALL op(Integer const &a)\
-    {\
-        return Integer\
-        {\
-            SIMD128T::template op<ImmT>(a.v4[0]),\
-            SIMD128T::template op<ImmT>(a.v4[1]),\
-        };\
+#define SIMD_EMU_IWRAPPER_1I(op)                            \
+    template <int ImmT>                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return Integer{                                     \
+            SIMD128T::template op<ImmT>(a.v4[0]),           \
+            SIMD128T::template op<ImmT>(a.v4[1]),           \
+        };                                                  \
      }
  
-#define SIMD_EMU_IWRAPPER_2(op) \
-    static SIMDINLINE \
-    Integer SIMDCALL op(Integer const &a, Integer const &b)\
-    {\
-        return Integer\
-        {\
-            SIMD128T::op(a.v4[0], b.v4[0]),\
-            SIMD128T::op(a.v4[1], b.v4[1]),\
-        };\
+#define SIMD_EMU_IWRAPPER_2(op)                                               \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return Integer{                                                       \
+            SIMD128T::op(a.v4[0], b.v4[0]),                                   \
+            SIMD128T::op(a.v4[1], b.v4[1]),                                   \
+        };                                                                    \
      }
  
-#define SIMD_EMU_IWRAPPER_2I(op) \
-    template <int ImmT> static SIMDINLINE \
-    Integer SIMDCALL op(Integer const &a, Integer const &b)\
-    {\
-        return Integer\
-        {\
-            SIMD128T::template op<ImmT>(a.v4[0], b.v[0]),\
-            SIMD128T::template op<ImmT>(a.v4[1], b.v[1]),\
-        };\
+#define SIMD_EMU_IWRAPPER_2I(op)                                              \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return Integer{ /* op<ImmT> on each v4[] pair of a, b */              \
+            SIMD128T::template op<ImmT>(a.v4[0], b.v4[0]),                    \
+            SIMD128T::template op<ImmT>(a.v4[1], b.v4[1]),                    \
+        };                                                                    \
      }
  
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
+SIMD_WRAPPER_2(add_ps); // return a + b
+SIMD_WRAPPER_2(div_ps); // return a / b
  
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c) // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
+                                          Float const& b,
+                                          Float const& c) // return (a * b) + c
  {
      return add_ps(mul_ps(a, b), c);
  }
  
-static SIMDINLINE Float SIMDCALL fmsub_ps(Float const &a, Float const &b, Float const &c) // return (a * b) - c
+static SIMDINLINE Float SIMDCALL fmsub_ps(Float const& a,
+                                          Float const& b,
+                                          Float const& c) // return (a * b) - c
  {
      return sub_ps(mul_ps(a, b), c);
  }
  
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
-SIMD_WRAPPER_1(rcp_ps);     // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps);   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_2(max_ps);   // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);   // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);   // return a * b
+SIMD_WRAPPER_1(rcp_ps);   // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);   // return a - b
  
  template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const &a)
+static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
  {
      return _mm256_round_ps(a, static_cast<int>(RMT));
  }
  
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
+{
+    return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
+{
+    return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
@@ -209,7 +203,7 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<Roun
  SIMD_EMU_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_EMU_IWRAPPER_2(add_epi32); // return a + b (int32)
  SIMD_EMU_IWRAPPER_2(add_epi8);  // return a + b (int8)
-SIMD_EMU_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+SIMD_EMU_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  SIMD_EMU_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_EMU_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_EMU_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -237,182 +231,184 @@ SIMD_EMU_IWRAPPER_2(or_si);     // return a | b       (int)
  SIMD_WRAPPER_2(xor_ps);         // return a ^ b       (float treated as int)
  SIMD_EMU_IWRAPPER_2(xor_si);    // return a ^ b       (int)
  
-
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_EMU_IWRAPPER_1I(slli_epi32);               // return a << ImmT
+SIMD_EMU_IWRAPPER_1I(slli_epi32); // return a << ImmT
  
-static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const &vA, Integer const &vCount) // return a << b      (uint32)
+static SIMDINLINE Integer SIMDCALL sllv_epi32(Integer const& vA,
+                                              Integer const& vCount) // return a << b      (uint32)
  {
      int32_t aHi, aLow, countHi, countLow;
-    __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
-    __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
-    __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
+    __m128i vAHi      = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
+    __m128i vALow     = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
+    __m128i vCountHi  = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
      __m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
  
-    aHi = _mm_extract_epi32(vAHi, 0);
+    aHi     = _mm_extract_epi32(vAHi, 0);
      countHi = _mm_extract_epi32(vCountHi, 0);
      aHi <<= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 0);
  
-    aLow = _mm_extract_epi32(vALow, 0);
+    aLow     = _mm_extract_epi32(vALow, 0);
      countLow = _mm_extract_epi32(vCountLow, 0);
      aLow <<= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 0);
  
-    aHi = _mm_extract_epi32(vAHi, 1);
+    aHi     = _mm_extract_epi32(vAHi, 1);
      countHi = _mm_extract_epi32(vCountHi, 1);
      aHi <<= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 1);
  
-    aLow = _mm_extract_epi32(vALow, 1);
+    aLow     = _mm_extract_epi32(vALow, 1);
      countLow = _mm_extract_epi32(vCountLow, 1);
      aLow <<= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 1);
  
-    aHi = _mm_extract_epi32(vAHi, 2);
+    aHi     = _mm_extract_epi32(vAHi, 2);
      countHi = _mm_extract_epi32(vCountHi, 2);
      aHi <<= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 2);
  
-    aLow = _mm_extract_epi32(vALow, 2);
+    aLow     = _mm_extract_epi32(vALow, 2);
      countLow = _mm_extract_epi32(vCountLow, 2);
      aLow <<= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 2);
  
-    aHi = _mm_extract_epi32(vAHi, 3);
+    aHi     = _mm_extract_epi32(vAHi, 3);
      countHi = _mm_extract_epi32(vCountHi, 3);
      aHi <<= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 3);
  
-    aLow = _mm_extract_epi32(vALow, 3);
+    aLow     = _mm_extract_epi32(vALow, 3);
      countLow = _mm_extract_epi32(vCountLow, 3);
      aLow <<= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 3);
  
      __m256i ret = _mm256_set1_epi32(0);
-    ret = _mm256_insertf128_si256(ret, vAHi, 1);
-    ret = _mm256_insertf128_si256(ret, vALow, 0);
+    ret         = _mm256_insertf128_si256(ret, vAHi, 1);
+    ret         = _mm256_insertf128_si256(ret, vALow, 0);
      return ret;
  }
  
-SIMD_EMU_IWRAPPER_1I(srai_epi32);   // return a >> ImmT   (int32)
-SIMD_EMU_IWRAPPER_1I(srli_epi32);   // return a >> ImmT   (uint32)
-SIMD_EMU_IWRAPPER_1I(srli_si);      // return a >> (ImmT*8) (uint)
+SIMD_EMU_IWRAPPER_1I(srai_epi32); // return a >> ImmT   (int32)
+SIMD_EMU_IWRAPPER_1I(srli_epi32); // return a >> ImmT   (uint32)
+SIMD_EMU_IWRAPPER_1I(srli_si);    // return a >> (ImmT*8) (uint)
  
-template<int ImmT>                              // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a)
+template <int ImmT> // same as srli_si, but with Float cast to int
+static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
  {
      return castsi_ps(srli_si<ImmT>(castps_si(a)));
  }
  
-static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const &vA, Integer const &vCount) // return a >> b      (uint32)
+static SIMDINLINE Integer SIMDCALL srlv_epi32(Integer const& vA,
+                                              Integer const& vCount) // return a >> b      (uint32)
  {
      int32_t aHi, aLow, countHi, countLow;
-    __m128i vAHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
-    __m128i vALow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
-    __m128i vCountHi = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
+    __m128i vAHi      = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 1));
+    __m128i vALow     = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vA), 0));
+    __m128i vCountHi  = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 1));
      __m128i vCountLow = _mm_castps_si128(_mm256_extractf128_ps(_mm256_castsi256_ps(vCount), 0));
  
-    aHi = _mm_extract_epi32(vAHi, 0);
+    aHi     = _mm_extract_epi32(vAHi, 0);
      countHi = _mm_extract_epi32(vCountHi, 0);
      aHi >>= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 0);
  
-    aLow = _mm_extract_epi32(vALow, 0);
+    aLow     = _mm_extract_epi32(vALow, 0);
      countLow = _mm_extract_epi32(vCountLow, 0);
      aLow >>= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 0);
  
-    aHi = _mm_extract_epi32(vAHi, 1);
+    aHi     = _mm_extract_epi32(vAHi, 1);
      countHi = _mm_extract_epi32(vCountHi, 1);
      aHi >>= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 1);
  
-    aLow = _mm_extract_epi32(vALow, 1);
+    aLow     = _mm_extract_epi32(vALow, 1);
      countLow = _mm_extract_epi32(vCountLow, 1);
      aLow >>= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 1);
  
-    aHi = _mm_extract_epi32(vAHi, 2);
+    aHi     = _mm_extract_epi32(vAHi, 2);
      countHi = _mm_extract_epi32(vCountHi, 2);
      aHi >>= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 2);
  
-    aLow = _mm_extract_epi32(vALow, 2);
+    aLow     = _mm_extract_epi32(vALow, 2);
      countLow = _mm_extract_epi32(vCountLow, 2);
      aLow >>= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 2);
  
-    aHi = _mm_extract_epi32(vAHi, 3);
+    aHi     = _mm_extract_epi32(vAHi, 3);
      countHi = _mm_extract_epi32(vCountHi, 3);
      aHi >>= countHi;
      vAHi = _mm_insert_epi32(vAHi, aHi, 3);
  
-    aLow = _mm_extract_epi32(vALow, 3);
+    aLow     = _mm_extract_epi32(vALow, 3);
      countLow = _mm_extract_epi32(vCountLow, 3);
      aLow >>= countLow;
      vALow = _mm_insert_epi32(vALow, aLow, 3);
  
      __m256i ret = _mm256_set1_epi32(0);
-    ret = _mm256_insertf128_si256(ret, vAHi, 1);
-    ret = _mm256_insertf128_si256(ret, vALow, 0);
+    ret         = _mm256_insertf128_si256(ret, vAHi, 1);
+    ret         = _mm256_insertf128_si256(ret, vALow, 0);
      return ret;
  }
  
-
-
  //-----------------------------------------------------------------------
  // Conversion operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
  {
      return _mm256_castpd_ps(a);
  }
  
-static SIMDINLINE Integer SIMDCALL castps_si(Float const &a)   // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
  {
      return _mm256_castps_si256(a);
  }
  
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
  {
      return _mm256_castsi256_pd(a);
  }
  
-static SIMDINLINE Double SIMDCALL castps_pd(Float const &a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
  {
      return _mm256_castps_pd(a);
  }
  
-static SIMDINLINE Integer SIMDCALL castpd_si(Double const &a)   // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castpd_si(Double const& a) // return *(Integer*)(&a)
  {
      return _mm256_castpd_si256(a);
  }
  
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
  {
      return _mm256_castsi256_ps(a);
  }
  
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a) // return (float)a    (int32 --> float)
+static SIMDINLINE Float SIMDCALL
+                        cvtepi32_ps(Integer const& a) // return (float)a    (int32 --> float)
  {
      return _mm256_cvtepi32_ps(a);
  }
  
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi16, 8);                  // return (int16)a    (uint8 --> int16)
-SIMD_EMU_IWRAPPER_1L(cvtepu8_epi32, 4);                  // return (int32)a    (uint8 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8);                 // return (int32)a    (uint16 --> int32)
-SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4);                 // return (int64)a    (uint16 --> int64)
-SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8);                 // return (int64)a    (uint32 --> int64)
+SIMD_EMU_IWRAPPER_1L(cvtepu8_epi16, 8);  // return (int16)a    (uint8 --> int16)
+SIMD_EMU_IWRAPPER_1L(cvtepu8_epi32, 4);  // return (int32)a    (uint8 --> int32)
+SIMD_EMU_IWRAPPER_1L(cvtepu16_epi32, 8); // return (int32)a    (uint16 --> int32)
+SIMD_EMU_IWRAPPER_1L(cvtepu16_epi64, 4); // return (int64)a    (uint16 --> int64)
+SIMD_EMU_IWRAPPER_1L(cvtepu32_epi64, 8); // return (int64)a    (uint32 --> int64)
  
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a)            // return (int32)a    (float --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvtps_epi32(Float const& a) // return (int32)a    (float --> int32)
  {
      return _mm256_cvtps_epi32(a);
  }
  
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a)           // return (int32)a    (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvttps_epi32(Float const& a) // return (int32)a    (rnd_to_zero(float) --> int32)
  {
      return _mm256_cvttps_epi32(a);
  }
@@ -420,79 +416,107 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a)           // ret
  //-----------------------------------------------------------------------
  // Comparison operations
  //-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b
+template <CompareType CmpTypeT>
+static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
  {
      return _mm256_cmp_ps(a, b, static_cast<const int>(CmpTypeT));
  }
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::LE_OQ>(a, b);
+}
  
-SIMD_EMU_IWRAPPER_2(cmpeq_epi8);    // return a == b (int8)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi16);   // return a == b (int16)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi32);   // return a == b (int32)
-SIMD_EMU_IWRAPPER_2(cmpeq_epi64);   // return a == b (int64)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi8);    // return a > b (int8)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi16);   // return a > b (int16)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi32);   // return a > b (int32)
-SIMD_EMU_IWRAPPER_2(cmpgt_epi64);   // return a > b (int64)
-SIMD_EMU_IWRAPPER_2(cmplt_epi32);   // return a < b (int32)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi8);  // return a == b (int8)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_EMU_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi8);  // return a > b (int8)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_EMU_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_EMU_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
  
-static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b)  // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL
+                       testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
  {
-    return  0 != _mm256_testz_ps(a, b);
+    return 0 != _mm256_testz_ps(a, b);
  }
  
-static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b)  // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL
+                       testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
  {
-    return  0 != _mm256_testz_si256(a, b);
+    return 0 != _mm256_testz_si256(a, b);
  }
  
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps);  // return ImmT ? b : a  (float)
-SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps);  // return ImmT ? b : a  (int32)
-SIMD_WRAPPER_3(blendv_ps);  // return mask ? b : a  (float)
+SIMD_WRAPPER_2I(blend_ps);                       // return ImmT ? b : a  (float)
+SIMD_IFWRAPPER_2I(blend_epi32, _mm256_blend_ps); // return ImmT ? b : a  (int32)
+SIMD_WRAPPER_3(blendv_ps);                       // return mask ? b : a  (float)
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+                                                Integer const& b,
+                                                Float const&   mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
  }
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+                                                Integer const& b,
+                                                Integer const& mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
  }
  
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p)  // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+                        broadcast_ss(float const* p) // return *p (all elements in vector get same value)
  {
      return _mm256_broadcast_ss(p);
  }
  
-SIMD_EMU_IWRAPPER_2(packs_epi16);   // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_EMU_IWRAPPER_2(packs_epi32);   // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_EMU_IWRAPPER_2(packus_epi16);  // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_EMU_IWRAPPER_2(packus_epi32);  // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_EMU_IWRAPPER_2(packs_epi16); // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_EMU_IWRAPPER_2(packs_epi32); // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_EMU_IWRAPPER_2(
+    packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_EMU_IWRAPPER_2(
+    packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
  
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
  {
      return _mm256_permute_ps(a, ImmT);
  }
  
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
+static SIMDINLINE Integer SIMDCALL permute_epi32(
+    Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
  {
      Integer result;
  
      // Ugly slow implementation
-    uint32_t const *pA = reinterpret_cast<uint32_t const*>(&a);
-    uint32_t const *pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
-    uint32_t *pResult = reinterpret_cast<uint32_t *>(&result);
+    uint32_t const* pA      = reinterpret_cast<uint32_t const*>(&a);
+    uint32_t const* pSwiz   = reinterpret_cast<uint32_t const*>(&swiz);
+    uint32_t*       pResult = reinterpret_cast<uint32_t*>(&result);
  
      for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
      {
@@ -502,14 +526,15 @@ static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const
      return result;
  }
  
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+                        permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
  {
      Float result;
  
      // Ugly slow implementation
-    float const *pA = reinterpret_cast<float const*>(&a);
-    uint32_t const *pSwiz = reinterpret_cast<uint32_t const*>(&swiz);
-    float *pResult = reinterpret_cast<float *>(&result);
+    float const*    pA      = reinterpret_cast<float const*>(&a);
+    uint32_t const* pSwiz   = reinterpret_cast<uint32_t const*>(&swiz);
+    float*          pResult = reinterpret_cast<float*>(&result);
  
      for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
      {
@@ -523,11 +548,10 @@ SIMD_WRAPPER_2I(permute2f128_ps);
  SIMD_DWRAPPER_2I(permute2f128_pd);
  SIMD_IWRAPPER_2I_(permute2f128_si, permute2f128_si256);
  
-
  SIMD_EMU_IWRAPPER_1I(shuffle_epi32);
  
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
  {
      return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
  }
@@ -550,83 +574,88 @@ SIMD_WRAPPER_2(unpacklo_ps);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
-    uint32_t *pOffsets = (uint32_t*)&idx;
-    Float vResult;
-    float* pResult = (float*)&vResult;
+    uint32_t* pOffsets = (uint32_t*)&idx;
+    Float     vResult;
+    float*    pResult = (float*)&vResult;
      for (uint32_t i = 0; i < SIMD_WIDTH; ++i)
      {
          uint32_t offset = pOffsets[i];
-        offset = offset * static_cast<uint32_t>(ScaleT);
-        pResult[i] = *(float const*)(((uint8_t const*)p + offset));
+        offset          = offset * static_cast<uint32_t>(ScaleT);
+        pResult[i]      = *(float const*)(((uint8_t const*)p + offset));
      }
  
      return vResult;
  }
  
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p)  // return *p    (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+                        load1_ps(float const* p) // return *p    (broadcast 1 value to all elements)
  {
      return broadcast_ss(p);
  }
  
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
      return _mm256_load_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
      return _mm256_load_si256(&p->v);
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
      return _mm256_loadu_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
      return _mm256_lddqu_si256(&p->v);
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
-{
-    uint32_t *pOffsets = (uint32_t*)&idx;
-    Float vResult = old;
-    float* pResult = (float*)&vResult;
-    DWORD index;
-    uint32_t umask = movemask_ps(mask);
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
+{
+    uint32_t* pOffsets = (uint32_t*)&idx;
+    Float     vResult  = old;
+    float*    pResult  = (float*)&vResult;
+    DWORD     index;
+    uint32_t  umask = movemask_ps(mask);
      while (_BitScanForward(&index, umask))
      {
          umask &= ~(1 << index);
          uint32_t offset = pOffsets[index];
-        offset = offset * static_cast<uint32_t>(ScaleT);
-        pResult[index] = *(float const *)(((uint8_t const *)p + offset));
+        offset          = offset * static_cast<uint32_t>(ScaleT);
+        pResult[index]  = *(float const*)(((uint8_t const*)p + offset));
      }
  
      return vResult;
  }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
  {
      _mm256_maskstore_ps(p, mask, src);
  }
  
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
  {
-    return SIMD128T::movemask_epi8(a.v4[0]) |
-           (SIMD128T::movemask_epi8(a.v4[1]) << 16);
+    return SIMD128T::movemask_epi8(a.v4[0]) | (SIMD128T::movemask_epi8(a.v4[1]) << 16);
  }
  
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
  {
      return static_cast<uint32_t>(_mm256_movemask_pd(a));
  }
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
  {
      return static_cast<uint32_t>(_mm256_movemask_ps(a));
  }
@@ -641,32 +670,34 @@ static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements a
      return _mm256_set1_epi8(i);
  }
  
-static SIMDINLINE Float SIMDCALL set1_ps(float f)  // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
  {
      return _mm256_set1_ps(f);
  }
  
-static SIMDINLINE Float SIMDCALL setzero_ps()      // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
  {
      return _mm256_setzero_ps();
  }
  
-static SIMDINLINE Integer SIMDCALL setzero_si()      // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
  {
      return _mm256_setzero_si256();
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float const& a) // *p = a   (stores all elements contiguously in memory)
  {
      _mm256_store_ps(p, a);
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
  {
      _mm256_store_si256(&p->v, a);
  }
  
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a)   // *p = a   (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+                       stream_ps(float* p, Float const& a) // *p = a   (same as store_ps, but doesn't keep memory in cache)
  {
      _mm256_stream_ps(p, a);
  }
@@ -675,43 +706,43 @@ static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a)   // *p = a
  // Legacy interface (available only in SIMD256 width)
  //=======================================================================
  
-static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const *p)
+static SIMDINLINE Float SIMDCALL broadcast_ps(SIMD128Impl::Float const* p)
  {
      return _mm256_broadcast_ps(&p->v);
  }
  
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Double SIMDCALL extractf128_pd(Double const& a)
  {
      return _mm256_extractf128_pd(a, ImmT);
  }
  
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Float  SIMDCALL extractf128_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Float SIMDCALL extractf128_ps(Float const& a)
  {
      return _mm256_extractf128_ps(a, ImmT);
  }
  
-template<int ImmT>
-static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const &a)
+template <int ImmT>
+static SIMDINLINE SIMD128Impl::Integer SIMDCALL extractf128_si(Integer const& a)
  {
      return _mm256_extractf128_si256(a, ImmT);
  }
  
-template<int ImmT>
-static SIMDINLINE Double SIMDCALL insertf128_pd(Double const &a, SIMD128Impl::Double const &b)
+template <int ImmT>
+static SIMDINLINE Double SIMDCALL insertf128_pd(Double const& a, SIMD128Impl::Double const& b)
  {
      return _mm256_insertf128_pd(a, b, ImmT);
  }
  
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL insertf128_ps(Float const &a, SIMD128Impl::Float const &b)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL insertf128_ps(Float const& a, SIMD128Impl::Float const& b)
  {
      return _mm256_insertf128_ps(a, b, ImmT);
  }
  
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const &a, SIMD128Impl::Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const& a, SIMD128Impl::Integer const& b)
  {
      return _mm256_insertf128_si256(a, b, ImmT);
  }
@@ -727,33 +758,37 @@ static SIMDINLINE Integer SIMDCALL insertf128_si(Integer const &a, SIMD128Impl::
      _mm256_set_m128i(_mm_loadu_si128(hiaddr), _mm_loadu_si128(loaddr))
  #endif
  
-static SIMDINLINE Integer SIMDCALL loadu2_si(SIMD128Impl::Integer const* phi, SIMD128Impl::Integer const* plo)
+static SIMDINLINE Integer SIMDCALL loadu2_si(SIMD128Impl::Integer const* phi,
+                                             SIMD128Impl::Integer const* plo)
  {
      return _mm256_loadu2_m128i(&phi->v, &plo->v);
  }
  
-static SIMDINLINE Integer SIMDCALL set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+                          set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
  {
      return _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE Float SIMDCALL set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+                        set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
  {
      return _mm256_set_ps(i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer *phi, SIMD128Impl::Integer *plo, Integer const &src)
+static SIMDINLINE void SIMDCALL storeu2_si(SIMD128Impl::Integer* phi,
+                                           SIMD128Impl::Integer* plo,
+                                           Integer const&        src)
  {
      _mm256_storeu2_m128i(&phi->v, &plo->v, src);
  }
  
  static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
  {
-    Integer vec = set1_epi32(mask);
-    const Integer bit = set_epi32(
-        0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
-    vec = and_si(vec, bit);
-    vec = cmplt_epi32(setzero_si(), vec);
+    Integer       vec = set1_epi32(mask);
+    const Integer bit = set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
+    vec               = and_si(vec, bit);
+    vec               = cmplt_epi32(setzero_si(), vec);
      return castsi_ps(vec);
  }
  
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl

index 96c24fff9da52e9326ffa1a1e6d8a714be7ab65b..59a61cf926397ca7c148e25c03b7901e239b39bc 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX2_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -32,62 +32,61 @@
  // Mostly these are integer operations that are no longer emulated with SSE
  //============================================================================
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return _mm256_##op(a);\
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) { return _mm256_##op(a); }
+
+#define SIMD_IWRAPPER_1L(op)                                \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return _mm256_##op(_mm256_castsi256_si128(a));      \
      }
  
-#define SIMD_IWRAPPER_1L(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return _mm256_##op(_mm256_castsi256_si128(a));\
-    }\
-
-#define SIMD_IWRAPPER_1I(op)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return _mm256_##op(a, ImmT);\
+#define SIMD_IWRAPPER_1I(op)                                \
+    template <int ImmT>                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return _mm256_##op(a, ImmT);                        \
      }
  
-#define SIMD_IWRAPPER_1I_(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return _mm256_##intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin)                       \
+    template <int ImmT>                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return _mm256_##intrin(a, ImmT);                    \
      }
  
-#define SIMD_IWRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##intrin(a, b);\
+#define SIMD_IWRAPPER_2_(op, intrin)                                          \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##intrin(a, b);                                         \
      }
  
-#define SIMD_IWRAPPER_2(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##op(a, b);\
+#define SIMD_IWRAPPER_2(op)                                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##op(a, b);                                             \
      }
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op)                                                  \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##op(a, b, ImmT);                                       \
      }
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return _mm256_##op(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I(op)                                                  \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return _mm256_##op(a, b, ImmT);                                       \
      }
  
  //-----------------------------------------------------------------------
  // Floating point arithmetic operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float const &c)   // return (a * b) + c
+static SIMDINLINE Float SIMDCALL fmadd_ps(Float const& a,
+                                          Float const& b,
+                                          Float const& c) // return (a * b) + c
  {
      return _mm256_fmadd_ps(a, b, c);
  }
@@ -98,7 +97,7 @@ static SIMDINLINE Float SIMDCALL fmadd_ps(Float const &a, Float const &b, Float
  SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
  SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -117,51 +116,50 @@ SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
  //-----------------------------------------------------------------------
  // Logical operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si,    and_si256);     // return a & b       (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_si256);  // return (~a) & b    (int)
-SIMD_IWRAPPER_2_(or_si,     or_si256);      // return a | b       (int)
-SIMD_IWRAPPER_2_(xor_si,    xor_si256);     // return a ^ b       (int)
-
+SIMD_IWRAPPER_2_(and_si, and_si256);       // return a & b       (int)
+SIMD_IWRAPPER_2_(andnot_si, andnot_si256); // return (~a) & b    (int)
+SIMD_IWRAPPER_2_(or_si, or_si256);         // return a | b       (int)
+SIMD_IWRAPPER_2_(xor_si, xor_si256);       // return a ^ b       (int)
  
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32);               // return a << ImmT
-SIMD_IWRAPPER_2(sllv_epi32);                // return a << b      (uint32)
-SIMD_IWRAPPER_1I(srai_epi32);               // return a >> ImmT   (int32)
-SIMD_IWRAPPER_1I(srli_epi32);               // return a >> ImmT   (uint32)
-SIMD_IWRAPPER_2(srlv_epi32);                // return a >> b      (uint32)
-SIMD_IWRAPPER_1I_(srli_si, srli_si256);     // return a >> (ImmT*8) (uint)
+SIMD_IWRAPPER_1I(slli_epi32);           // return a << ImmT
+SIMD_IWRAPPER_2(sllv_epi32);            // return a << b      (uint32)
+SIMD_IWRAPPER_1I(srai_epi32);           // return a >> ImmT   (int32)
+SIMD_IWRAPPER_1I(srli_epi32);           // return a >> ImmT   (uint32)
+SIMD_IWRAPPER_2(srlv_epi32);            // return a >> b      (uint32)
+SIMD_IWRAPPER_1I_(srli_si, srli_si256); // return a >> (ImmT*8) (uint)
  
-template<int ImmT>                          // same as srli_si, but with Float cast to int
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a)
+template <int ImmT> // same as srli_si, but with Float cast to int
+static SIMDINLINE Float SIMDCALL srlisi_ps(Float const& a)
  {
      return castsi_ps(srli_si<ImmT>(castps_si(a)));
  }
  
-
  //-----------------------------------------------------------------------
  // Conversion operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1L(cvtepu8_epi16);    // return (int16)a    (uint8 --> int16)
-SIMD_IWRAPPER_1L(cvtepu8_epi32);    // return (int32)a    (uint8 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi32);   // return (int32)a    (uint16 --> int32)
-SIMD_IWRAPPER_1L(cvtepu16_epi64);   // return (int64)a    (uint16 --> int64)
-SIMD_IWRAPPER_1L(cvtepu32_epi64);   // return (int64)a    (uint32 --> int64)
+SIMD_IWRAPPER_1L(cvtepu8_epi16);  // return (int16)a    (uint8 --> int16)
+SIMD_IWRAPPER_1L(cvtepu8_epi32);  // return (int32)a    (uint8 --> int32)
+SIMD_IWRAPPER_1L(cvtepu16_epi32); // return (int32)a    (uint16 --> int32)
+SIMD_IWRAPPER_1L(cvtepu16_epi64); // return (int64)a    (uint16 --> int64)
+SIMD_IWRAPPER_1L(cvtepu32_epi64); // return (int64)a    (uint32 --> int64)
  
  //-----------------------------------------------------------------------
  // Comparison operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2(cmpeq_epi8);    // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16);   // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32);   // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64);   // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8);    // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16);   // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32);   // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64);   // return a > b (int64)
-
-static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const &a, Integer const &b)   // return a < b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi8);  // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8);  // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+
+static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const& a,
+                                               Integer const& b) // return a < b (int32)
  {
      return cmpgt_epi32(b, a);
  }
@@ -169,28 +167,29 @@ static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer const &a, Integer const &
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2I(blend_epi32);  // return ImmT ? b : a  (int32)
-SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a  (int32)
+SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
  
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
  {
      return _mm256_permute_ps(a, ImmT);
  }
  
  SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
  
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+                        permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
  {
      return _mm256_permutevar8x32_ps(a, swiz);
  }
  
  SIMD_IWRAPPER_1I(shuffle_epi32);
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const &a, Integer const &b)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer const& a, Integer const& b)
  {
      return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
  }
@@ -207,22 +206,24 @@ SIMD_IWRAPPER_2(unpacklo_epi8);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
      return _mm256_i32gather_ps(p, idx, static_cast<int>(ScaleT));
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
  {
-       // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256()
-       // Only for this intrinsic - not sure why. :(
+    // g++ in debug mode needs the explicit .v suffix instead of relying on operator __m256()
+    // Only for this intrinsic - not sure why. :(
      return _mm256_mask_i32gather_ps(old.v, p, idx.v, mask.v, static_cast<int>(ScaleT));
  }
  
-static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer const& a)
  {
      return static_cast<uint32_t>(_mm256_movemask_epi8(a));
  }
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl

index 3fcfd250f918028fc917f1f0a70ad41e6c631bc1..790609861e5442592f248f471425ac4a41fe61b7 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -34,120 +34,138 @@
  //============================================================================
  
  private:
-    static SIMDINLINE __m512  __conv(Float r) { return _mm512_castps256_ps512(r.v); }
-    static SIMDINLINE __m512d __conv(Double r) { return _mm512_castpd256_pd512(r.v); }
-    static SIMDINLINE __m512i __conv(Integer r) { return _mm512_castsi256_si512(r.v); }
-    static SIMDINLINE Float   __conv(__m512 r) { return _mm512_castps512_ps256(r); }
-    static SIMDINLINE Double  __conv(__m512d r) { return _mm512_castpd512_pd256(r); }
-    static SIMDINLINE Integer __conv(__m512i r) { return _mm512_castsi512_si256(r); }
-public:
+static SIMDINLINE __m512 __conv(Float r)
+{
+    return _mm512_castps256_ps512(r.v);
+}
+static SIMDINLINE __m512d __conv(Double r)
+{
+    return _mm512_castpd256_pd512(r.v);
+}
+static SIMDINLINE __m512i __conv(Integer r)
+{
+    return _mm512_castsi256_si512(r.v);
+}
+static SIMDINLINE Float __conv(__m512 r)
+{
+    return _mm512_castps512_ps256(r);
+}
+static SIMDINLINE Double __conv(__m512d r)
+{
+    return _mm512_castpd512_pd256(r);
+}
+static SIMDINLINE Integer __conv(__m512i r)
+{
+    return _mm512_castsi512_si256(r);
+}
  
-#define SIMD_WRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+public:
+#define SIMD_WRAPPER_1_(op, intrin, mask)                        \
+    static SIMDINLINE Float SIMDCALL op(Float a)                 \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_WRAPPER_1(op)  SIMD_WRAPPER_1_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xff))
  
-#define SIMD_WRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_WRAPPER_1I_(op, intrin, mask)                             \
+    template <int ImmT>                                                \
+    static SIMDINLINE Float SIMDCALL op(Float a)                       \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_WRAPPER_1I(op)  SIMD_WRAPPER_1I_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xff))
  
-#define SIMD_WRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_WRAPPER_2_(op, intrin, mask)                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                   \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_WRAPPER_2(op)  SIMD_WRAPPER_2_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xff))
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT));\
+#define SIMD_WRAPPER_2I(op)                                                 \
+    template <int ImmT>                                                     \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                   \
+    {                                                                       \
+        return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_WRAPPER_3_(op, intrin, mask)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\
+#define SIMD_WRAPPER_3_(op, intrin, mask)                                              \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)                     \
+    {                                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c))); \
      }
-#define SIMD_WRAPPER_3(op)  SIMD_WRAPPER_3_(op, op, __mmask16(0xff))
+#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xff))
  
-#define SIMD_DWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\
+#define SIMD_DWRAPPER_2I(op)                                               \
+    template <int ImmT>                                                    \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b)               \
+    {                                                                      \
+        return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT)); \
      }
  
-#define SIMD_IWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)             \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_IWRAPPER_1_32(op)  SIMD_IWRAPPER_1_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xff))
  
-#define SIMD_IWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)                   \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_IWRAPPER_1I_32(op)  SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff))
  
-#define SIMD_IWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)             \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_IWRAPPER_2_32(op)  SIMD_IWRAPPER_2_(op, op, __mmask16(0xff))
+#define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xff))
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT));\
+#define SIMD_IWRAPPER_2I(op)                                                \
+    template <int ImmT>                                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)             \
+    {                                                                       \
+        return __conv(_mm512_maskz_##op(0xff, __conv(a), __conv(b), ImmT)); \
      }
  
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
-SIMD_WRAPPER_3(fmadd_ps);   // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps);   // return (a * b) - c
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
+SIMD_WRAPPER_2(add_ps);                                 // return a + b
+SIMD_WRAPPER_2(div_ps);                                 // return a / b
+SIMD_WRAPPER_3(fmadd_ps);                               // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps);                               // return (a * b) - c
+SIMD_WRAPPER_2(max_ps);                                 // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);                                 // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);                                 // return a * b
  SIMD_WRAPPER_1_(rcp_ps, rcp14_ps, __mmask16(0xff));     // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xff));   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_1_(rsqrt_ps, rsqrt14_ps, __mmask16(0xff)); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);                                 // return a - b
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1_32(abs_epi32);  // return absolute_value(a) (int32)
-SIMD_IWRAPPER_2_32(add_epi32);  // return a + b (int32)
-SIMD_IWRAPPER_2_32(max_epi32);  // return (a > b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(max_epu32);  // return (a > b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(min_epi32);  // return (a < b) ? a : b (int32)
-SIMD_IWRAPPER_2_32(min_epu32);  // return (a < b) ? a : b (uint32)
-SIMD_IWRAPPER_2_32(mul_epi32);  // return a * b (int32)
+SIMD_IWRAPPER_1_32(abs_epi32); // return absolute_value(a) (int32)
+SIMD_IWRAPPER_2_32(add_epi32); // return a + b (int32)
+SIMD_IWRAPPER_2_32(max_epi32); // return (a > b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(max_epu32); // return (a > b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32)
+SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32)
+SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32)
  
  // SIMD_IWRAPPER_2_8(add_epi8);    // return a + b (int8)
-// SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+// SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  
  // return (a * b) & 0xFFFFFFFF
  //
  // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
  // and store the low 32 bits of the intermediate integers in dst.
  SIMD_IWRAPPER_2_32(mullo_epi32);
-SIMD_IWRAPPER_2_32(sub_epi32);  // return a - b (int32)
+SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32)
  
  // SIMD_IWRAPPER_2_64(sub_epi64);  // return a - b (int64)
  // SIMD_IWRAPPER_2_8(subs_epu8);   // return (b > a) ? 0 : (a - b) (uint8)
@@ -155,23 +173,22 @@ SIMD_IWRAPPER_2_32(sub_epi32);  // return a - b (int32)
  //-----------------------------------------------------------------------
  // Logical operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si,    and_epi32, __mmask16(0xff));    // return a & b       (int)
+SIMD_IWRAPPER_2_(and_si, and_epi32, __mmask16(0xff));       // return a & b       (int)
  SIMD_IWRAPPER_2_(andnot_si, andnot_epi32, __mmask16(0xff)); // return (~a) & b    (int)
-SIMD_IWRAPPER_2_(or_si,     or_epi32, __mmask16(0xff));     // return a | b       (int)
-SIMD_IWRAPPER_2_(xor_si,    xor_epi32, __mmask16(0xff));    // return a ^ b       (int)
-
+SIMD_IWRAPPER_2_(or_si, or_epi32, __mmask16(0xff));         // return a | b       (int)
+SIMD_IWRAPPER_2_(xor_si, xor_epi32, __mmask16(0xff));       // return a ^ b       (int)
  
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I_32(slli_epi32);               // return a << ImmT
-SIMD_IWRAPPER_2_32(sllv_epi32);                // return a << b      (uint32)
-SIMD_IWRAPPER_1I_32(srai_epi32);               // return a >> ImmT   (int32)
-SIMD_IWRAPPER_1I_32(srli_epi32);               // return a >> ImmT   (uint32)
-SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
+SIMD_IWRAPPER_1I_32(slli_epi32); // return a << ImmT
+SIMD_IWRAPPER_2_32(sllv_epi32);  // return a << b      (uint32)
+SIMD_IWRAPPER_1I_32(srai_epi32); // return a >> ImmT   (int32)
+SIMD_IWRAPPER_1I_32(srli_epi32); // return a >> ImmT   (uint32)
+SIMD_IWRAPPER_2_32(srlv_epi32);  // return a >> b      (uint32)
  
  // use AVX2 version
-//SIMD_IWRAPPER_1I_(srli_si, srli_si256);     // return a >> (ImmT*8) (uint)
+// SIMD_IWRAPPER_1I_(srli_si, srli_si256);     // return a >> (ImmT*8) (uint)
  
  //-----------------------------------------------------------------------
  // Conversion operations (Use AVX2 versions)
@@ -185,16 +202,16 @@ SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
  //-----------------------------------------------------------------------
  // Comparison operations (Use AVX2 versions
  //-----------------------------------------------------------------------
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8);    // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16);   // return a == b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi32);   // return a == b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi64);   // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,);   // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16);   // return a > b (int16)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi32);   // return a > b (int32)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi64);   // return a > b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8);    // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16);   // return a == b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi32);   // return a == b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi64);   // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8);    // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16);   // return a > b (int16)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi32);   // return a > b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi64);   // return a > b (int64)
  //
-//static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b)   // return a < b (int32)
+// static SIMDINLINE Integer SIMDCALL cmplt_epi32(Integer a, Integer b)   // return a < b (int32)
  //{
  //    return cmpgt_epi32(b, a);
  //}
@@ -202,25 +219,28 @@ SIMD_IWRAPPER_2_32(srlv_epi32);                // return a >> b      (uint32)
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-// SIMD_IWRAPPER_2_8(packs_epi16);     // int16 --> int8    See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2_16(packs_epi32);    // int32 --> int16   See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2_8(packus_epi16);    // uint16 --> uint8  See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2_16(packus_epi32);   // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+// For each wrapper below, see the documentation for the _mm256_ and _mm512_ intrinsics of the
+// same name (e.g. _mm256_packs_epi16 and _mm512_packs_epi16).
+// SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8
+// SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16
+// SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8
+// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16
  
  // SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
  
-//static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+// return a[swiz[i]] for each 32-bit lane i (float)
+// static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)
  //{
  //    return _mm256_permutevar8x32_ps(a, swiz);
  //}
  
  SIMD_IWRAPPER_1I_32(shuffle_epi32);
-//template<int ImmT>
-//static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
+// template<int ImmT>
+// static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
  //{
  //    return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
  //}
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
  SIMD_IWRAPPER_2_32(unpackhi_epi32);
  SIMD_IWRAPPER_2_32(unpacklo_epi32);
  
@@ -234,50 +254,47 @@ SIMD_IWRAPPER_2_32(unpacklo_epi32);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
      return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
      return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
      return __conv(_mm512_maskz_loadu_ps(__mmask16(0xff), p));
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
      return __conv(_mm512_maskz_loadu_epi32(__mmask16(0xff), p));
  }
  
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
      return __conv(_mm512_mask_i32gather_ps(
-                    _mm512_setzero_ps(),
-                    __mmask16(0xff),
-                    __conv(idx),
-                    p,
-                    static_cast<int>(ScaleT)));
+        _mm512_setzero_ps(), __mmask16(0xff), __conv(idx), p, static_cast<int>(ScaleT)));
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
  {
      __mmask16 m = 0xff;
-    m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
-                                _mm512_set1_epi32(0x80000000));
-    return __conv(_mm512_mask_i32gather_ps(
-                    __conv(old),
-                    m,
-                    __conv(idx),
-                    p,
-                    static_cast<int>(ScaleT)));
+    m           = _mm512_mask_test_epi32_mask(
+        m, _mm512_castps_si512(__conv(mask)), _mm512_set1_epi32(0x80000000));
+    return __conv(
+        _mm512_mask_i32gather_ps(__conv(old), m, __conv(idx), p, static_cast<int>(ScaleT)));
  }
  
  // static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
@@ -287,19 +304,20 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In
  //         _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
  // }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
  {
      __mmask16 m = 0xff;
-    m = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
+    m           = _mm512_mask_test_epi32_mask(m, __conv(mask), _mm512_set1_epi32(0x80000000));
      _mm512_mask_storeu_ps(p, m, __conv(src));
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float a) // *p = a   (stores all elements contiguously in memory)
  {
      _mm512_mask_storeu_ps(p, __mmask16(0xff), __conv(a));
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
  {
      _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a));
  }
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl

index 6ffe7c2a0f0a34d22ba4e1133dacdb805048f4c1..1acdc7e07ff8e24c92c66bdf87a13de2d0d5c985 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -33,65 +33,68 @@
  // register set.
  //============================================================================
  
-#define SIMD_DWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Double SIMDCALL op(Double a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_DWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Double SIMDCALL op(Double a)               \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_DWRAPPER_1(op)  SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf))
  
-#define SIMD_DWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Double SIMDCALL op(Double a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_DWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Double SIMDCALL op(Double a)                     \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_DWRAPPER_1I(op)  SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf))
  
-#define SIMD_DWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_DWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b)                \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_DWRAPPER_2(op)  SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
+#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf))
  
-#define SIMD_IWRAPPER_1_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\
+#define SIMD_IWRAPPER_1_(op, intrin, mask)                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)             \
+    {                                                            \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a))); \
      }
-#define SIMD_IWRAPPER_1_8(op)   SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1_16(op)  SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1_64(op)  SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf))
  
-#define SIMD_IWRAPPER_1I_(op, intrin, mask)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\
+#define SIMD_IWRAPPER_1I_(op, intrin, mask)                            \
+    template <int ImmT>                                                \
+    static SIMDINLINE Integer SIMDCALL op(Integer a)                   \
+    {                                                                  \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT)); \
      }
-#define SIMD_IWRAPPER_1I_8(op)   SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_1I_16(op)  SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_1I_64(op)  SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf))
  
-#define SIMD_IWRAPPER_2_(op, intrin, mask)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\
+#define SIMD_IWRAPPER_2_(op, intrin, mask)                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)             \
+    {                                                                       \
+        return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b))); \
      }
-#define SIMD_IWRAPPER_2_8(op)   SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
-#define SIMD_IWRAPPER_2_16(op)  SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
-#define SIMD_IWRAPPER_2_64(op)  SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
+#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull))
+#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff))
+#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf))
  
-
-SIMD_IWRAPPER_2_8(add_epi8);    // return a + b (int8)
-SIMD_IWRAPPER_2_8(adds_epu8);   // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
-SIMD_IWRAPPER_2_64(sub_epi64);  // return a - b (int64)
-SIMD_IWRAPPER_2_8(subs_epu8);   // return (b > a) ? 0 : (a - b) (uint8)
-SIMD_IWRAPPER_2_8(packs_epi16);     // int16 --> int8    See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2_16(packs_epi32);    // int32 --> int16   See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2_8(packus_epi16);    // uint16 --> uint8  See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2_16(packus_epi32);   // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2_8(add_epi8);      // return a + b (int8)
+SIMD_IWRAPPER_2_8(adds_epu8);     // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2_64(sub_epi64);    // return a - b (int64)
+SIMD_IWRAPPER_2_8(subs_epu8);     // return (b > a) ? 0 : (a - b) (uint8)
+SIMD_IWRAPPER_2_8(packs_epi16);   // int16 --> int8    See documentation for _mm256_packs_epi16 and
+                                  // _mm512_packs_epi16
+SIMD_IWRAPPER_2_16(packs_epi32);  // int32 --> int16   See documentation for _mm256_packs_epi32 and
+                                  // _mm512_packs_epi32
+SIMD_IWRAPPER_2_8(packus_epi16);  // uint16 --> uint8  See documentation for _mm256_packus_epi16 and
+                                  // _mm512_packus_epi16
+SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and
+                                  // _mm512_packus_epi32
  SIMD_IWRAPPER_2_16(unpackhi_epi16);
  SIMD_IWRAPPER_2_64(unpackhi_epi64);
  SIMD_IWRAPPER_2_8(unpackhi_epi8);
@@ -102,8 +105,7 @@ SIMD_IWRAPPER_2_8(unpacklo_epi8);
  static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a)
  {
      __mmask64 m = 0xffffffffull;
-    return static_cast<uint32_t>(
-        _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
+    return static_cast<uint32_t>(_mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80)));
  }
  
  #undef SIMD_DWRAPPER_1_
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl

index acd8ffd96885fe1877438da0e3e4b367796240bd..52b6ca2b61e8512c1d59a0f98d3c20ea15c0f326 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -32,4 +32,3 @@
  // These use native AVX512 instructions with masking to enable a larger
  // register set.
  //============================================================================
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl

index dfe19d3c04a603580fb1db8969e54ebffc06df86..e9e908ac3c639faf3486f960662dd780a1beb7a9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -1,41 +1,41 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
  
-#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
  // gcc as of 7.1 was missing these intrinsics
  #ifndef _mm512_cmpneq_ps_mask
-#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ)
+#define _mm512_cmpneq_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_NEQ_UQ)
  #endif
  
  #ifndef _mm512_cmplt_ps_mask
-#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS)
+#define _mm512_cmplt_ps_mask(a, b) _mm512_cmp_ps_mask((a), (b), _CMP_LT_OS)
  #endif
  
  #ifndef _mm512_cmplt_pd_mask
-#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS)
+#define _mm512_cmplt_pd_mask(a, b) _mm512_cmp_pd_mask((a), (b), _CMP_LT_OS)
  #endif
  
  #endif
@@ -47,136 +47,108 @@
  //============================================================================
  
  static const int TARGET_SIMD_WIDTH = 16;
-using SIMD256T = SIMD256Impl::AVX2Impl;
+using SIMD256T                     = SIMD256Impl::AVX2Impl;
  
-#define SIMD_WRAPPER_1_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return intrin(a);\
-    }
+#define SIMD_WRAPPER_1_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
  
-#define SIMD_WRAPPER_1(op)  \
-    SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
  
-#define SIMD_WRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
+#define SIMD_WRAPPER_2_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
  #define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
  
-#define SIMD_WRAPPERI_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_castsi512_ps(_mm512_##intrin(\
-            _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin)                                          \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                     \
+    {                                                                         \
+        return _mm512_castsi512_ps(                                           \
+            _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
      }
  
-#define SIMD_DWRAPPER_2(op)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##op(a, b);\
-    }
+#define SIMD_DWRAPPER_2(op) \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
  
-#define SIMD_WRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin)                      \
+    template <int ImmT>                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+    {                                                     \
+        return _mm512_##intrin(a, b, ImmT);               \
      }
-#define SIMD_WRAPPER_2I(op)  SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
  
-#define SIMD_DWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin)                        \
+    template <int ImmT>                                      \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+    {                                                        \
+        return _mm512_##intrin(a, b, ImmT);                  \
      }
-#define SIMD_DWRAPPER_2I(op)  SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
  
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return _mm512_##op(a, b, c);\
-    }
+#define SIMD_WRAPPER_3(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
-#define SIMD_IWRAPPER_1_8(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1_4(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1_4(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1I_(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin)                \
+    template <int ImmT>                              \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) \
+    {                                                \
+        return intrin(a, ImmT);                      \
      }
  #define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
  
-#define SIMD_IWRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
-#define SIMD_IWRAPPER_2(op)  SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
  
-#define SIMD_IWRAPPER_2_CMP(op, cmp)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return cmp(a, b);\
-    }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
  
-#define SIMD_IFWRAPPER_2(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin)                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)        \
+    {                                                                  \
+        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
      }
  
-#define SIMD_IWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin)                           \
+    template <int ImmT>                                         \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+    {                                                           \
+        return _mm512_##intrin(a, b, ImmT);                     \
      }
  #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
  private:
-    static SIMDINLINE Integer vmask(__mmask16 m)
-    {
-        return _mm512_maskz_set1_epi32(m, -1);
-    }
+static SIMDINLINE Integer vmask(__mmask16 m)
+{
+    return _mm512_maskz_set1_epi32(m, -1);
+}
  
-    static SIMDINLINE Integer vmask(__mmask8 m)
-    {
-        return _mm512_maskz_set1_epi64(m, -1LL);
-    }
+static SIMDINLINE Integer vmask(__mmask8 m)
+{
+    return _mm512_maskz_set1_epi64(m, -1LL);
+}
  
  public:
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
-SIMD_WRAPPER_3(fmadd_ps);   // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps);   // return (a * b) - c
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
-SIMD_WRAPPER_1_(rcp_ps, _mm512_rcp14_ps);       // return 1.0f / a
-SIMD_WRAPPER_1_(rsqrt_ps, _mm512_rsqrt14_ps);   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_2(add_ps);                       // return a + b
+SIMD_WRAPPER_2(div_ps);                       // return a / b
+SIMD_WRAPPER_3(fmadd_ps);                     // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps);                     // return (a * b) - c
+SIMD_WRAPPER_2(max_ps);                       // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);                       // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);                       // return a * b
+SIMD_WRAPPER_1_(rcp_ps, _mm512_rcp14_ps);     // return 1.0f / a
+SIMD_WRAPPER_1_(rsqrt_ps, _mm512_rsqrt14_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);                       // return a - b
  
  template <RoundMode RMT>
  static SIMDINLINE Float SIMDCALL round_ps(Float a)
@@ -184,52 +156,57 @@ static SIMDINLINE Float SIMDCALL round_ps(Float a)
      return _mm512_roundscale_ps(a, static_cast<int>(RMT));
  }
  
-static SIMDINLINE Float SIMDCALL ceil_ps(Float a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float a)
+{
+    return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float a)
+{
+    return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
  //-----------------------------------------------------------------------
  SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-//SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
-//SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+// SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
+// SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
  SIMD_IWRAPPER_2(min_epu32); // return (a < b) ? a : b (uint32)
  SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
  
-                            // return (a * b) & 0xFFFFFFFF
-                            //
-                            // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
-                            // and store the low 32 bits of the intermediate integers in dst.
+// return (a * b) & 0xFFFFFFFF
+//
+// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers,
+// and store the low 32 bits of the intermediate integers in dst.
  SIMD_IWRAPPER_2(mullo_epi32);
  SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
  SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-//SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+// SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
  
  //-----------------------------------------------------------------------
  // Logical operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_2_(and_si, and_si512);        // return a & b       (int)
-SIMD_IWRAPPER_2_(andnot_si, andnot_si512);  // return (~a) & b    (int)
-SIMD_IWRAPPER_2_(or_si, or_si512);          // return a | b       (int)
-SIMD_IWRAPPER_2_(xor_si, xor_si512);        // return a ^ b       (int)
+SIMD_IWRAPPER_2_(and_si, and_si512);       // return a & b       (int)
+SIMD_IWRAPPER_2_(andnot_si, andnot_si512); // return (~a) & b    (int)
+SIMD_IWRAPPER_2_(or_si, or_si512);         // return a | b       (int)
+SIMD_IWRAPPER_2_(xor_si, xor_si512);       // return a ^ b       (int)
  
  // SIMD_WRAPPER_2(and_ps);                     // return a & b       (float treated as int)
  // SIMD_WRAPPER_2(andnot_ps);                  // return (~a) & b    (float treated as int)
  // SIMD_WRAPPER_2(or_ps);                      // return a | b       (float treated as int)
  // SIMD_WRAPPER_2(xor_ps);                     // return a ^ b       (float treated as int)
  
-
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-SIMD_IWRAPPER_1I(slli_epi32);               // return a << ImmT
+SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT
  SIMD_IWRAPPER_2(sllv_epi32);
-SIMD_IWRAPPER_1I(srai_epi32);               // return a >> ImmT   (int32)
-SIMD_IWRAPPER_1I(srli_epi32);               // return a >> ImmT   (uint32)
+SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT   (int32)
+SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT   (uint32)
  
  #if 0
  SIMD_IWRAPPER_1I_(srli_si, srli_si512);     // return a >> (ImmT*8) (uint)
@@ -246,32 +223,32 @@ SIMD_IWRAPPER_2(srlv_epi32);
  //-----------------------------------------------------------------------
  // Conversion operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double a) // return *(Float*)(&a)
  {
      return _mm512_castpd_ps(a);
  }
  
-static SIMDINLINE Integer SIMDCALL castps_si(Float a)   // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float a) // return *(Integer*)(&a)
  {
      return _mm512_castps_si512(a);
  }
  
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer a) // return *(Double*)(&a)
  {
      return _mm512_castsi512_pd(a);
  }
  
-static SIMDINLINE Double SIMDCALL castps_pd(Float a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float a) // return *(Double*)(&a)
  {
      return _mm512_castps_pd(a);
  }
  
-static SIMDINLINE Integer SIMDCALL castpd_si(Double a)   // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castpd_si(Double a) // return *(Integer*)(&a)
  {
      return _mm512_castpd_si512(a);
  }
  
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer a)   // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer a) // return *(Float*)(&a)
  {
      return _mm512_castsi512_ps(a);
  }
@@ -281,18 +258,19 @@ static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a    (i
      return _mm512_cvtepi32_ps(a);
  }
  
-//SIMD_IWRAPPER_1_8(cvtepu8_epi16);     // return (int16)a    (uint8 --> int16)
-SIMD_IWRAPPER_1_4(cvtepu8_epi32);     // return (int32)a    (uint8 --> int32)
-SIMD_IWRAPPER_1_8(cvtepu16_epi32);    // return (int32)a    (uint16 --> int32)
-SIMD_IWRAPPER_1_4(cvtepu16_epi64);    // return (int64)a    (uint16 --> int64)
-SIMD_IWRAPPER_1_8(cvtepu32_epi64);    // return (int64)a    (uint32 --> int64)
+// SIMD_IWRAPPER_1_8(cvtepu8_epi16);     // return (int16)a    (uint8 --> int16)
+SIMD_IWRAPPER_1_4(cvtepu8_epi32);  // return (int32)a    (uint8 --> int32)
+SIMD_IWRAPPER_1_8(cvtepu16_epi32); // return (int32)a    (uint16 --> int32)
+SIMD_IWRAPPER_1_4(cvtepu16_epi64); // return (int64)a    (uint16 --> int64)
+SIMD_IWRAPPER_1_8(cvtepu32_epi64); // return (int64)a    (uint32 --> int64)
  
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a)            // return (int32)a    (float --> int32)
+static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float a) // return (int32)a    (float --> int32)
  {
      return _mm512_cvtps_epi32(a);
  }
  
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a)           // return (int32)a    (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvttps_epi32(Float a) // return (int32)a    (rnd_to_zero(float) --> int32)
  {
      return _mm512_cvttps_epi32(a);
  }
@@ -300,13 +278,13 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float a)           // return (in
  //-----------------------------------------------------------------------
  // Comparison operations
  //-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
  static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float a, Float b)
  {
      return _mm512_cmp_ps_mask(a, b, static_cast<const int>(CmpTypeT));
  }
  
-template<CompareType CmpTypeT>
+template <CompareType CmpTypeT>
  static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT) b
  {
      // Legacy vector mask generator
@@ -314,21 +292,39 @@ static SIMDINLINE Float SIMDCALL cmp_ps(Float a, Float b) // return a (CmpTypeT)
      return castsi_ps(vmask(result));
  }
  
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b)
+{
+    return cmp_ps<CompareType::LE_OQ>(a, b);
+}
  
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
  static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b)
  {
      // Legacy vector mask generator
      __mmask16 result = _mm512_cmp_epi32_mask(a, b, static_cast<const int>(CmpTypeT));
      return vmask(result);
  }
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
  static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b)
  {
      // Legacy vector mask generator
@@ -336,22 +332,24 @@ static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b)
      return vmask(result);
  }
  
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>);   // return a == b (int32)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>);   // return a == b (int64)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
-//SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>);   // return a > b (int32)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>);   // return a > b (int64)
-SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>);   // return a < b (int32)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>); // return a == b (int32)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>); // return a == b (int64)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
+// SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>); // return a > b (int32)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>); // return a > b (int64)
+SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>); // return a < b (int32)
  
-static SIMDINLINE bool SIMDCALL testz_ps(Float a, Float b)  // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL testz_ps(Float a,
+                                         Float b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
  {
      return (0 == static_cast<int>(_mm512_test_epi32_mask(castps_si(a), castps_si(b))));
  }
  
-static SIMDINLINE bool SIMDCALL testz_si(Integer a, Integer b)  // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer a,
+                                         Integer b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
  {
      return (0 == static_cast<int>(_mm512_test_epi32_mask(a, b)));
  }
@@ -376,75 +374,82 @@ static SIMDINLINE Float blendv_ps(Float a, Float b, Float mask) // return mask ?
      return _mm512_mask_blend_ps(__mmask16(movemask_ps(mask)), a, b);
  }
  
-
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Float mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+                                                Integer b,
+                                                Float   mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), mask));
  }
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a, Integer b, Integer mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer a,
+                                                Integer b,
+                                                Integer mask) // return mask ? b : a (int)
  {
      return castps_si(blendv_ps(castsi_ps(a), castsi_ps(b), castsi_ps(mask)));
  }
  
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p)  // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+                        broadcast_ss(float const* p) // return *p (all elements in vector get same value)
  {
      return _mm512_set1_ps(*p);
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float a)
  {
      return _mm256_castpd_ps(_mm512_extractf64x4_pd(_mm512_castps_pd(a), imm));
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double a)
  {
      return _mm512_extractf64x4_pd(a, imm);
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer a)
  {
      return _mm512_extracti64x4_epi64(a, imm);
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE Float SIMDCALL insert_ps(Float a, SIMD256Impl::Float b)
  {
      return _mm512_castpd_ps(_mm512_insertf64x4(_mm512_castps_pd(a), _mm256_castps_pd(b), imm));
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE Double SIMDCALL insert_pd(Double a, SIMD256Impl::Double b)
  {
      return _mm512_insertf64x4(a, b, imm);
  }
  
-template<int imm>
+template <int imm>
  static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b)
  {
      return _mm512_inserti64x4(a, b, imm);
  }
  
-// SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
-// SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
-// SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
-// SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
+// Disabled pack wrappers (kept commented out, one per line):
+// SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm512_packs_epi16
+// SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm512_packs_epi32
+// SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
+// SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
  
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
  {
      return _mm512_permute_ps(a, ImmT);
  }
  
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Integer SIMDCALL
+                          permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (int)
  {
      return _mm512_permutexvar_epi32(swiz, a);
  }
  
-static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+                        permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float)
  {
      return _mm512_permutexvar_ps(swiz, a);
  }
@@ -455,11 +460,11 @@ SIMD_IWRAPPER_2I_(permute2f128_si, shuffle_i32x4);
  
  SIMD_IWRAPPER_1I(shuffle_epi32);
  
-//SIMD_IWRAPPER_2(shuffle_epi8);
+// SIMD_IWRAPPER_2(shuffle_epi8);
  SIMD_DWRAPPER_2I(shuffle_pd);
  SIMD_WRAPPER_2I(shuffle_ps);
  
-template<int ImmT>
+template <int ImmT>
  static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
  {
      return castpd_si(shuffle_pd<ImmT>(castsi_pd(a), castsi_pd(b)));
@@ -467,73 +472,79 @@ static SIMDINLINE Integer SIMDCALL shuffle_epi64(Integer a, Integer b)
  
  SIMD_IWRAPPER_2(unpackhi_epi16);
  
-//SIMD_IFWRAPPER_2(unpackhi_epi32, _mm512_unpackhi_ps);
+// SIMD_IFWRAPPER_2(unpackhi_epi32, _mm512_unpackhi_ps);
  static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
  {
      return castps_si(_mm512_unpackhi_ps(castsi_ps(a), castsi_ps(b)));
  }
  
  SIMD_IWRAPPER_2(unpackhi_epi64);
-//SIMD_IWRAPPER_2(unpackhi_epi8);
+// SIMD_IWRAPPER_2(unpackhi_epi8);
  SIMD_DWRAPPER_2(unpackhi_pd);
  SIMD_WRAPPER_2(unpackhi_ps);
-//SIMD_IWRAPPER_2(unpacklo_epi16);
+// SIMD_IWRAPPER_2(unpacklo_epi16);
  SIMD_IFWRAPPER_2(unpacklo_epi32, unpacklo_ps);
  SIMD_IWRAPPER_2(unpacklo_epi64);
-//SIMD_IWRAPPER_2(unpacklo_epi8);
+// SIMD_IWRAPPER_2(unpacklo_epi8);
  SIMD_DWRAPPER_2(unpacklo_pd);
  SIMD_WRAPPER_2(unpacklo_ps);
  
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
      return _mm512_i32gather_ps(idx, p, static_cast<int>(ScaleT));
  }
  
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p)  // return *p    (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+                        load1_ps(float const* p) // return *p    (broadcast 1 value to all elements)
  {
      return broadcast_ss(p);
  }
  
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
      return _mm512_load_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
      return _mm512_load_si512(&p->v);
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
      return _mm512_loadu_ps(p);
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
      return _mm512_loadu_si512(p);
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask)
  {
      __mmask16 k = _mm512_cmpneq_ps_mask(mask, setzero_ps());
  
      return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT));
  }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer mask, Float src)
  {
      Mask m = _mm512_cmplt_epi32_mask(mask, setzero_si());
      _mm512_mask_store_ps(p, m, src);
  }
  
-//static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
+// static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
  //{
  //    __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
  //    return static_cast<uint64_t>(m);
@@ -565,78 +576,99 @@ static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements a
      return _mm512_set1_epi8(i);
  }
  
-static SIMDINLINE Float SIMDCALL set1_ps(float f)  // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
  {
      return _mm512_set1_ps(f);
  }
  
-static SIMDINLINE Double SIMDCALL setzero_pd()      // return 0 (double)
+static SIMDINLINE Double SIMDCALL setzero_pd() // return 0 (double)
  {
      return _mm512_setzero_pd();
  }
  
-static SIMDINLINE Float SIMDCALL setzero_ps()      // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
  {
      return _mm512_setzero_ps();
  }
  
-static SIMDINLINE Integer SIMDCALL setzero_si()      // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
  {
      return _mm512_setzero_si512();
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float a) // *p = a   (stores all elements contiguously in memory)
  {
      _mm512_store_ps(p, a);
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer a) // *p = a
  {
      _mm512_store_si512(&p->v, a);
  }
  
-static SIMDINLINE void SIMDCALL storeu_si(Integer *p, Integer a) // *p = a    (same as store_si but allows for unaligned mem)
+static SIMDINLINE void SIMDCALL
+                       storeu_si(Integer* p, Integer a) // *p = a    (same as store_si but allows for unaligned mem)
  {
      _mm512_storeu_si512(&p->v, a);
  }
  
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float a)   // *p = a   (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+                       stream_ps(float* p, Float a) // *p = a   (same as store_ps, but doesn't keep memory in cache)
  {
      _mm512_stream_ps(p, a);
  }
  
-static SIMDINLINE Integer SIMDCALL set_epi32(
-    int i15, int i14, int i13, int i12, int i11, int i10, int i9, int i8,
-    int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
+                                             int i14,
+                                             int i13,
+                                             int i12,
+                                             int i11,
+                                             int i10,
+                                             int i9,
+                                             int i8,
+                                             int i7,
+                                             int i6,
+                                             int i5,
+                                             int i4,
+                                             int i3,
+                                             int i2,
+                                             int i1,
+                                             int i0)
  {
-    return _mm512_set_epi32(
-        i15, i14, i13, i12, i11, i10, i9, i8,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return _mm512_set_epi32(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE Integer SIMDCALL set_epi32(
-    int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+                          set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
  {
-    return set_epi32(
-        0, 0, 0, 0, 0, 0, 0, 0,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE Float SIMDCALL set_ps(
-    float i15, float i14, float i13, float i12, float i11, float i10, float i9, float i8,
-    float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL set_ps(float i15,
+                                        float i14,
+                                        float i13,
+                                        float i12,
+                                        float i11,
+                                        float i10,
+                                        float i9,
+                                        float i8,
+                                        float i7,
+                                        float i6,
+                                        float i5,
+                                        float i4,
+                                        float i3,
+                                        float i2,
+                                        float i1,
+                                        float i0)
  {
-    return _mm512_set_ps(
-        i15, i14, i13, i12, i11, i10, i9, i8,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return _mm512_set_ps(i15, i14, i13, i12, i11, i10, i9, i8, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE Float SIMDCALL set_ps(
-    float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+                        set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
  {
-    return set_ps(
-        0, 0, 0, 0, 0, 0, 0, 0,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
  static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
@@ -665,4 +697,3 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
  #undef SIMD_IWRAPPER_2
  #undef SIMD_IWRAPPER_2_
  #undef SIMD_IWRAPPER_2I
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl

index fed6307f4bc3c4e9ac97c3d5c51f4edc1069422b..82aa2bb41739614230395183c8be600af4705431 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -29,139 +29,111 @@
  //
  //============================================================================
  
-#define SIMD_WRAPPER_1_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return intrin(a);\
-    }
+#define SIMD_WRAPPER_1_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
  
-#define SIMD_WRAPPER_1(op)  \
-    SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
  
-#define SIMD_WRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
+#define SIMD_WRAPPER_2_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
  #define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
  
-#define SIMD_WRAPPERI_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_castsi512_ps(_mm512_##intrin(\
-            _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin)                                          \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                     \
+    {                                                                         \
+        return _mm512_castsi512_ps(                                           \
+            _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
      }
  
-#define SIMD_DWRAPPER_2(op)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##op(a, b);\
-    }
+#define SIMD_DWRAPPER_2(op) \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
  
-#define SIMD_WRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin)                      \
+    template <int ImmT>                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+    {                                                     \
+        return _mm512_##intrin(a, b, ImmT);               \
      }
-#define SIMD_WRAPPER_2I(op)  SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
  
-#define SIMD_DWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin)                        \
+    template <int ImmT>                                      \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+    {                                                        \
+        return _mm512_##intrin(a, b, ImmT);                  \
      }
-#define SIMD_DWRAPPER_2I(op)  SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
  
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return _mm512_##op(a, b, c);\
-    }
+#define SIMD_WRAPPER_3(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
-#define SIMD_IWRAPPER_1_8(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1_4(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1_4(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1I_(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin)                \
+    template <int ImmT>                              \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) \
+    {                                                \
+        return intrin(a, ImmT);                      \
      }
  #define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
  
-#define SIMD_IWRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
-#define SIMD_IWRAPPER_2(op)  SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
  
-#define SIMD_IWRAPPER_2_CMP(op, cmp)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return cmp(a, b);\
-    }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
  
-#define SIMD_IFWRAPPER_2(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin)                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)        \
+    {                                                                  \
+        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
      }
  
-#define SIMD_IWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin)                           \
+    template <int ImmT>                                         \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+    {                                                           \
+        return _mm512_##intrin(a, b, ImmT);                     \
      }
  #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
  private:
-    static SIMDINLINE Integer vmask(__mmask32 m)
-    {
-        return _mm512_maskz_set1_epi16(m, -1);
-    }
-    static SIMDINLINE Integer vmask(__mmask64 m)
-    {
-        return _mm512_maskz_set1_epi8(m, -1);
-    }
-public:
+static SIMDINLINE Integer vmask(__mmask32 m)
+{
+    return _mm512_maskz_set1_epi16(m, -1);
+}
+static SIMDINLINE Integer vmask(__mmask64 m)
+{
+    return _mm512_maskz_set1_epi8(m, -1);
+}
  
-SIMD_IWRAPPER_2(add_epi8);                  // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8);                 // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
-SIMD_IWRAPPER_2(subs_epu8);                 // return (b > a) ? 0 : (a - b) (uint8)
+public:
+SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
+SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
  
-SIMD_WRAPPER_2(and_ps);                     // return a & b       (float treated as int)
-SIMD_WRAPPER_2(andnot_ps);                  // return (~a) & b    (float treated as int)
-SIMD_WRAPPER_2(or_ps);                      // return a | b       (float treated as int)
-SIMD_WRAPPER_2(xor_ps);                     // return a ^ b       (float treated as int)
+SIMD_WRAPPER_2(and_ps);    // return a & b       (float treated as int)
+SIMD_WRAPPER_2(andnot_ps); // return (~a) & b    (float treated as int)
+SIMD_WRAPPER_2(or_ps);     // return a | b       (float treated as int)
+SIMD_WRAPPER_2(xor_ps);    // return a ^ b       (float treated as int)
  
-SIMD_IWRAPPER_1_8(cvtepu8_epi16);           // return (int16)a    (uint8 --> int16)
+SIMD_IWRAPPER_1_8(cvtepu8_epi16); // return (int16)a    (uint8 --> int16)
  
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
  static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
  {
      // Legacy vector mask generator
      __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
      return vmask(result);
  }
-template<CompareTypeInt CmpTypeT>
+template <CompareTypeInt CmpTypeT>
  static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
  {
      // Legacy vector mask generator
@@ -169,19 +141,19 @@ static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
      return vmask(result);
  }
  
-SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi8, cmp_epi8<CompareTypeInt::EQ>);   // return a == b (int8)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>); // return a == b (int16)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi8, cmp_epi8<CompareTypeInt::GT>);   // return a > b (int8)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>); // return a > b (int16)
  
-SIMD_IWRAPPER_2(packs_epi16);               // See documentation for _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32);               // See documentation for _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16);              // See documentation for _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32);              // See documentation for _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32
  
-SIMD_IWRAPPER_2(unpackhi_epi8);             // See documentation for _mm512_unpackhi_epi8
-SIMD_IWRAPPER_2(unpacklo_epi16);            // See documentation for _mm512_unpacklo_epi16
-SIMD_IWRAPPER_2(unpacklo_epi8);             // See documentation for _mm512_unpacklo_epi8
+SIMD_IWRAPPER_2(unpackhi_epi8);  // See documentation for _mm512_unpackhi_epi8
+SIMD_IWRAPPER_2(unpacklo_epi16); // See documentation for _mm512_unpacklo_epi16
+SIMD_IWRAPPER_2(unpacklo_epi8);  // See documentation for _mm512_unpacklo_epi8
  
  SIMD_IWRAPPER_2(shuffle_epi8);
  
@@ -191,8 +163,6 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
      return static_cast<uint64_t>(m);
  }
  
-
-
  #undef SIMD_WRAPPER_1_
  #undef SIMD_WRAPPER_1
  #undef SIMD_WRAPPER_2
@@ -214,4 +184,3 @@ static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
  #undef SIMD_IWRAPPER_2
  #undef SIMD_IWRAPPER_2_
  #undef SIMD_IWRAPPER_2I
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl

index 690ab386b46e648a4ffee33e90ab166064120200..9ec3ff6c6b11e41ffdb7e03fbccd89c92dc744d9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -29,113 +29,85 @@
  //
  //============================================================================
  
-#define SIMD_WRAPPER_1_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a)   \
-    {\
-        return intrin(a);\
-    }
+#define SIMD_WRAPPER_1_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a) { return intrin(a); }
  
-#define SIMD_WRAPPER_1(op)  \
-    SIMD_WRAPPER_1_(op, _mm512_##op)
+#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, _mm512_##op)
  
-#define SIMD_WRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
+#define SIMD_WRAPPER_2_(op, intrin) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) { return _mm512_##intrin(a, b); }
  #define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op)
  
-#define SIMD_WRAPPERI_2_(op, intrin)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_castsi512_ps(_mm512_##intrin(\
-            _mm512_castps_si512(a), _mm512_castps_si512(b)));\
+#define SIMD_WRAPPERI_2_(op, intrin)                                          \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b)                     \
+    {                                                                         \
+        return _mm512_castsi512_ps(                                           \
+            _mm512_##intrin(_mm512_castps_si512(a), _mm512_castps_si512(b))); \
      }
  
-#define SIMD_DWRAPPER_2(op)  \
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##op(a, b);\
-    }
+#define SIMD_DWRAPPER_2(op) \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) { return _mm512_##op(a, b); }
  
-#define SIMD_WRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_WRAPPER_2I_(op, intrin)                      \
+    template <int ImmT>                                   \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b) \
+    {                                                     \
+        return _mm512_##intrin(a, b, ImmT);               \
      }
-#define SIMD_WRAPPER_2I(op)  SIMD_WRAPPER_2I_(op, op)
+#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op)
  
-#define SIMD_DWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Double SIMDCALL op(Double a, Double b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_DWRAPPER_2I_(op, intrin)                        \
+    template <int ImmT>                                      \
+    static SIMDINLINE Double SIMDCALL op(Double a, Double b) \
+    {                                                        \
+        return _mm512_##intrin(a, b, ImmT);                  \
      }
-#define SIMD_DWRAPPER_2I(op)  SIMD_DWRAPPER_2I_(op, op)
+#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op)
  
-#define SIMD_WRAPPER_3(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c)   \
-    {\
-        return _mm512_##op(a, b, c);\
-    }
+#define SIMD_WRAPPER_3(op) \
+    static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) { return _mm512_##op(a, b, c); }
  
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
-#define SIMD_IWRAPPER_1_8(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1(op) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) { return _mm512_##op(a); }
+#define SIMD_IWRAPPER_1_8(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1_4(op)  \
-    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a)   \
-    {\
-        return _mm512_##op(a);\
-    }
+#define SIMD_IWRAPPER_1_4(op) \
+    static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) { return _mm512_##op(a); }
  
-#define SIMD_IWRAPPER_1I_(op, intrin)  \
-    template<int ImmT> \
-    static SIMDINLINE Integer SIMDCALL op(Integer a)   \
-    {\
-        return intrin(a, ImmT);\
+#define SIMD_IWRAPPER_1I_(op, intrin)                \
+    template <int ImmT>                              \
+    static SIMDINLINE Integer SIMDCALL op(Integer a) \
+    {                                                \
+        return intrin(a, ImmT);                      \
      }
  #define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op)
  
-#define SIMD_IWRAPPER_2_(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b);\
-    }
-#define SIMD_IWRAPPER_2(op)  SIMD_IWRAPPER_2_(op, op)
+#define SIMD_IWRAPPER_2_(op, intrin) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return _mm512_##intrin(a, b); }
+#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op)
  
-#define SIMD_IWRAPPER_2_CMP(op, cmp)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return cmp(a, b);\
-    }
+#define SIMD_IWRAPPER_2_CMP(op, cmp) \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) { return cmp(a, b); }
  
-#define SIMD_IFWRAPPER_2(op, intrin)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\
+#define SIMD_IFWRAPPER_2(op, intrin)                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)        \
+    {                                                                  \
+        return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b))); \
      }
  
-#define SIMD_IWRAPPER_2I_(op, intrin)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b)   \
-    {\
-        return _mm512_##intrin(a, b, ImmT);\
+#define SIMD_IWRAPPER_2I_(op, intrin)                           \
+    template <int ImmT>                                         \
+    static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \
+    {                                                           \
+        return _mm512_##intrin(a, b, ImmT);                     \
      }
  #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
-SIMD_WRAPPERI_2_(and_ps, and_epi32);          // return a & b       (float treated as int)
-SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32);    // return (~a) & b    (float treated as int)
-SIMD_WRAPPERI_2_(or_ps, or_epi32);            // return a | b       (float treated as int)
-SIMD_WRAPPERI_2_(xor_ps, xor_epi32);          // return a ^ b       (float treated as int)
+SIMD_WRAPPERI_2_(and_ps, and_epi32);       // return a & b       (float treated as int)
+SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b    (float treated as int)
+SIMD_WRAPPERI_2_(or_ps, or_epi32);         // return a | b       (float treated as int)
+SIMD_WRAPPERI_2_(xor_ps, xor_epi32);       // return a ^ b       (float treated as int)
  
  #undef SIMD_WRAPPER_1_
  #undef SIMD_WRAPPER_1
@@ -158,4 +130,3 @@ SIMD_WRAPPERI_2_(xor_ps, xor_epi32);          // return a ^ b       (float treat
  #undef SIMD_IWRAPPER_2
  #undef SIMD_IWRAPPER_2_
  #undef SIMD_IWRAPPER_2I
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl

index 3e36ce5bd3654a6d0a80b1dfc9c1a6b75470317d..f9d4b8c3902b80c033ef9d0a38ae8471de466c6a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl

index 3e36ce5bd3654a6d0a80b1dfc9c1a6b75470317d..f9d4b8c3902b80c033ef9d0a38ae8471de466c6a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl

index 3e36ce5bd3654a6d0a80b1dfc9c1a6b75470317d..f9d4b8c3902b80c033ef9d0a38ae8471de466c6a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX512_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl

index 55981dceba1d09d041a064defc9f6601e6fbfb7f..91705f2646d1ea777e69279e48c6e7510295cae9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
@@ -29,149 +29,143 @@
  //============================================================================
  
  static const int TARGET_SIMD_WIDTH = 8;
-using SIMD128T = SIMD128Impl::AVXImpl;
-
-#define SIMD_WRAPPER_1(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float const &a)   \
-    {\
-        return Float\
-        {\
-            SIMD256T::op(a.v8[0]),\
-            SIMD256T::op(a.v8[1]),\
-        };\
+using SIMD128T                     = SIMD128Impl::AVXImpl;
+
+#define SIMD_WRAPPER_1(op)                              \
+    static SIMDINLINE Float SIMDCALL op(Float const& a) \
+    {                                                   \
+        return Float{                                   \
+            SIMD256T::op(a.v8[0]),                      \
+            SIMD256T::op(a.v8[1]),                      \
+        };                                              \
      }
  
-#define SIMD_WRAPPER_2(op)  \
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b)    \
-    {\
-        return Float\
-        {\
-            SIMD256T::op(a.v8[0], b.v8[0]),\
-            SIMD256T::op(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_WRAPPER_2(op)                                              \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+    {                                                                   \
+        return Float{                                                   \
+            SIMD256T::op(a.v8[0], b.v8[0]),                             \
+            SIMD256T::op(a.v8[1], b.v8[1]),                             \
+        };                                                              \
      }
  
-#define SIMD_WRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b)   \
-    {\
-        return Float\
-        {\
-            SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),\
-            SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_WRAPPER_2I(op)                                                              \
+    template <int ImmT>                                                                  \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b)                  \
+    {                                                                                    \
+        return Float{                                                                    \
+            SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),                        \
+            SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
+        };                                                                               \
      }
  
-#define SIMD_WRAPPER_2I_1(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b)   \
-    {\
-        return Float\
-        {\
-            SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),\
-            SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_WRAPPER_2I_1(op)                                           \
+    template <int ImmT>                                                 \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b) \
+    {                                                                   \
+        return Float{                                                   \
+            SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),              \
+            SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),              \
+        };                                                              \
      }
  
-#define SIMD_WRAPPER_3(op)  \
-        static SIMDINLINE Float SIMDCALL op(Float const &a, Float const &b, Float const &c)   \
-        {\
-            return Float\
-            {\
-                SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),\
-                SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),\
-            };\
-        }
-
-#define SIMD_IWRAPPER_1(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::op(a.v8[0]),\
-            SIMD256T::op(a.v8[1]),\
-        };\
+#define SIMD_WRAPPER_3(op)                                                              \
+    static SIMDINLINE Float SIMDCALL op(Float const& a, Float const& b, Float const& c) \
+    {                                                                                   \
+        return Float{                                                                   \
+            SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),                                    \
+            SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),                                    \
+        };                                                                              \
      }
  
-#define SIMD_IWRAPPER_2(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::op(a.v8[0], b.v8[0]),\
-            SIMD256T::op(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_IWRAPPER_1(op)                                 \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a) \
+    {                                                       \
+        return Integer{                                     \
+            SIMD256T::op(a.v8[0]),                          \
+            SIMD256T::op(a.v8[1]),                          \
+        };                                                  \
      }
  
-#define SIMD_IWRAPPER_2I(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),\
-            SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_IWRAPPER_2(op)                                                   \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return Integer{                                                       \
+            SIMD256T::op(a.v8[0], b.v8[0]),                                   \
+            SIMD256T::op(a.v8[1], b.v8[1]),                                   \
+        };                                                                    \
      }
  
-#define SIMD_IWRAPPER_2I_1(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),\
-            SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_IWRAPPER_2I(op)                                                             \
+    template <int ImmT>                                                                  \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b)            \
+    {                                                                                    \
+        return Integer{                                                                  \
+            SIMD256T::template op<0xFF & ImmT>(a.v8[0], b.v8[0]),                        \
+            SIMD256T::template op<0xFF & (ImmT >> TARGET_SIMD_WIDTH)>(a.v8[1], b.v8[1]), \
+        };                                                                               \
      }
  
-#define SIMD_IWRAPPER_2I_2(op)  \
-    template<int ImmT>\
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::template op<0xF & ImmT>(a.v8[0], b.v8[0]),\
-            SIMD256T::template op<0xF & (ImmT >> 4)>(a.v8[1], b.v8[1]),\
-        };\
+#define SIMD_IWRAPPER_2I_1(op)                                                \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return Integer{                                                       \
+            SIMD256T::template op<ImmT>(a.v8[0], b.v8[0]),                    \
+            SIMD256T::template op<ImmT>(a.v8[1], b.v8[1]),                    \
+        };                                                                    \
      }
  
-#define SIMD_IWRAPPER_3(op)  \
-    static SIMDINLINE Integer SIMDCALL op(Integer const &a, Integer const &b, Integer const &c)   \
-    {\
-        return Integer\
-        {\
-            SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),\
-            SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),\
-        };\
+#define SIMD_IWRAPPER_2I_2(op)                                                \
+    template <int ImmT>                                                       \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b) \
+    {                                                                         \
+        return Integer{                                                       \
+            SIMD256T::template op<0xF & ImmT>(a.v8[0], b.v8[0]),              \
+            SIMD256T::template op<0xF & (ImmT >> 4)>(a.v8[1], b.v8[1]),       \
+        };                                                                    \
+    }
+
+#define SIMD_IWRAPPER_3(op)                                                                     \
+    static SIMDINLINE Integer SIMDCALL op(Integer const& a, Integer const& b, Integer const& c) \
+    {                                                                                           \
+        return Integer{                                                                         \
+            SIMD256T::op(a.v8[0], b.v8[0], c.v8[0]),                                            \
+            SIMD256T::op(a.v8[1], b.v8[1], c.v8[1]),                                            \
+        };                                                                                      \
      }
  
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2(add_ps);     // return a + b
-SIMD_WRAPPER_2(div_ps);     // return a / b
-SIMD_WRAPPER_3(fmadd_ps);   // return (a * b) + c
-SIMD_WRAPPER_3(fmsub_ps);   // return (a * b) - c
-SIMD_WRAPPER_2(max_ps);     // return (a > b) ? a : b
-SIMD_WRAPPER_2(min_ps);     // return (a < b) ? a : b
-SIMD_WRAPPER_2(mul_ps);     // return a * b
-SIMD_WRAPPER_1(rcp_ps);     // return 1.0f / a
-SIMD_WRAPPER_1(rsqrt_ps);   // return 1.0f / sqrt(a)
-SIMD_WRAPPER_2(sub_ps);     // return a - b
+SIMD_WRAPPER_2(add_ps);   // return a + b
+SIMD_WRAPPER_2(div_ps);   // return a / b
+SIMD_WRAPPER_3(fmadd_ps); // return (a * b) + c
+SIMD_WRAPPER_3(fmsub_ps); // return (a * b) - c
+SIMD_WRAPPER_2(max_ps);   // return (a > b) ? a : b
+SIMD_WRAPPER_2(min_ps);   // return (a < b) ? a : b
+SIMD_WRAPPER_2(mul_ps);   // return a * b
+SIMD_WRAPPER_1(rcp_ps);   // return 1.0f / a
+SIMD_WRAPPER_1(rsqrt_ps); // return 1.0f / sqrt(a)
+SIMD_WRAPPER_2(sub_ps);   // return a - b
  
  template <RoundMode RMT>
-static SIMDINLINE Float SIMDCALL round_ps(Float const &a)
+static SIMDINLINE Float SIMDCALL round_ps(Float const& a)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template round_ps<RMT>(a.v8[0]),
          SIMD256T::template round_ps<RMT>(a.v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL ceil_ps(Float const &a) { return round_ps<RoundMode::CEIL_NOEXC>(a); }
-static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<RoundMode::FLOOR_NOEXC>(a); }
+static SIMDINLINE Float SIMDCALL ceil_ps(Float const& a)
+{
+    return round_ps<RoundMode::CEIL_NOEXC>(a);
+}
+static SIMDINLINE Float SIMDCALL floor_ps(Float const& a)
+{
+    return round_ps<RoundMode::FLOOR_NOEXC>(a);
+}
  
  //-----------------------------------------------------------------------
  // Integer (various width) arithmetic operations
@@ -179,7 +173,7 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float const &a) { return round_ps<Roun
  SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
  SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
  SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -207,178 +201,168 @@ SIMD_IWRAPPER_2(or_si);     // return a | b       (int)
  SIMD_WRAPPER_2(xor_ps);     // return a ^ b       (float treated as int)
  SIMD_IWRAPPER_2(xor_si);    // return a ^ b       (int)
  
-
  //-----------------------------------------------------------------------
  // Shift operations
  //-----------------------------------------------------------------------
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const &a)      // return a << ImmT
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL slli_epi32(Integer const& a) // return a << ImmT
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::template slli_epi32<ImmT>(a.v8[0]),
          SIMD256T::template slli_epi32<ImmT>(a.v8[1]),
      };
  }
  
-SIMD_IWRAPPER_2(sllv_epi32);                                // return a << b      (uint32)
+SIMD_IWRAPPER_2(sllv_epi32); // return a << b      (uint32)
  
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const &a)      // return a >> ImmT   (int32)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL srai_epi32(Integer const& a) // return a >> ImmT   (int32)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::template srai_epi32<ImmT>(a.v8[0]),
          SIMD256T::template srai_epi32<ImmT>(a.v8[1]),
      };
  }
  
-template<int ImmT>
-static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const &a)      // return a >> ImmT   (uint32)
+template <int ImmT>
+static SIMDINLINE Integer SIMDCALL srli_epi32(Integer const& a) // return a >> ImmT   (uint32)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::template srli_epi32<ImmT>(a.v8[0]),
          SIMD256T::template srli_epi32<ImmT>(a.v8[1]),
      };
  }
  
-template<int ImmT>                                          // for each 128-bit lane:
-static SIMDINLINE Integer SIMDCALL srli_si(Integer const &a)         //  return a >> (ImmT*8) (uint)
+template <int ImmT>                                          // for each 128-bit lane:
+static SIMDINLINE Integer SIMDCALL srli_si(Integer const& a) //  return a >> (ImmT*8) (uint)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::template srli_si<ImmT>(a.v8[0]),
          SIMD256T::template srli_si<ImmT>(a.v8[1]),
      };
  }
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL srlisi_ps(Float const &a)       // same as srli_si, but with Float cast to int
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL
+                        srlisi_ps(Float const& a) // same as srli_si, but with Float cast to int
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template srlisi_ps<ImmT>(a.v8[0]),
          SIMD256T::template srlisi_ps<ImmT>(a.v8[1]),
      };
  }
  
-SIMD_IWRAPPER_2(srlv_epi32);                                // return a >> b      (uint32)
+SIMD_IWRAPPER_2(srlv_epi32); // return a >> b      (uint32)
  
  //-----------------------------------------------------------------------
  // Conversion operations
  //-----------------------------------------------------------------------
-static SIMDINLINE Float SIMDCALL castpd_ps(Double const &a)              // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castpd_ps(Double const& a) // return *(Float*)(&a)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::castpd_ps(a.v8[0]),
          SIMD256T::castpd_ps(a.v8[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL castps_si(Float const &a)              // return *(Integer*)(&a)
+static SIMDINLINE Integer SIMDCALL castps_si(Float const& a) // return *(Integer*)(&a)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::castps_si(a.v8[0]),
          SIMD256T::castps_si(a.v8[1]),
      };
  }
  
-static SIMDINLINE Double SIMDCALL castsi_pd(Integer const &a)              // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castsi_pd(Integer const& a) // return *(Double*)(&a)
  {
-    return Double
-    {
+    return Double{
          SIMD256T::castsi_pd(a.v8[0]),
          SIMD256T::castsi_pd(a.v8[1]),
      };
  }
  
-static SIMDINLINE Double SIMDCALL castps_pd(Float const &a)   // return *(Double*)(&a)
+static SIMDINLINE Double SIMDCALL castps_pd(Float const& a) // return *(Double*)(&a)
  {
-    return Double
-    {
+    return Double{
          SIMD256T::castps_pd(a.v8[0]),
          SIMD256T::castps_pd(a.v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL castsi_ps(Integer const &a)              // return *(Float*)(&a)
+static SIMDINLINE Float SIMDCALL castsi_ps(Integer const& a) // return *(Float*)(&a)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::castsi_ps(a.v8[0]),
          SIMD256T::castsi_ps(a.v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer const &a)            // return (float)a    (int32 --> float)
+static SIMDINLINE Float SIMDCALL
+                        cvtepi32_ps(Integer const& a) // return (float)a    (int32 --> float)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::cvtepi32_ps(a.v8[0]),
          SIMD256T::cvtepi32_ps(a.v8[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtepu8_epi16(SIMD256Impl::Integer const &a)          // return (int16)a    (uint8 --> int16)
+static SIMDINLINE Integer SIMDCALL
+                          cvtepu8_epi16(SIMD256Impl::Integer const& a) // return (int16)a    (uint8 --> int16)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::cvtepu8_epi16(a.v4[0]),
          SIMD256T::cvtepu8_epi16(a.v4[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtepu8_epi32(SIMD256Impl::Integer const &a)          // return (int32)a    (uint8 --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvtepu8_epi32(SIMD256Impl::Integer const& a) // return (int32)a    (uint8 --> int32)
  {
-    return Integer
-       {
+    return Integer{
          SIMD256T::cvtepu8_epi32(a.v4[0]),
          SIMD256T::cvtepu8_epi32(SIMD128T::template srli_si<8>(a.v4[0])),
-       };
+    };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtepu16_epi32(SIMD256Impl::Integer const &a)         // return (int32)a    (uint16 --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvtepu16_epi32(SIMD256Impl::Integer const& a) // return (int32)a    (uint16 --> int32)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::cvtepu16_epi32(a.v4[0]),
          SIMD256T::cvtepu16_epi32(a.v4[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtepu16_epi64(SIMD256Impl::Integer const &a)         // return (int64)a    (uint16 --> int64)
+static SIMDINLINE Integer SIMDCALL
+                          cvtepu16_epi64(SIMD256Impl::Integer const& a) // return (int64)a    (uint16 --> int64)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::cvtepu16_epi64(a.v4[0]),
          SIMD256T::cvtepu16_epi64(SIMD128T::template srli_si<8>(a.v4[0])),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtepu32_epi64(SIMD256Impl::Integer const &a)         // return (int64)a    (uint32 --> int64)
+static SIMDINLINE Integer SIMDCALL
+                          cvtepu32_epi64(SIMD256Impl::Integer const& a) // return (int64)a    (uint32 --> int64)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::cvtepu32_epi64(a.v4[0]),
          SIMD256T::cvtepu32_epi64(a.v4[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvtps_epi32(Float const &a)            // return (int32)a    (float --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvtps_epi32(Float const& a) // return (int32)a    (float --> int32)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::cvtps_epi32(a.v8[0]),
          SIMD256T::cvtps_epi32(a.v8[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a)           // return (int32)a    (rnd_to_zero(float) --> int32)
+static SIMDINLINE Integer SIMDCALL
+                          cvttps_epi32(Float const& a) // return (int32)a    (rnd_to_zero(float) --> int32)
  {
-    return Integer
-    {
+    return Integer{
-        SIMD256T::cvtps_epi32(a.v8[0]),
-        SIMD256T::cvtps_epi32(a.v8[1]),
+        SIMD256T::cvttps_epi32(a.v8[0]),
+        SIMD256T::cvttps_epi32(a.v8[1]),
      };
@@ -387,126 +371,144 @@ static SIMDINLINE Integer SIMDCALL cvttps_epi32(Float const &a)           // ret
  //-----------------------------------------------------------------------
  // Comparison operations
  //-----------------------------------------------------------------------
-template<CompareType CmpTypeT>
-static SIMDINLINE Float SIMDCALL cmp_ps(Float const &a, Float const &b) // return a (CmpTypeT) b
+template <CompareType CmpTypeT>
+static SIMDINLINE Float SIMDCALL cmp_ps(Float const& a, Float const& b) // return a (CmpTypeT) b
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template cmp_ps<CmpTypeT>(a.v8[0], b.v8[0]),
          SIMD256T::template cmp_ps<CmpTypeT>(a.v8[1], b.v8[1]),
      };
  }
-static SIMDINLINE Float SIMDCALL cmplt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GT_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::NEQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::EQ_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmpge_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
-static SIMDINLINE Float SIMDCALL cmple_ps(Float const &a, Float const &b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
+static SIMDINLINE Float SIMDCALL cmplt_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::LT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpgt_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::GT_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpneq_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::NEQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpeq_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::EQ_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmpge_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::GE_OQ>(a, b);
+}
+static SIMDINLINE Float SIMDCALL cmple_ps(Float const& a, Float const& b)
+{
+    return cmp_ps<CompareType::LE_OQ>(a, b);
+}
  
-template<CompareType CmpTypeT>
-static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const &a, Float const &b)
+template <CompareType CmpTypeT>
+static SIMDINLINE Mask SIMDCALL cmp_ps_mask(Float const& a, Float const& b)
  {
      return static_cast<Mask>(movemask_ps(cmp_ps<CmpTypeT>(a, b)));
  }
  
+SIMD_IWRAPPER_2(cmpeq_epi8);  // return a == b (int8)
+SIMD_IWRAPPER_2(cmpeq_epi16); // return a == b (int16)
+SIMD_IWRAPPER_2(cmpeq_epi32); // return a == b (int32)
+SIMD_IWRAPPER_2(cmpeq_epi64); // return a == b (int64)
+SIMD_IWRAPPER_2(cmpgt_epi8);  // return a > b (int8)
+SIMD_IWRAPPER_2(cmpgt_epi16); // return a > b (int16)
+SIMD_IWRAPPER_2(cmpgt_epi32); // return a > b (int32)
+SIMD_IWRAPPER_2(cmpgt_epi64); // return a > b (int64)
+SIMD_IWRAPPER_2(cmplt_epi32); // return a < b (int32)
  
-SIMD_IWRAPPER_2(cmpeq_epi8);    // return a == b (int8)
-SIMD_IWRAPPER_2(cmpeq_epi16);   // return a == b (int16)
-SIMD_IWRAPPER_2(cmpeq_epi32);   // return a == b (int32)
-SIMD_IWRAPPER_2(cmpeq_epi64);   // return a == b (int64)
-SIMD_IWRAPPER_2(cmpgt_epi8);    // return a > b (int8)
-SIMD_IWRAPPER_2(cmpgt_epi16);   // return a > b (int16)
-SIMD_IWRAPPER_2(cmpgt_epi32);   // return a > b (int32)
-SIMD_IWRAPPER_2(cmpgt_epi64);   // return a > b (int64)
-SIMD_IWRAPPER_2(cmplt_epi32);   // return a < b (int32)
-
-static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b)  // return all_lanes_zero(a & b) ? 1 : 0 (float)
+static SIMDINLINE bool SIMDCALL
+                       testz_ps(Float const& a, Float const& b) // return all_lanes_zero(a & b) ? 1 : 0 (float)
  {
-    return  0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) &
-                  SIMD256T::testz_ps(a.v8[1], b.v8[1]));
+    return 0 != (SIMD256T::testz_ps(a.v8[0], b.v8[0]) & SIMD256T::testz_ps(a.v8[1], b.v8[1]));
  }
  
-static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b)  // return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL
+                       testz_si(Integer const& a, Integer const& b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
  {
-    return  0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) &
-                  SIMD256T::testz_si(a.v8[1], b.v8[1]));
+    return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) & SIMD256T::testz_si(a.v8[1], b.v8[1]));
  }
  
  //-----------------------------------------------------------------------
  // Blend / shuffle / permute operations
  //-----------------------------------------------------------------------
-SIMD_WRAPPER_2I(blend_ps);  // return ImmT ? b : a  (float)
-SIMD_IWRAPPER_2I(blend_epi32);  // return ImmT ? b : a  (int32)
-SIMD_WRAPPER_3(blendv_ps);  // return mask ? b : a  (float)
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Float const &mask) // return mask ? b : a (int)
-{
-    return Integer
-    {
+SIMD_WRAPPER_2I(blend_ps);     // return ImmT ? b : a  (float)
+SIMD_IWRAPPER_2I(blend_epi32); // return ImmT ? b : a  (int32)
+SIMD_WRAPPER_3(blendv_ps);     // return mask ? b : a  (float)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+                                                Integer const& b,
+                                                Float const&   mask) // return mask ? b : a (int)
+{
+    return Integer{
          SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
          SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const &a, Integer const &b, Integer const &mask) // return mask ? b : a (int)
+static SIMDINLINE Integer SIMDCALL blendv_epi32(Integer const& a,
+                                                Integer const& b,
+                                                Integer const& mask) // return mask ? b : a (int)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::blendv_epi32(a.v8[0], b.v8[0], mask.v8[0]),
          SIMD256T::blendv_epi32(a.v8[1], b.v8[1], mask.v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL broadcast_ss(float const *p)         // return *p (all elements in vector get same value)
+static SIMDINLINE Float SIMDCALL
+                        broadcast_ss(float const* p) // return *p (all elements in vector get same value)
  {
      float f = *p;
-    return Float
-    {
+    return Float{
          SIMD256T::set1_ps(f),
          SIMD256T::set1_ps(f),
      };
  }
  
-template<int imm>
-static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Float SIMDCALL extract_ps(Float const& a)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
      return a.v8[imm];
  }
  
-template<int imm>
-static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Double SIMDCALL extract_pd(Double const& a)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
      return a.v8[imm];
  }
  
-template<int imm>
-static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const &a)
+template <int imm>
+static SIMDINLINE SIMD256Impl::Integer SIMDCALL extract_si(Integer const& a)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
      return a.v8[imm];
  }
  
-template<int imm>
-static SIMDINLINE Float SIMDCALL insert_ps(Float const &a, SIMD256Impl::Float const &b)
+template <int imm>
+static SIMDINLINE Float SIMDCALL insert_ps(Float const& a, SIMD256Impl::Float const& b)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
-    Float r = a;
+    Float r   = a;
      r.v8[imm] = b;
      return r;
  }
  
-template<int imm>
-static SIMDINLINE Double SIMDCALL insert_pd(Double const &a, SIMD256Impl::Double const &b)
+template <int imm>
+static SIMDINLINE Double SIMDCALL insert_pd(Double const& a, SIMD256Impl::Double const& b)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
-    Double r = a;
+    Double r  = a;
      r.v8[imm] = b;
      return r;
  }
  
-template<int imm>
-static SIMDINLINE Integer SIMDCALL insert_si(Integer const &a, SIMD256Impl::Integer const &b)
+template <int imm>
+static SIMDINLINE Integer SIMDCALL insert_si(Integer const& a, SIMD256Impl::Integer const& b)
  {
      SWR_ASSERT(imm == 0 || imm == 1, "Invalid control code: %d", imm);
      Integer r = a;
@@ -514,27 +516,28 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer const &a, SIMD256Impl::Inte
      return r;
  }
  
-SIMD_IWRAPPER_2(packs_epi16);      // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32);      // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16);     // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32);     // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_2(packs_epi16);  // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32);  // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
  
-template<int ImmT>
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a)
+template <int ImmT>
+static SIMDINLINE Float SIMDCALL permute_ps(Float const& a)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template permute_ps<ImmT>(a.v8[0]),
          SIMD256T::template permute_ps<ImmT>(a.v8[1]),
      };
  }
  
-static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const &a, Integer const &swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
+static SIMDINLINE Integer SIMDCALL permute_epi32(
+    Integer const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (int32)
  {
      return castps_si(permute_ps(castsi_ps(a), swiz));
  }
  
-static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)    // return a[swiz[i]] for each 32-bit lane i (float)
+static SIMDINLINE Float SIMDCALL
+                        permute_ps(Float const& a, Integer const& swiz) // return a[swiz[i]] for each 32-bit lane i (float)
  {
      const auto mask = SIMD256T::set1_epi32(7);
  
@@ -544,10 +547,11 @@ static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)
      auto hilo = SIMD256T::permute_ps(a.v8[0], SIMD256T::and_si(swiz.v8[1], mask));
      auto hihi = SIMD256T::permute_ps(a.v8[1], SIMD256T::and_si(swiz.v8[1], mask));
  
-    return Float
-    {
-        SIMD256T::blendv_ps(lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))),
-        SIMD256T::blendv_ps(hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))),
+    return Float{
+        SIMD256T::blendv_ps(
+            lolo, lohi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[0], mask))),
+        SIMD256T::blendv_ps(
+            hilo, hihi, SIMD256T::castsi_ps(SIMD256T::cmpgt_epi32(swiz.v8[1], mask))),
      };
  }
  
@@ -562,7 +566,7 @@ static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)
  //              ESAC
  //              RETURN tmp[127:0]
  //      }
-//      
+//
  //      dst[127:0]   : = SELECT4(a[511:0], imm8[1:0])
  //      dst[255:128] : = SELECT4(a[511:0], imm8[3:2])
  //      dst[383:256] : = SELECT4(b[511:0], imm8[5:4])
@@ -574,32 +578,35 @@ static SIMDINLINE Float SIMDCALL permute_ps(Float const &a, Integer const &swiz)
  // AVX instructions for emulation.
  //
  template <int shuf>
-static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const &a, Float const &b)
+static SIMDINLINE Float SIMDCALL permute2f128_ps(Float const& a, Float const& b)
  {
-    return Float
-    {
-        SIMD256T::template permute2f128_ps<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
-        SIMD256T::template permute2f128_ps<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+    return Float{
+        SIMD256T::template permute2f128_ps<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+                                                                                        a.v8[1]),
+        SIMD256T::template permute2f128_ps<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+                                                                                        b.v8[1]),
      };
  }
  
  template <int shuf>
-static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const &a, Double const &b)
+static SIMDINLINE Double SIMDCALL permute2f128_pd(Double const& a, Double const& b)
  {
-    return Double
-    {
-        SIMD256T::template permute2f128_pd<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
-        SIMD256T::template permute2f128_pd<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+    return Double{
+        SIMD256T::template permute2f128_pd<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+                                                                                        a.v8[1]),
+        SIMD256T::template permute2f128_pd<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+                                                                                        b.v8[1]),
      };
  }
  
  template <int shuf>
-static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const &a, Integer const &b)
+static SIMDINLINE Integer SIMDCALL permute2f128_si(Integer const& a, Integer const& b)
  {
-    return Integer
-    {
-        SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0], a.v8[1]),
-        SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0], b.v8[1]),
+    return Integer{
+        SIMD256T::template permute2f128_si<((shuf & 0x03) << 0) | ((shuf & 0x0C) << 2)>(a.v8[0],
+                                                                                        a.v8[1]),
+        SIMD256T::template permute2f128_si<((shuf & 0x30) >> 4) | ((shuf & 0xC0) >> 2)>(b.v8[0],
+                                                                                        b.v8[1]),
      };
  }
  
@@ -624,209 +631,193 @@ SIMD_WRAPPER_2(unpacklo_ps);
  //-----------------------------------------------------------------------
  // Load / store operations
  //-----------------------------------------------------------------------
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL i32gather_ps(float const* p, Integer const &idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        i32gather_ps(float const* p, Integer const& idx) // return *(float*)(((int8*)p) + (idx * ScaleT))
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[0]),
          SIMD256T::template i32gather_ps<ScaleT>(p, idx.v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL load1_ps(float const *p)  // return *p    (broadcast 1 value to all elements)
+static SIMDINLINE Float SIMDCALL
+                        load1_ps(float const* p) // return *p    (broadcast 1 value to all elements)
  {
      return broadcast_ss(p);
  }
  
-static SIMDINLINE Float SIMDCALL load_ps(float const *p)   // return *p    (loads SIMD width elements from memory)
+static SIMDINLINE Float SIMDCALL
+                        load_ps(float const* p) // return *p    (loads SIMD width elements from memory)
  {
-    return Float
-    {
-        SIMD256T::load_ps(p),
-        SIMD256T::load_ps(p + TARGET_SIMD_WIDTH)
-    };
+    return Float{SIMD256T::load_ps(p), SIMD256T::load_ps(p + TARGET_SIMD_WIDTH)};
  }
  
-static SIMDINLINE Integer SIMDCALL load_si(Integer const *p)  // return *p
+static SIMDINLINE Integer SIMDCALL load_si(Integer const* p) // return *p
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::load_si(&p->v8[0]),
          SIMD256T::load_si(&p->v8[1]),
      };
  }
  
-static SIMDINLINE Float SIMDCALL loadu_ps(float const *p)  // return *p    (same as load_ps but allows for unaligned mem)
+static SIMDINLINE Float SIMDCALL
+                        loadu_ps(float const* p) // return *p    (same as load_ps but allows for unaligned mem)
  {
-    return Float
-    {
-        SIMD256T::loadu_ps(p),
-        SIMD256T::loadu_ps(p + TARGET_SIMD_WIDTH)
-    };
+    return Float{SIMD256T::loadu_ps(p), SIMD256T::loadu_ps(p + TARGET_SIMD_WIDTH)};
  }
  
-static SIMDINLINE Integer SIMDCALL loadu_si(Integer const *p) // return *p    (same as load_si but allows for unaligned mem)
+static SIMDINLINE Integer SIMDCALL
+                          loadu_si(Integer const* p) // return *p    (same as load_si but allows for unaligned mem)
  {
-    return Integer
-    {
+    return Integer{
          SIMD256T::loadu_si(&p->v8[0]),
          SIMD256T::loadu_si(&p->v8[1]),
      };
  }
  
  // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
-template<ScaleFactor ScaleT>
-static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float const &old, float const* p, Integer const &idx, Float const &mask)
+template <ScaleFactor ScaleT>
+static SIMDINLINE Float SIMDCALL
+                        mask_i32gather_ps(Float const& old, float const* p, Integer const& idx, Float const& mask)
  {
-    return Float
-    {
+    return Float{
          SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[0], p, idx.v8[0], mask.v8[0]),
          SIMD256T::template mask_i32gather_ps<ScaleT>(old.v8[1], p, idx.v8[1], mask.v8[1]),
      };
  }
  
-static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer const &mask, Float const &src)
+static SIMDINLINE void SIMDCALL maskstore_ps(float* p, Integer const& mask, Float const& src)
  {
      SIMD256T::maskstore_ps(p, mask.v8[0], src.v8[0]);
      SIMD256T::maskstore_ps(p + TARGET_SIMD_WIDTH, mask.v8[1], src.v8[1]);
  }
  
-static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const &a)
+static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer const& a)
  {
      uint64_t mask = static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[0]));
-             mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4);
+    mask |= static_cast<uint64_t>(SIMD256T::movemask_epi8(a.v8[1])) << (TARGET_SIMD_WIDTH * 4);
  
      return mask;
  }
  
-static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double const& a)
  {
      uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[0]));
-             mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2);
+    mask |= static_cast<uint32_t>(SIMD256T::movemask_pd(a.v8[1])) << (TARGET_SIMD_WIDTH / 2);
  
      return mask;
  }
-static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const &a)
+static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float const& a)
  {
      uint32_t mask = static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[0]));
-             mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH;
+    mask |= static_cast<uint32_t>(SIMD256T::movemask_ps(a.v8[1])) << TARGET_SIMD_WIDTH;
  
      return mask;
  }
  
  static SIMDINLINE Integer SIMDCALL set1_epi32(int i) // return i (all elements are same value)
  {
-    return Integer
-    {
-        SIMD256T::set1_epi32(i),
-        SIMD256T::set1_epi32(i)
-    };
+    return Integer{SIMD256T::set1_epi32(i), SIMD256T::set1_epi32(i)};
  }
  
  static SIMDINLINE Integer SIMDCALL set1_epi8(char i) // return i (all elements are same value)
  {
-    return Integer
-    {
-        SIMD256T::set1_epi8(i),
-        SIMD256T::set1_epi8(i)
-    };
+    return Integer{SIMD256T::set1_epi8(i), SIMD256T::set1_epi8(i)};
  }
  
-static SIMDINLINE Float SIMDCALL set1_ps(float f)  // return f (all elements are same value)
+static SIMDINLINE Float SIMDCALL set1_ps(float f) // return f (all elements are same value)
  {
-    return Float
-    {
-        SIMD256T::set1_ps(f),
-        SIMD256T::set1_ps(f)
-    };
+    return Float{SIMD256T::set1_ps(f), SIMD256T::set1_ps(f)};
  }
  
-static SIMDINLINE Float SIMDCALL setzero_ps()      // return 0 (float)
+static SIMDINLINE Float SIMDCALL setzero_ps() // return 0 (float)
  {
-    return Float
-    {
-        SIMD256T::setzero_ps(),
-        SIMD256T::setzero_ps()
-    };
+    return Float{SIMD256T::setzero_ps(), SIMD256T::setzero_ps()};
  }
  
-static SIMDINLINE Integer SIMDCALL setzero_si()      // return 0 (integer)
+static SIMDINLINE Integer SIMDCALL setzero_si() // return 0 (integer)
  {
-    return Integer
-    {
-        SIMD256T::setzero_si(),
-        SIMD256T::setzero_si()
-    };
+    return Integer{SIMD256T::setzero_si(), SIMD256T::setzero_si()};
  }
  
-static SIMDINLINE void SIMDCALL store_ps(float *p, Float const &a)    // *p = a   (stores all elements contiguously in memory)
+static SIMDINLINE void SIMDCALL
+                       store_ps(float* p, Float const& a) // *p = a   (stores all elements contiguously in memory)
  {
      SIMD256T::store_ps(p, a.v8[0]);
      SIMD256T::store_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
  }
  
-static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer const &a)   // *p = a
+static SIMDINLINE void SIMDCALL store_si(Integer* p, Integer const& a) // *p = a
  {
      SIMD256T::store_si(&p->v8[0], a.v8[0]);
      SIMD256T::store_si(&p->v8[1], a.v8[1]);
  }
  
-static SIMDINLINE void SIMDCALL stream_ps(float *p, Float const &a)   // *p = a   (same as store_ps, but doesn't keep memory in cache)
+static SIMDINLINE void SIMDCALL
+                       stream_ps(float* p, Float const& a) // *p = a   (same as store_ps, but doesn't keep memory in cache)
  {
      SIMD256T::stream_ps(p, a.v8[0]);
      SIMD256T::stream_ps(p + TARGET_SIMD_WIDTH, a.v8[1]);
  }
  
-static SIMDINLINE Integer SIMDCALL set_epi32(
-    int i15, int i14, int i13, int i12, int i11, int i10, int i9, int i8,
-    int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL set_epi32(int i15,
+                                             int i14,
+                                             int i13,
+                                             int i12,
+                                             int i11,
+                                             int i10,
+                                             int i9,
+                                             int i8,
+                                             int i7,
+                                             int i6,
+                                             int i5,
+                                             int i4,
+                                             int i3,
+                                             int i2,
+                                             int i1,
+                                             int i0)
  {
-    return Integer
-    {
-        SIMD256T::set_epi32(
-            i7, i6, i5, i4, i3, i2, i1, i0),
-        SIMD256T::set_epi32(
-            i15, i14, i13, i12, i11, i10, i9, i8)
-    };
+    return Integer{SIMD256T::set_epi32(i7, i6, i5, i4, i3, i2, i1, i0),
+                   SIMD256T::set_epi32(i15, i14, i13, i12, i11, i10, i9, i8)};
  }
  
-static SIMDINLINE Integer SIMDCALL set_epi32(
-    int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
+static SIMDINLINE Integer SIMDCALL
+                          set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0)
  {
-    return set_epi32(
-        0, 0, 0, 0, 0, 0, 0, 0,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return set_epi32(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
-static SIMDINLINE Float SIMDCALL set_ps(
-    float i15, float i14, float i13, float i12, float i11, float i10, float i9, float i8,
-    float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL set_ps(float i15,
+                                        float i14,
+                                        float i13,
+                                        float i12,
+                                        float i11,
+                                        float i10,
+                                        float i9,
+                                        float i8,
+                                        float i7,
+                                        float i6,
+                                        float i5,
+                                        float i4,
+                                        float i3,
+                                        float i2,
+                                        float i1,
+                                        float i0)
  {
-    return Float
-    {
-        SIMD256T::set_ps(
-            i7, i6, i5, i4, i3, i2, i1, i0),
-        SIMD256T::set_ps(
-            i15, i14, i13, i12, i11, i10, i9, i8)
-    };
+    return Float{SIMD256T::set_ps(i7, i6, i5, i4, i3, i2, i1, i0),
+                 SIMD256T::set_ps(i15, i14, i13, i12, i11, i10, i9, i8)};
  }
  
-static SIMDINLINE Float SIMDCALL set_ps(
-    float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
+static SIMDINLINE Float SIMDCALL
+                        set_ps(float i7, float i6, float i5, float i4, float i3, float i2, float i1, float i0)
  {
-    return set_ps(
-        0, 0, 0, 0, 0, 0, 0, 0,
-        i7, i6, i5, i4, i3, i2, i1, i0);
+    return set_ps(0, 0, 0, 0, 0, 0, 0, 0, i7, i6, i5, i4, i3, i2, i1, i0);
  }
  
  static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask)
  {
-    return Float
-    {
-        SIMD256T::vmask_ps(mask),
-        SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)
-    };
+    return Float{SIMD256T::vmask_ps(mask), SIMD256T::vmask_ps(mask >> TARGET_SIMD_WIDTH)};
  }
  
  #undef SIMD_WRAPPER_1
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl

index bc5bff477a42e8aba0c0a35978d9bf206af522f1..473934824ee3a80a8762599bed5c9a6886d87309 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl
@@ -1,28 +1,27 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #if !defined(__SIMD_LIB_AVX_HPP__)
  #error Do not include this file directly, use "simdlib.hpp" instead.
  #endif
  
  // no backwards compatibility for simd mask-enabled functions
-
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp

index df2df1b09cdd5002122c03538b2f524aea9e53a4..7902bcb2b641548efe3783e1dd7108d6eeccfe4e 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #pragma once
  #if 0
  //===========================================================================
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp

index 0fad0e1fd8cf6027d02a9f1f4fb1328b68d2c6a1..944c3c23fd3b913e86f208f8b2821ef1b396ee1b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  #pragma once
  
  #if !defined(__cplusplus)
@@ -30,9 +30,9 @@
  #include <inttypes.h>
  #include <stdint.h>
  
-#define SIMD_ARCH_AVX       0
-#define SIMD_ARCH_AVX2      1
-#define SIMD_ARCH_AVX512    2
+#define SIMD_ARCH_AVX 0
+#define SIMD_ARCH_AVX2 1
+#define SIMD_ARCH_AVX512 2
  
  #if !defined(SIMD_ARCH)
  #define SIMD_ARCH SIMD_ARCH_AVX
@@ -55,81 +55,81 @@ namespace SIMDImpl
  {
      enum class CompareType
      {
-        EQ_OQ      = 0x00, // Equal (ordered, nonsignaling)
-        LT_OS      = 0x01, // Less-than (ordered, signaling)
-        LE_OS      = 0x02, // Less-than-or-equal (ordered, signaling)
-        UNORD_Q    = 0x03, // Unordered (nonsignaling)
-        NEQ_UQ     = 0x04, // Not-equal (unordered, nonsignaling)
-        NLT_US     = 0x05, // Not-less-than (unordered, signaling)
-        NLE_US     = 0x06, // Not-less-than-or-equal (unordered, signaling)
-        ORD_Q      = 0x07, // Ordered (nonsignaling)
-        EQ_UQ      = 0x08, // Equal (unordered, non-signaling)
-        NGE_US     = 0x09, // Not-greater-than-or-equal (unordered, signaling)
-        NGT_US     = 0x0A, // Not-greater-than (unordered, signaling)
-        FALSE_OQ   = 0x0B, // False (ordered, nonsignaling)
-        NEQ_OQ     = 0x0C, // Not-equal (ordered, non-signaling)
-        GE_OS      = 0x0D, // Greater-than-or-equal (ordered, signaling)
-        GT_OS      = 0x0E, // Greater-than (ordered, signaling)
-        TRUE_UQ    = 0x0F, // True (unordered, non-signaling)
-        EQ_OS      = 0x10, // Equal (ordered, signaling)
-        LT_OQ      = 0x11, // Less-than (ordered, nonsignaling)
-        LE_OQ      = 0x12, // Less-than-or-equal (ordered, nonsignaling)
-        UNORD_S    = 0x13, // Unordered (signaling)
-        NEQ_US     = 0x14, // Not-equal (unordered, signaling)
-        NLT_UQ     = 0x15, // Not-less-than (unordered, nonsignaling)
-        NLE_UQ     = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
-        ORD_S      = 0x17, // Ordered (signaling)
-        EQ_US      = 0x18, // Equal (unordered, signaling)
-        NGE_UQ     = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
-        NGT_UQ     = 0x1A, // Not-greater-than (unordered, nonsignaling)
-        FALSE_OS   = 0x1B, // False (ordered, signaling)
-        NEQ_OS     = 0x1C, // Not-equal (ordered, signaling)
-        GE_OQ      = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
-        GT_OQ      = 0x1E, // Greater-than (ordered, nonsignaling)
-        TRUE_US    = 0x1F, // True (unordered, signaling)
+        EQ_OQ    = 0x00, // Equal (ordered, nonsignaling)
+        LT_OS    = 0x01, // Less-than (ordered, signaling)
+        LE_OS    = 0x02, // Less-than-or-equal (ordered, signaling)
+        UNORD_Q  = 0x03, // Unordered (nonsignaling)
+        NEQ_UQ   = 0x04, // Not-equal (unordered, nonsignaling)
+        NLT_US   = 0x05, // Not-less-than (unordered, signaling)
+        NLE_US   = 0x06, // Not-less-than-or-equal (unordered, signaling)
+        ORD_Q    = 0x07, // Ordered (nonsignaling)
+        EQ_UQ    = 0x08, // Equal (unordered, non-signaling)
+        NGE_US   = 0x09, // Not-greater-than-or-equal (unordered, signaling)
+        NGT_US   = 0x0A, // Not-greater-than (unordered, signaling)
+        FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
+        NEQ_OQ   = 0x0C, // Not-equal (ordered, non-signaling)
+        GE_OS    = 0x0D, // Greater-than-or-equal (ordered, signaling)
+        GT_OS    = 0x0E, // Greater-than (ordered, signaling)
+        TRUE_UQ  = 0x0F, // True (unordered, non-signaling)
+        EQ_OS    = 0x10, // Equal (ordered, signaling)
+        LT_OQ    = 0x11, // Less-than (ordered, nonsignaling)
+        LE_OQ    = 0x12, // Less-than-or-equal (ordered, nonsignaling)
+        UNORD_S  = 0x13, // Unordered (signaling)
+        NEQ_US   = 0x14, // Not-equal (unordered, signaling)
+        NLT_UQ   = 0x15, // Not-less-than (unordered, nonsignaling)
+        NLE_UQ   = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
+        ORD_S    = 0x17, // Ordered (signaling)
+        EQ_US    = 0x18, // Equal (unordered, signaling)
+        NGE_UQ   = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
+        NGT_UQ   = 0x1A, // Not-greater-than (unordered, nonsignaling)
+        FALSE_OS = 0x1B, // False (ordered, signaling)
+        NEQ_OS   = 0x1C, // Not-equal (ordered, signaling)
+        GE_OQ    = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
+        GT_OQ    = 0x1E, // Greater-than (ordered, nonsignaling)
+        TRUE_US  = 0x1F, // True (unordered, signaling)
      };
  
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
      enum class CompareTypeInt
      {
-        EQ  = _MM_CMPINT_EQ,    // Equal
-        LT  = _MM_CMPINT_LT,    // Less than
-        LE  = _MM_CMPINT_LE,    // Less than or Equal
-        NE  = _MM_CMPINT_NE,    // Not Equal
-        GE  = _MM_CMPINT_GE,    // Greater than or Equal
-        GT  = _MM_CMPINT_GT,    // Greater than
+        EQ = _MM_CMPINT_EQ, // Equal
+        LT = _MM_CMPINT_LT, // Less than
+        LE = _MM_CMPINT_LE, // Less than or Equal
+        NE = _MM_CMPINT_NE, // Not Equal
+        GE = _MM_CMPINT_GE, // Greater than or Equal
+        GT = _MM_CMPINT_GT, // Greater than
      };
  #endif // SIMD_ARCH >= SIMD_ARCH_AVX512
  
      enum class ScaleFactor
      {
-        SF_1 = 1,   // No scaling
-        SF_2 = 2,   // Scale offset by 2
-        SF_4 = 4,   // Scale offset by 4
-        SF_8 = 8,   // Scale offset by 8
+        SF_1 = 1, // No scaling
+        SF_2 = 2, // Scale offset by 2
+        SF_4 = 4, // Scale offset by 4
+        SF_8 = 8, // Scale offset by 8
      };
  
      enum class RoundMode
      {
-        TO_NEAREST_INT  = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
-        TO_NEG_INF      = 0x01, // Round to negative infinity
-        TO_POS_INF      = 0x02, // Round to positive infinity
-        TO_ZERO         = 0x03, // Round to 0 a.k.a. truncate
-        CUR_DIRECTION   = 0x04, // Round in direction set in MXCSR register
-        
-        RAISE_EXC       = 0x00, // Raise exception on overflow
-        NO_EXC          = 0x08, // Suppress exceptions
-        
-        NINT            = static_cast<int>(TO_NEAREST_INT)  | static_cast<int>(RAISE_EXC),
-        NINT_NOEXC      = static_cast<int>(TO_NEAREST_INT)  | static_cast<int>(NO_EXC),
-        FLOOR           = static_cast<int>(TO_NEG_INF)      | static_cast<int>(RAISE_EXC),
-        FLOOR_NOEXC     = static_cast<int>(TO_NEG_INF)      | static_cast<int>(NO_EXC),
-        CEIL            = static_cast<int>(TO_POS_INF)      | static_cast<int>(RAISE_EXC),
-        CEIL_NOEXC      = static_cast<int>(TO_POS_INF)      | static_cast<int>(NO_EXC),
-        TRUNC           = static_cast<int>(TO_ZERO)         | static_cast<int>(RAISE_EXC),
-        TRUNC_NOEXC     = static_cast<int>(TO_ZERO)         | static_cast<int>(NO_EXC),
-        RINT            = static_cast<int>(CUR_DIRECTION)   | static_cast<int>(RAISE_EXC),
-        NEARBYINT       = static_cast<int>(CUR_DIRECTION)   | static_cast<int>(NO_EXC),
+        TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
+        TO_NEG_INF     = 0x01, // Round to negative infinity
+        TO_POS_INF     = 0x02, // Round to positive infinity
+        TO_ZERO        = 0x03, // Round to 0 a.k.a. truncate
+        CUR_DIRECTION  = 0x04, // Round in direction set in MXCSR register
+
+        RAISE_EXC = 0x00, // Raise exception on overflow
+        NO_EXC    = 0x08, // Suppress exceptions
+
+        NINT        = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
+        NINT_NOEXC  = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
+        FLOOR       = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
+        FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
+        CEIL        = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
+        CEIL_NOEXC  = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
+        TRUNC       = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
+        TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
+        RINT        = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
+        NEARBYINT   = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
      };
  
      struct Traits
@@ -140,7 +140,7 @@ namespace SIMDImpl
      };
  
      // Attribute, 4-dimensional attribute in SIMD SOA layout
-    template<typename Float, typename Integer, typename Double>
+    template <typename Float, typename Integer, typename Double>
      union Vec4
      {
          Float   v[4];
@@ -148,14 +148,14 @@ namespace SIMDImpl
          Double  vd[4];
          struct
          {
-            Float  x;
-            Float  y;
-            Float  z;
-            Float  w;
+            Float x;
+            Float y;
+            Float z;
+            Float w;
          };
-        SIMDINLINE Float& SIMDCALL operator[] (const int i) { return v[i]; }
-        SIMDINLINE Float const & SIMDCALL operator[] (const int i) const { return v[i]; }
-        SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const & in)
+        SIMDINLINE Float& SIMDCALL operator[](const int i) { return v[i]; }
+        SIMDINLINE Float const& SIMDCALL operator[](const int i) const { return v[i]; }
+        SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const& in)
          {
              v[0] = in.v[0];
              v[1] = in.v[1];
@@ -171,8 +171,16 @@ namespace SIMDImpl
          {
              SIMDINLINE Float() = default;
              SIMDINLINE Float(__m128 in) : v(in) {}
-            SIMDINLINE Float& SIMDCALL operator=(__m128 in) { v = in; return *this; }
-            SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; }
+            SIMDINLINE Float& SIMDCALL operator=(__m128 in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Float& SIMDCALL operator=(Float const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m128() const { return v; }
  
              SIMDALIGN(__m128, 16) v;
@@ -182,8 +190,16 @@ namespace SIMDImpl
          {
              SIMDINLINE Integer() = default;
              SIMDINLINE Integer(__m128i in) : v(in) {}
-            SIMDINLINE Integer& SIMDCALL operator=(__m128i in) { v = in; return *this; }
-            SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; }
+            SIMDINLINE Integer& SIMDCALL operator=(__m128i in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m128i() const { return v; }
  
              SIMDALIGN(__m128i, 16) v;
@@ -193,8 +209,16 @@ namespace SIMDImpl
          {
              SIMDINLINE Double() = default;
              SIMDINLINE Double(__m128d in) : v(in) {}
-            SIMDINLINE Double& SIMDCALL operator=(__m128d in) { v = in; return *this; }
-            SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; }
+            SIMDINLINE Double& SIMDCALL operator=(__m128d in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Double& SIMDCALL operator=(Double const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m128d() const { return v; }
  
              SIMDALIGN(__m128d, 16) v;
@@ -204,7 +228,7 @@ namespace SIMDImpl
          using Mask = uint8_t;
  
          static const uint32_t SIMD_WIDTH = 4;
-    } // ns SIMD128Impl
+    } // namespace SIMD128Impl
  
      namespace SIMD256Impl
      {
@@ -212,12 +236,21 @@ namespace SIMDImpl
          {
              SIMDINLINE Float() = default;
              SIMDINLINE Float(__m256 in) : v(in) {}
-            SIMDINLINE Float(SIMD128Impl::Float const &in_lo, SIMD128Impl::Float const &in_hi = _mm_setzero_ps())
+            SIMDINLINE Float(SIMD128Impl::Float const& in_lo,
+                             SIMD128Impl::Float const& in_hi = _mm_setzero_ps())
              {
                  v = _mm256_insertf128_ps(_mm256_castps128_ps256(in_lo), in_hi, 0x1);
              }
-            SIMDINLINE Float& SIMDCALL operator=(__m256 in) { v = in; return *this; }
-            SIMDINLINE Float& SIMDCALL operator=(Float const & in) { v = in.v; return *this; }
+            SIMDINLINE Float& SIMDCALL operator=(__m256 in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Float& SIMDCALL operator=(Float const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m256() const { return v; }
  
              SIMDALIGN(__m256, 32) v;
@@ -228,12 +261,21 @@ namespace SIMDImpl
          {
              SIMDINLINE Integer() = default;
              SIMDINLINE Integer(__m256i in) : v(in) {}
-            SIMDINLINE Integer(SIMD128Impl::Integer const &in_lo, SIMD128Impl::Integer const &in_hi = _mm_setzero_si128())
+            SIMDINLINE Integer(SIMD128Impl::Integer const& in_lo,
+                               SIMD128Impl::Integer const& in_hi = _mm_setzero_si128())
              {
                  v = _mm256_insertf128_si256(_mm256_castsi128_si256(in_lo), in_hi, 0x1);
              }
-            SIMDINLINE Integer& SIMDCALL operator=(__m256i in) { v = in; return *this; }
-            SIMDINLINE Integer& SIMDCALL operator=(Integer const & in) { v = in.v; return *this; }
+            SIMDINLINE Integer& SIMDCALL operator=(__m256i in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m256i() const { return v; }
  
              SIMDALIGN(__m256i, 32) v;
@@ -243,13 +285,22 @@ namespace SIMDImpl
          union Double
          {
              SIMDINLINE Double() = default;
-            SIMDINLINE Double(__m256d const &in) : v(in) {}
-            SIMDINLINE Double(SIMD128Impl::Double const &in_lo, SIMD128Impl::Double const &in_hi = _mm_setzero_pd())
+            SIMDINLINE Double(__m256d const& in) : v(in) {}
+            SIMDINLINE Double(SIMD128Impl::Double const& in_lo,
+                              SIMD128Impl::Double const& in_hi = _mm_setzero_pd())
              {
                  v = _mm256_insertf128_pd(_mm256_castpd128_pd256(in_lo), in_hi, 0x1);
              }
-            SIMDINLINE Double& SIMDCALL operator=(__m256d in) { v = in; return *this; }
-            SIMDINLINE Double& SIMDCALL operator=(Double const & in) { v = in.v; return *this; }
+            SIMDINLINE Double& SIMDCALL operator=(__m256d in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Double& SIMDCALL operator=(Double const& in)
+            {
+                v = in.v;
+                return *this;
+            }
              SIMDINLINE SIMDCALL operator __m256d() const { return v; }
  
              SIMDALIGN(__m256d, 32) v;
@@ -260,7 +311,7 @@ namespace SIMDImpl
          using Mask = uint8_t;
  
          static const uint32_t SIMD_WIDTH = 8;
-    } // ns SIMD256Impl
+    } // namespace SIMD256Impl
  
      namespace SIMD512Impl
      {
@@ -282,14 +333,14 @@ namespace SIMDImpl
          union __m512i
          {
          private:
-            int8_t              m512i_i8[64];
-            int16_t             m512i_i16[32];
-            int32_t             m512i_i32[16];
-            int64_t             m512i_i64[8];
-            uint8_t             m512i_u8[64];
-            uint16_t            m512i_u16[32];
-            uint32_t            m512i_u32[16];
-            uint64_t            m512i_u64[8];
+            int8_t   m512i_i8[64];
+            int16_t  m512i_i16[32];
+            int32_t  m512i_i32[16];
+            int64_t  m512i_i64[8];
+            uint8_t  m512i_u8[64];
+            uint16_t m512i_u16[32];
+            uint32_t m512i_u32[16];
+            uint64_t m512i_u64[8];
          };
  
          using __mmask16 = uint16_t;
@@ -305,9 +356,18 @@ namespace SIMDImpl
          {
              SIMDINLINE Float() = default;
              SIMDINLINE Float(__m512 in) : v(in) {}
-            SIMDINLINE Float(SIMD256Impl::Float const &in_lo, SIMD256Impl::Float const &in_hi = _mm256_setzero_ps()) { v8[0] = in_lo; v8[1] = in_hi; }
-            SIMDINLINE Float& SIMDCALL operator=(__m512 in) { v = in; return *this; }
-            SIMDINLINE Float& SIMDCALL operator=(Float const & in)
+            SIMDINLINE Float(SIMD256Impl::Float const& in_lo,
+                             SIMD256Impl::Float const& in_hi = _mm256_setzero_ps())
+            {
+                v8[0] = in_lo;
+                v8[1] = in_hi;
+            }
+            SIMDINLINE Float& SIMDCALL operator=(__m512 in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Float& SIMDCALL operator=(Float const& in)
              {
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
                  v = in.v;
@@ -327,9 +387,18 @@ namespace SIMDImpl
          {
              SIMDINLINE Integer() = default;
              SIMDINLINE Integer(__m512i in) : v(in) {}
-            SIMDINLINE Integer(SIMD256Impl::Integer const &in_lo, SIMD256Impl::Integer const &in_hi = _mm256_setzero_si256()) { v8[0] = in_lo; v8[1] = in_hi; }
-            SIMDINLINE Integer& SIMDCALL operator=(__m512i in) { v = in; return *this; }
-            SIMDINLINE Integer& SIMDCALL operator=(Integer const & in)
+            SIMDINLINE Integer(SIMD256Impl::Integer const& in_lo,
+                               SIMD256Impl::Integer const& in_hi = _mm256_setzero_si256())
+            {
+                v8[0] = in_lo;
+                v8[1] = in_hi;
+            }
+            SIMDINLINE Integer& SIMDCALL operator=(__m512i in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
              {
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
                  v = in.v;
@@ -350,9 +419,18 @@ namespace SIMDImpl
          {
              SIMDINLINE Double() = default;
              SIMDINLINE Double(__m512d in) : v(in) {}
-            SIMDINLINE Double(SIMD256Impl::Double const &in_lo, SIMD256Impl::Double const &in_hi = _mm256_setzero_pd()) { v8[0] = in_lo; v8[1] = in_hi; }
-            SIMDINLINE Double& SIMDCALL operator=(__m512d in) { v = in; return *this; }
-            SIMDINLINE Double& SIMDCALL operator=(Double const & in)
+            SIMDINLINE Double(SIMD256Impl::Double const& in_lo,
+                              SIMD256Impl::Double const& in_hi = _mm256_setzero_pd())
+            {
+                v8[0] = in_lo;
+                v8[1] = in_hi;
+            }
+            SIMDINLINE Double& SIMDCALL operator=(__m512d in)
+            {
+                v = in;
+                return *this;
+            }
+            SIMDINLINE Double& SIMDCALL operator=(Double const& in)
              {
  #if SIMD_ARCH >= SIMD_ARCH_AVX512
                  v = in.v;
@@ -375,5 +453,5 @@ namespace SIMDImpl
          static const uint32_t SIMD_WIDTH = 16;
  
  #undef SIMD_ALIGNMENT_BYTES
-    } // ns SIMD512Impl
-} // ns SIMDImpl
+    } // namespace SIMD512Impl
+} // namespace SIMDImpl
diff --git a/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp b/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp

index 43b74a68fdec4edd9ab17ec008a8a80614fed08e..8e874fbc223c898100fa0f07d4b74e4c1fe4c6d1 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp
+++ b/src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #include "common/os.h"
  #include <stdarg.h>
@@ -38,29 +38,32 @@ namespace ConsoleUtils
  {
      enum class TextColor
      {
-        BLACK      = 0,
+        BLACK = 0,
  #if defined(_WIN32)
-        RED        = 4,
-        GREEN      = 2,
-        BLUE       = 1,
+        RED   = 4,
+        GREEN = 2,
+        BLUE  = 1,
  #else
-        RED        = 1,
-        GREEN      = 2,
-        BLUE       = 4,
+        RED   = 1,
+        GREEN = 2,
+        BLUE  = 4,
  #endif // _WIN32
-        PURPLE     = static_cast<uint32_t>(RED) | static_cast<uint32_t>(BLUE),
-        CYAN       = static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
-        YELLOW     = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN),
-        WHITE      = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
+        PURPLE = static_cast<uint32_t>(RED) | static_cast<uint32_t>(BLUE),
+        CYAN   = static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
+        YELLOW = static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN),
+        WHITE =
+            static_cast<uint32_t>(RED) | static_cast<uint32_t>(GREEN) | static_cast<uint32_t>(BLUE),
      };
  
      enum class TextStyle
      {
-        NORMAL     = 0,
-        INTENSITY  = 1,
+        NORMAL    = 0,
+        INTENSITY = 1,
      };
  
-    void SetTextColor(FILE* stream, TextColor color = TextColor::WHITE, TextStyle style = TextStyle::NORMAL)
+    void SetTextColor(FILE*     stream,
+                      TextColor color = TextColor::WHITE,
+                      TextStyle style = TextStyle::NORMAL)
      {
  #if defined(_WIN32)
  
@@ -89,7 +92,8 @@ namespace ConsoleUtils
  #else // !_WIN32
  
          // Print ANSI codes
-        uint32_t cc = 30 + ((style == TextStyle::INTENSITY) ? 60 : 0) + static_cast<uint32_t>(color);
+        uint32_t cc =
+            30 + ((style == TextStyle::INTENSITY) ? 60 : 0) + static_cast<uint32_t>(color);
          fprintf(stream, "\033[0m\033[%d;%dm", static_cast<uint32_t>(style), cc);
  
  #endif
@@ -110,17 +114,16 @@ namespace ConsoleUtils
      }
  
      static std::mutex g_stderrMutex;
-} // ns ConsoleUtils
-
-bool SwrAssert(
-        bool        chkDebugger,
-        bool&       enabled,
-        const char* pExpression,
-        const char* pFileName,
-        uint32_t    lineNum,
-        const char* pFunction,
-        const char* pFmtString,
-        ...)
+} // namespace ConsoleUtils
+
+bool SwrAssert(bool        chkDebugger,
+               bool&       enabled,
+               const char* pExpression,
+               const char* pFileName,
+               uint32_t    lineNum,
+               const char* pFunction,
+               const char* pFmtString,
+               ...)
  {
      using namespace ConsoleUtils;
      std::lock_guard<std::mutex> l(g_stderrMutex);
@@ -151,7 +154,7 @@ bool SwrAssert(
  
  #if defined(_WIN32)
      static const int MAX_MESSAGE_LEN = 2048;
-    char msgBuf[MAX_MESSAGE_LEN];
+    char             msgBuf[MAX_MESSAGE_LEN];
  
      sprintf_s(msgBuf, "%s(%d): ASSERT: %s\n", pFileName, lineNum, pExpression);
      msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
@@ -169,15 +172,13 @@ bool SwrAssert(
      {
          va_list args;
          va_start(args, pFmtString);
-        offset = _vsnprintf_s(
-                msgBuf,
-                sizeof(msgBuf),
-                sizeof(msgBuf),
-                pFmtString,
-                args);
+        offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
          va_end(args);
  
-        if (offset < 0) { return true; }
+        if (offset < 0)
+        {
+            return true;
+        }
  
          OutputDebugStringA("\t");
          OutputDebugStringA(msgBuf);
@@ -186,46 +187,51 @@ bool SwrAssert(
  
      if (enabled && KNOB_ENABLE_ASSERT_DIALOGS)
      {
-        int retval = sprintf_s(
-                &msgBuf[offset],
-                MAX_MESSAGE_LEN - offset,
-                "\n\n"
-                "File: %s\n"
-                "Line: %d\n"
-                "\n"
-                "Expression: %s\n\n"
-                "Cancel: Disable this assert for the remainder of the process\n"
-                "Try Again: Break into the debugger\n"
-                "Continue: Continue execution (but leave assert enabled)",
-                pFileName,
-                lineNum,
-                pExpression);
-
-        if (retval < 0) { return true; }
+        int retval = sprintf_s(&msgBuf[offset],
+                               MAX_MESSAGE_LEN - offset,
+                               "\n\n"
+                               "File: %s\n"
+                               "Line: %d\n"
+                               "\n"
+                               "Expression: %s\n\n"
+                               "Cancel: Disable this assert for the remainder of the process\n"
+                               "Try Again: Break into the debugger\n"
+                               "Continue: Continue execution (but leave assert enabled)",
+                               pFileName,
+                               lineNum,
+                               pExpression);
+
+        if (retval < 0)
+        {
+            return true;
+        }
  
          offset += retval;
  
          if (!IsDebuggerPresent())
          {
-            sprintf_s(
-                    &msgBuf[offset],
-                    MAX_MESSAGE_LEN - offset,
-                    "\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a program crash!");
+            sprintf_s(&msgBuf[offset],
+                      MAX_MESSAGE_LEN - offset,
+                      "\n\n*** NO DEBUGGER DETECTED ***\n\nPressing \"Try Again\" will cause a "
+                      "program crash!");
          }
  
-        retval = MessageBoxA(nullptr, msgBuf, "Assert Failed", MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION | MB_SETFOREGROUND);
+        retval = MessageBoxA(nullptr,
+                             msgBuf,
+                             "Assert Failed",
+                             MB_CANCELTRYCONTINUE | MB_ICONEXCLAMATION | MB_SETFOREGROUND);
  
          switch (retval)
          {
-            case IDCANCEL:
-                enabled = false;
-                return false;
+        case IDCANCEL:
+            enabled = false;
+            return false;
  
-            case IDTRYAGAIN:
-                return true;
+        case IDTRYAGAIN:
+            return true;
  
-            case IDCONTINUE:
-                return false;
+        case IDCONTINUE:
+            return false;
          }
      }
      else
@@ -238,11 +244,7 @@ bool SwrAssert(
  }
  
  void SwrTrace(
-        const char* pFileName,
-        uint32_t    lineNum,
-        const char* pFunction,
-        const char* pFmtString,
-        ...)
+    const char* pFileName, uint32_t lineNum, const char* pFunction, const char* pFmtString, ...)
  {
      using namespace ConsoleUtils;
      std::lock_guard<std::mutex> l(g_stderrMutex);
@@ -266,7 +268,7 @@ void SwrTrace(
  
  #if defined(_WIN32)
      static const int MAX_MESSAGE_LEN = 2048;
-    char msgBuf[MAX_MESSAGE_LEN];
+    char             msgBuf[MAX_MESSAGE_LEN];
  
      sprintf_s(msgBuf, "%s(%d): TRACE in %s\n", pFileName, lineNum, pFunction);
      msgBuf[MAX_MESSAGE_LEN - 2] = '\n';
@@ -279,15 +281,13 @@ void SwrTrace(
      {
          va_list args;
          va_start(args, pFmtString);
-        offset = _vsnprintf_s(
-                msgBuf,
-                sizeof(msgBuf),
-                sizeof(msgBuf),
-                pFmtString,
-                args);
+        offset = _vsnprintf_s(msgBuf, sizeof(msgBuf), sizeof(msgBuf), pFmtString, args);
          va_end(args);
  
-        if (offset < 0) { return; }
+        if (offset < 0)
+        {
+            return;
+        }
  
          OutputDebugStringA("\t");
          OutputDebugStringA(msgBuf);
diff --git a/src/gallium/drivers/swr/rasterizer/common/swr_assert.h b/src/gallium/drivers/swr/rasterizer/common/swr_assert.h

index a9e5bb4e77f088157d67fea3ee9c1c136cad1d45..d74b7981255f45da8be45c5033755fb7f1609fb5 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/swr_assert.h
+++ b/src/gallium/drivers/swr/rasterizer/common/swr_assert.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #ifndef __SWR_ASSERT_H__
  #define __SWR_ASSERT_H__
@@ -55,28 +55,38 @@
  
  // Stupid preprocessor tricks to avoid -Wall / -W4 warnings
  #if defined(_MSC_VER)
-#define _SWR_WARN_DISABLE __pragma(warning(push)) __pragma(warning(disable:4127))
+#define _SWR_WARN_DISABLE __pragma(warning(push)) __pragma(warning(disable : 4127))
  #define _SWR_WARN_RESTORE __pragma(warning(pop))
  #else // ! MSVC compiler
  #define _SWR_WARN_DISABLE
  #define _SWR_WARN_RESTORE
  #endif
  
-#define _SWR_MACRO_START do {
-#define _SWR_MACRO_END  \
-    _SWR_WARN_DISABLE   \
-    } while(0)          \
+#define _SWR_MACRO_START \
+    do                   \
+    {
+#define _SWR_MACRO_END \
+    _SWR_WARN_DISABLE  \
+    }                  \
+    while (0)          \
      _SWR_WARN_RESTORE
  
-
  #if defined(_WIN32)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START __assume(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...)        \
+    _SWR_MACRO_START __assume(e); \
+    _SWR_MACRO_END
  #elif defined(__clang__)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START __builtin_assume(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...)                \
+    _SWR_MACRO_START __builtin_assume(e); \
+    _SWR_MACRO_END
  #elif defined(__GNUC__)
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START ((e) ? ((void)0) : __builtin_unreachable()); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...)                                       \
+    _SWR_MACRO_START((e) ? ((void)0) : __builtin_unreachable()); \
+    _SWR_MACRO_END
  #else
-#define SWR_ASSUME(e, ...) _SWR_MACRO_START ASSUME(e); _SWR_MACRO_END
+#define SWR_ASSUME(e, ...)      \
+    _SWR_MACRO_START ASSUME(e); \
+    _SWR_MACRO_END
  #endif
  
  #if !defined(SWR_ENABLE_ASSERTS)
@@ -110,47 +120,50 @@
  
  #else
  
-bool SwrAssert(
-    bool        chkDebugger,
-    bool&       enabled,
-    const char* pExpression,
-    const char* pFileName,
-    uint32_t    lineNum,
-    const char* function,
-    const char* pFmtString = nullptr,
-    ...);
+bool SwrAssert(bool        chkDebugger,
+               bool&       enabled,
+               const char* pExpression,
+               const char* pFileName,
+               uint32_t    lineNum,
+               const char* function,
+               const char* pFmtString = nullptr,
+               ...);
  
  void SwrTrace(
-    const char* pFileName,
-    uint32_t    lineNum,
-    const char* function,
-    const char* pFmtString,
-    ...);
-
-#define _SWR_ASSERT(chkDebugger, e, ...)    \
-    _SWR_MACRO_START \
-    bool expFailed = !(e);\
-    if (expFailed) {\
-        static bool swrAssertEnabled = true;\
-        expFailed = SwrAssert(chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);\
-        if (expFailed) { DEBUGBREAK; }\
-    }\
+    const char* pFileName, uint32_t lineNum, const char* function, const char* pFmtString, ...);
+
+#define _SWR_ASSERT(chkDebugger, e, ...)                                                                            \
+    _SWR_MACRO_START                                                                                                \
+    bool expFailed = !(e);                                                                                          \
+    if (expFailed)                                                                                                  \
+    {                                                                                                               \
+        static bool swrAssertEnabled = true;                                                                        \
+        expFailed                    = SwrAssert(                                                                   \
+            chkDebugger, swrAssertEnabled, #e, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+        if (expFailed)                                                                                              \
+        {                                                                                                           \
+            DEBUGBREAK;                                                                                             \
+        }                                                                                                           \
+    }                                                                                                               \
      _SWR_MACRO_END
  
-#define _SWR_INVALID(chkDebugger, ...)    \
-    _SWR_MACRO_START \
-    static bool swrAssertEnabled = true;\
-    bool expFailed = SwrAssert(chkDebugger, swrAssertEnabled, "", __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__);\
-    if (expFailed) { DEBUGBREAK; }\
+#define _SWR_INVALID(chkDebugger, ...)                                                                     \
+    _SWR_MACRO_START                                                                                       \
+    static bool swrAssertEnabled = true;                                                                   \
+    bool        expFailed        = SwrAssert(                                                              \
+        chkDebugger, swrAssertEnabled, "", __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+    if (expFailed)                                                                                         \
+    {                                                                                                      \
+        DEBUGBREAK;                                                                                        \
+    }                                                                                                      \
      _SWR_MACRO_END
  
-#define _SWR_TRACE(_fmtstr, ...) \
-    SwrTrace(__FILE__, __LINE__, __FUNCTION__, _fmtstr, ##__VA_ARGS__);
+#define _SWR_TRACE(_fmtstr, ...) SwrTrace(__FILE__, __LINE__, __FUNCTION__, _fmtstr, ##__VA_ARGS__);
  
  #if SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...)              _SWR_ASSERT(true, e, ##__VA_ARGS__)
-#define SWR_ASSUME_ASSERT(e, ...)       SWR_ASSERT(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...)         _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
+#define SWR_ASSERT(e, ...) _SWR_ASSERT(true, e, ##__VA_ARGS__)
+#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSERT(e, ##__VA_ARGS__)
+#define SWR_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
  
  #if defined(assert)
  #undef assert
@@ -160,24 +173,25 @@ void SwrTrace(
  #endif // SWR_ENABLE_ASSERTS
  
  #if SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...)          _SWR_ASSERT(false, e, ##__VA_ARGS__)
-#define SWR_REL_ASSUME_ASSERT(e, ...)   SWR_REL_ASSERT(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...)     _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
+#define SWR_REL_ASSERT(e, ...) _SWR_ASSERT(false, e, ##__VA_ARGS__)
+#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_REL_ASSERT(e, ##__VA_ARGS__)
+#define SWR_REL_TRACE(_fmtstr, ...) _SWR_TRACE(_fmtstr, ##__VA_ARGS__)
  
  // SWR_INVALID is always enabled
  // Funky handling to allow 0 arguments with g++/gcc
  // This is needed because you can't "swallow commas" with ##_VA_ARGS__ unless
  // there is a first argument to the macro.  So having a macro that can optionally
  // accept 0 arguments is tricky.
-#define _SWR_INVALID_0()                _SWR_INVALID(false)
-#define _SWR_INVALID_1(...)             _SWR_INVALID(false, ##__VA_ARGS__)
+#define _SWR_INVALID_0() _SWR_INVALID(false)
+#define _SWR_INVALID_1(...) _SWR_INVALID(false, ##__VA_ARGS__)
  #define _SWR_INVALID_VARGS_(_10, _9, _8, _7, _6, _5, _4, _3, _2, _1, N, ...) N
-#define _SWR_INVALID_VARGS(...)         _SWR_INVALID_VARGS_(__VA_ARGS__, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
-#define _SWR_INVALID_VARGS_0()          1, 2, 3, 4, 5, 6, 7, 9, 9, 10
-#define _SWR_INVALID_CONCAT_(a, b)      a##b
-#define _SWR_INVALID_CONCAT(a, b)       _SWR_INVALID_CONCAT_(a, b)
-#define SWR_INVALID(...)                \
-    _SWR_INVALID_CONCAT(_SWR_INVALID_,_SWR_INVALID_VARGS(_SWR_INVALID_VARGS_0 __VA_ARGS__ ()))(__VA_ARGS__)
+#define _SWR_INVALID_VARGS(...) _SWR_INVALID_VARGS_(__VA_ARGS__, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
+#define _SWR_INVALID_VARGS_0() 1, 2, 3, 4, 5, 6, 7, 9, 9, 10
+#define _SWR_INVALID_CONCAT_(a, b) a##b
+#define _SWR_INVALID_CONCAT(a, b) _SWR_INVALID_CONCAT_(a, b)
+#define SWR_INVALID(...)                                                                       \
+    _SWR_INVALID_CONCAT(_SWR_INVALID_, _SWR_INVALID_VARGS(_SWR_INVALID_VARGS_0 __VA_ARGS__())) \
+    (__VA_ARGS__)
  #endif
  
  #endif // C++
@@ -185,20 +199,33 @@ void SwrTrace(
  #endif // SWR_ENABLE_ASSERTS || SWR_ENABLE_REL_ASSERTS
  
  // Needed to allow passing bitfield members to sizeof() in disabled asserts
-template<typename T>
-static bool SwrSizeofWorkaround(T) {return false;}
+template <typename T>
+static bool SwrSizeofWorkaround(T)
+{
+    return false;
+}
  
  #if !SWR_ENABLE_ASSERTS
-#define SWR_ASSERT(e, ...)              _SWR_MACRO_START (void)sizeof(SwrSizeofWorkaround(e)); _SWR_MACRO_END
-#define SWR_ASSUME_ASSERT(e, ...)       SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_TRACE(_fmtstr, ...)         _SWR_MACRO_START (void)(0); _SWR_MACRO_END
+#define SWR_ASSERT(e, ...)                                 \
+    _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
+    _SWR_MACRO_END
+#define SWR_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
+#define SWR_TRACE(_fmtstr, ...) \
+    _SWR_MACRO_START(void)(0);  \
+    _SWR_MACRO_END
  #endif
  
  #if !SWR_ENABLE_REL_ASSERTS
-#define SWR_REL_ASSERT(e, ...)          _SWR_MACRO_START (void)sizeof(SwrSizeofWorkaround(e)); _SWR_MACRO_END
-#define SWR_INVALID(...)                _SWR_MACRO_START (void)(0); _SWR_MACRO_END
-#define SWR_REL_ASSUME_ASSERT(e, ...)   SWR_ASSUME(e, ##__VA_ARGS__)
-#define SWR_REL_TRACE(_fmtstr, ...)     _SWR_MACRO_START (void)(0); _SWR_MACRO_END
+#define SWR_REL_ASSERT(e, ...)                             \
+    _SWR_MACRO_START(void) sizeof(SwrSizeofWorkaround(e)); \
+    _SWR_MACRO_END
+#define SWR_INVALID(...)       \
+    _SWR_MACRO_START(void)(0); \
+    _SWR_MACRO_END
+#define SWR_REL_ASSUME_ASSERT(e, ...) SWR_ASSUME(e, ##__VA_ARGS__)
+#define SWR_REL_TRACE(_fmtstr, ...) \
+    _SWR_MACRO_START(void)(0);      \
+    _SWR_MACRO_END
  #endif
  
  #if defined(_MSC_VER)
@@ -211,4 +238,4 @@ static bool SwrSizeofWorkaround(T) {return false;}
  
  #define SWR_NOT_IMPL SWR_INVALID("%s not implemented", SWR_FUNCTION_DECL)
  
-#endif//__SWR_ASSERT_H__
+#endif //__SWR_ASSERT_H__
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp

index c932ec0bd6613a8a92fde3f23d43e031bef6de0b..00f331303eeac5b90e9645272a789a06eb67d02f 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file api.cpp
-*
-* @brief API implementation
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file api.cpp
+ *
+ * @brief API implementation
+ *
+ ******************************************************************************/
  
  #include <cfloat>
  #include <cmath>
@@ -46,16 +46,16 @@
  
  #include "common/os.h"
  
-static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
+static const SWR_RECT g_MaxScissorRect = {0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y};
  
-void SetupDefaultState(SWR_CONTEXT *pContext);
+void SetupDefaultState(SWR_CONTEXT* pContext);
  
  static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
  {
      return (SWR_CONTEXT*)hContext;
  }
  
-void WakeAllThreads(SWR_CONTEXT *pContext)
+void WakeAllThreads(SWR_CONTEXT* pContext)
  {
      pContext->FifosNotEmpty.notify_all();
  }
@@ -63,15 +63,14 @@ void WakeAllThreads(SWR_CONTEXT *pContext)
  //////////////////////////////////////////////////////////////////////////
  /// @brief Create SWR Context.
  /// @param pCreateInfo - pointer to creation info.
-HANDLE SwrCreateContext(
-    SWR_CREATECONTEXT_INFO* pCreateInfo)
+HANDLE SwrCreateContext(SWR_CREATECONTEXT_INFO* pCreateInfo)
  {
      RDTSC_RESET();
      RDTSC_INIT(0);
  
      void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4);
      memset(pContextMem, 0, sizeof(SWR_CONTEXT));
-    SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT();
+    SWR_CONTEXT* pContext = new (pContextMem) SWR_CONTEXT();
  
      pContext->privateStateSize = pCreateInfo->privateStateSize;
  
@@ -84,8 +83,10 @@ HANDLE SwrCreateContext(
      pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
      pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT);
  
-    pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
-    pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
+    pContext->pMacroTileManagerArray =
+        (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
+    pContext->pDispatchQueueArray =
+        (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64);
  
      for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
      {
@@ -102,14 +103,14 @@ HANDLE SwrCreateContext(
      }
      else
      {
-        pContext->threadInfo.MAX_WORKER_THREADS         = KNOB_MAX_WORKER_THREADS;
-        pContext->threadInfo.BASE_NUMA_NODE             = KNOB_BASE_NUMA_NODE;
-        pContext->threadInfo.BASE_CORE                  = KNOB_BASE_CORE;
-        pContext->threadInfo.BASE_THREAD                = KNOB_BASE_THREAD;
-        pContext->threadInfo.MAX_NUMA_NODES             = KNOB_MAX_NUMA_NODES;
-        pContext->threadInfo.MAX_CORES_PER_NUMA_NODE    = KNOB_MAX_CORES_PER_NUMA_NODE;
-        pContext->threadInfo.MAX_THREADS_PER_CORE       = KNOB_MAX_THREADS_PER_CORE;
-        pContext->threadInfo.SINGLE_THREADED            = KNOB_SINGLE_THREADED;
+        pContext->threadInfo.MAX_WORKER_THREADS      = KNOB_MAX_WORKER_THREADS;
+        pContext->threadInfo.BASE_NUMA_NODE          = KNOB_BASE_NUMA_NODE;
+        pContext->threadInfo.BASE_CORE               = KNOB_BASE_CORE;
+        pContext->threadInfo.BASE_THREAD             = KNOB_BASE_THREAD;
+        pContext->threadInfo.MAX_NUMA_NODES          = KNOB_MAX_NUMA_NODES;
+        pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;
+        pContext->threadInfo.MAX_THREADS_PER_CORE    = KNOB_MAX_THREADS_PER_CORE;
+        pContext->threadInfo.SINGLE_THREADED         = KNOB_SINGLE_THREADED;
      }
  
      if (pCreateInfo->pApiThreadInfo)
@@ -118,9 +119,9 @@ HANDLE SwrCreateContext(
      }
      else
      {
-        pContext->apiThreadInfo.bindAPIThread0          = true;
-        pContext->apiThreadInfo.numAPIReservedThreads   = 1;
-        pContext->apiThreadInfo.numAPIThreadsPerCore    = 1;
+        pContext->apiThreadInfo.bindAPIThread0        = true;
+        pContext->apiThreadInfo.numAPIReservedThreads = 1;
+        pContext->apiThreadInfo.numAPIThreadsPerCore  = 1;
      }
  
      if (pCreateInfo->pWorkerPrivateState)
@@ -146,12 +147,14 @@ HANDLE SwrCreateContext(
      }
  
      pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads];
-    pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
+    pContext->pStats =
+        (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64);
  
  #if defined(KNOB_ENABLE_AR)
      // Setup ArchRast thread contexts which includes +1 for API thread.
-    pContext->pArContext = new HANDLE[pContext->NumWorkerThreads+1];
-    pContext->pArContext[pContext->NumWorkerThreads] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
+    pContext->pArContext = new HANDLE[pContext->NumWorkerThreads + 1];
+    pContext->pArContext[pContext->NumWorkerThreads] =
+        ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API);
  #endif
  
      // Allocate scratch space for workers.
@@ -159,14 +162,17 @@ HANDLE SwrCreateContext(
      for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
      {
  #if defined(_WIN32)
-        uint32_t numaNode = pContext->threadPool.pThreadData ?
-            pContext->threadPool.pThreadData[i].numaId : 0;
-        pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(
-            GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE),
-            MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE,
-            numaNode);
+        uint32_t numaNode =
+            pContext->threadPool.pThreadData ? pContext->threadPool.pThreadData[i].numaId : 0;
+        pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma(GetCurrentProcess(),
+                                                              nullptr,
+                                                              32 * sizeof(KILOBYTE),
+                                                              MEM_RESERVE | MEM_COMMIT,
+                                                              PAGE_READWRITE,
+                                                              numaNode);
  #else
-        pContext->ppScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
+        pContext->ppScratch[i] =
+            (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4);
  #endif
  
  #if defined(KNOB_ENABLE_AR)
@@ -187,13 +193,13 @@ HANDLE SwrCreateContext(
      pContext->pHotTileMgr = new HotTileMgr();
  
      // initialize callback functions
-    pContext->pfnLoadTile = pCreateInfo->pfnLoadTile;
-    pContext->pfnStoreTile = pCreateInfo->pfnStoreTile;
-    pContext->pfnClearTile = pCreateInfo->pfnClearTile;
+    pContext->pfnLoadTile            = pCreateInfo->pfnLoadTile;
+    pContext->pfnStoreTile           = pCreateInfo->pfnStoreTile;
+    pContext->pfnClearTile           = pCreateInfo->pfnClearTile;
      pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset;
-    pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats;
-    pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE;
-    
+    pContext->pfnUpdateStats         = pCreateInfo->pfnUpdateStats;
+    pContext->pfnUpdateStatsFE       = pCreateInfo->pfnUpdateStatsFE;
+
  
      // pass pointer to bucket manager back to caller
  #ifdef KNOB_ENABLE_RDTSC
@@ -212,11 +218,11 @@ void CopyState(DRAW_STATE& dst, const DRAW_STATE& src)
      memcpy(&dst.state, &src.state, sizeof(API_STATE));
  }
  
-template<bool IsDraw>
-void QueueWork(SWR_CONTEXT *pContext)
+template <bool IsDraw>
+void QueueWork(SWR_CONTEXT* pContext)
  {
-    DRAW_CONTEXT* pDC = pContext->pCurDrawContext;
-    uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
+    DRAW_CONTEXT* pDC     = pContext->pCurDrawContext;
+    uint32_t      dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
  
      if (IsDraw)
      {
@@ -249,7 +255,8 @@ void QueueWork(SWR_CONTEXT *pContext)
  
          if (IsDraw)
          {
-            uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId };
+            uint32_t curDraw[2] = {pContext->pCurDrawContext->drawId,
+                                   pContext->pCurDrawContext->drawId};
              WorkOnFifoFE(pContext, 0, curDraw[0]);
              WorkOnFifoBE(pContext, 0, curDraw[1], *pContext->pSingleThreadLockedTiles, 0, 0);
          }
@@ -259,8 +266,11 @@ void QueueWork(SWR_CONTEXT *pContext)
              WorkOnCompute(pContext, 0, curDispatch);
          }
  
-        // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers).
-        while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {}
+        // Dequeue the work here, if not already done, since we're single threaded (i.e. no
+        // workers).
+        while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0)
+        {
+        }
  
          // restore csr
          _mm_setcsr(mxcsr);
@@ -272,9 +282,10 @@ void QueueWork(SWR_CONTEXT *pContext)
          RDTSC_END(APIDrawWakeAllThreads, 1);
      }
  
-    // Set current draw context to NULL so that next state call forces a new draw context to be created and populated.
+    // Set current draw context to NULL so that next state call forces a new draw context to be
+    // created and populated.
      pContext->pPrevDrawContext = pContext->pCurDrawContext;
-    pContext->pCurDrawContext = nullptr;
+    pContext->pCurDrawContext  = nullptr;
  }
  
  INLINE void QueueDraw(SWR_CONTEXT* pContext)
@@ -287,7 +298,7 @@ INLINE void QueueDispatch(SWR_CONTEXT* pContext)
      QueueWork<false>(pContext);
  }
  
-DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
+DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT* pContext, bool isSplitDraw = false)
  {
      RDTSC_BEGIN(APIGetDrawContext, 0);
      // If current draw context is null then need to obtain a new draw context to use from ring.
@@ -309,14 +320,14 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
              pContext->cachingArenaAllocator.FreeOldBlocks();
  
              pContext->lastFrameChecked = pContext->frameCount;
-            pContext->lastDrawChecked = curDraw;
+            pContext->lastDrawChecked  = curDraw;
          }
  
          DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex];
-        pContext->pCurDrawContext = pCurDrawContext;
+        pContext->pCurDrawContext     = pCurDrawContext;
  
          // Assign next available entry in DS ring to this DC.
-        uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
+        uint32_t dsIndex        = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT;
          pCurDrawContext->pState = &pContext->dsRing[dsIndex];
  
          // Copy previous state to current state.
@@ -336,7 +347,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
  
                  pCurDrawContext->pState->pPrivateState = nullptr;
  
-                pContext->curStateId++;  // Progress state ring index forward.
+                pContext->curStateId++; // Progress state ring index forward.
              }
              else
              {
@@ -349,21 +360,21 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
          else
          {
              SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true);
-            pContext->curStateId++;  // Progress state ring index forward.
+            pContext->curStateId++; // Progress state ring index forward.
          }
  
          SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true);
  
          // Reset dependency
-        pCurDrawContext->dependent = false;
+        pCurDrawContext->dependent   = false;
          pCurDrawContext->dependentFE = false;
  
-        pCurDrawContext->pContext = pContext;
+        pCurDrawContext->pContext  = pContext;
          pCurDrawContext->isCompute = false; // Dispatch has to set this to true.
  
-        pCurDrawContext->doneFE = false;
-        pCurDrawContext->FeLock = 0;
-        pCurDrawContext->threadsDone = 0;
+        pCurDrawContext->doneFE                         = false;
+        pCurDrawContext->FeLock                         = 0;
+        pCurDrawContext->threadsDone                    = 0;
          pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr;
  
          pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads);
@@ -382,7 +393,7 @@ DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false)
      return pContext->pCurDrawContext;
  }
  
-API_STATE* GetDrawState(SWR_CONTEXT *pContext)
+API_STATE* GetDrawState(SWR_CONTEXT* pContext)
  {
      DRAW_CONTEXT* pDC = GetDrawContext(pContext);
      SWR_ASSERT(pDC->pState != nullptr);
@@ -392,13 +403,13 @@ API_STATE* GetDrawState(SWR_CONTEXT *pContext)
  
  void SwrDestroyContext(HANDLE hContext)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
-    pDC->FeWork.type = SHUTDOWN;
+    pDC->FeWork.type    = SHUTDOWN;
      pDC->FeWork.pfnWork = ProcessShutdown;
  
-    //enqueue
+    // enqueue
      QueueDraw(pContext);
  
      DestroyThreadPool(pContext, &pContext->threadPool);
@@ -442,72 +453,65 @@ void SwrDestroyContext(HANDLE hContext)
  
  void SwrBindApiThread(HANDLE hContext, uint32_t apiThreadId)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
      BindApiThread(pContext, apiThreadId);
  }
  
-void SWR_API SwrSaveState(
-    HANDLE hContext,
-    void* pOutputStateBlock,
-    size_t memSize)
+void SWR_API SwrSaveState(HANDLE hContext, void* pOutputStateBlock, size_t memSize)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    auto pSrc = GetDrawState(pContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
+    auto         pSrc     = GetDrawState(pContext);
      SWR_ASSERT(pOutputStateBlock && memSize >= sizeof(*pSrc));
  
      memcpy(pOutputStateBlock, pSrc, sizeof(*pSrc));
  }
  
-void SWR_API SwrRestoreState(
-    HANDLE hContext,
-    const void* pStateBlock,
-    size_t memSize)
+void SWR_API SwrRestoreState(HANDLE hContext, const void* pStateBlock, size_t memSize)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    auto pDst = GetDrawState(pContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
+    auto         pDst     = GetDrawState(pContext);
      SWR_ASSERT(pStateBlock && memSize >= sizeof(*pDst));
  
      memcpy(pDst, pStateBlock, sizeof(*pDst));
  }
  
-void SetupDefaultState(SWR_CONTEXT *pContext)
+void SetupDefaultState(SWR_CONTEXT* pContext)
  {
      API_STATE* pState = GetDrawState(pContext);
  
-    pState->rastState.cullMode = SWR_CULLMODE_NONE;
+    pState->rastState.cullMode     = SWR_CULLMODE_NONE;
      pState->rastState.frontWinding = SWR_FRONTWINDING_CCW;
  
-    pState->depthBoundsState.depthBoundsTestEnable = false;
+    pState->depthBoundsState.depthBoundsTestEnable   = false;
      pState->depthBoundsState.depthBoundsTestMinValue = 0.0f;
      pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f;
  }
  
-void SWR_API SwrSync(
-    HANDLE hContext,
-    PFN_CALLBACK_FUNC pfnFunc,
-    uint64_t userData,
-    uint64_t userData2,
-    uint64_t userData3)
+void SWR_API SwrSync(HANDLE            hContext,
+                     PFN_CALLBACK_FUNC pfnFunc,
+                     uint64_t          userData,
+                     uint64_t          userData2,
+                     uint64_t          userData3)
  {
      SWR_ASSERT(pfnFunc != nullptr);
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      RDTSC_BEGIN(APISync, 0);
  
-    pDC->FeWork.type = SYNC;
+    pDC->FeWork.type    = SYNC;
      pDC->FeWork.pfnWork = ProcessSync;
  
      // Setup callback function
      pDC->retireCallback.pfnCallbackFunc = pfnFunc;
-    pDC->retireCallback.userData = userData;
-    pDC->retireCallback.userData2 = userData2;
-    pDC->retireCallback.userData3 = userData3;
+    pDC->retireCallback.userData        = userData;
+    pDC->retireCallback.userData2       = userData2;
+    pDC->retireCallback.userData3       = userData3;
  
      AR_API_EVENT(SwrSyncEvent(pDC->drawId));
  
-    //enqueue
+    // enqueue
      QueueDraw(pContext);
  
      RDTSC_END(APISync, 1);
@@ -515,15 +519,15 @@ void SWR_API SwrSync(
  
  void SwrStallBE(HANDLE hContext)
  {
-    SWR_CONTEXT* pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      pDC->dependent = true;
  }
  
  void SwrWaitForIdle(HANDLE hContext)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
  
      RDTSC_BEGIN(APIWaitForIdle, 0);
  
@@ -537,7 +541,7 @@ void SwrWaitForIdle(HANDLE hContext)
  
  void SwrWaitForIdleFE(HANDLE hContext)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
  
      RDTSC_BEGIN(APIWaitForIdle, 0);
  
@@ -549,42 +553,34 @@ void SwrWaitForIdleFE(HANDLE hContext)
      RDTSC_END(APIWaitForIdle, 1);
  }
  
-void SwrSetVertexBuffers(
-    HANDLE hContext,
-    uint32_t numBuffers,
-    const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
+void SwrSetVertexBuffers(HANDLE                         hContext,
+                         uint32_t                       numBuffers,
+                         const SWR_VERTEX_BUFFER_STATE* pVertexBuffers)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      for (uint32_t i = 0; i < numBuffers; ++i)
      {
-        const SWR_VERTEX_BUFFER_STATE *pVB = &pVertexBuffers[i];
-        pState->vertexBuffers[pVB->index] = *pVB;
+        const SWR_VERTEX_BUFFER_STATE* pVB = &pVertexBuffers[i];
+        pState->vertexBuffers[pVB->index]  = *pVB;
      }
  }
  
-void SwrSetIndexBuffer(
-    HANDLE hContext,
-    const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
+void SwrSetIndexBuffer(HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->indexBuffer = *pIndexBuffer;
  }
  
-void SwrSetFetchFunc(
-    HANDLE hContext,
-    PFN_FETCH_FUNC    pfnFetchFunc)
+void SwrSetFetchFunc(HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->pfnFetchFunc = pfnFetchFunc;
  }
  
-void SwrSetSoFunc(
-    HANDLE hContext,
-    PFN_SO_FUNC    pfnSoFunc,
-    uint32_t streamIndex)
+void SwrSetSoFunc(HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
@@ -593,19 +589,14 @@ void SwrSetSoFunc(
      pState->pfnSoFunc[streamIndex] = pfnSoFunc;
  }
  
-void SwrSetSoState(
-    HANDLE hContext,
-    SWR_STREAMOUT_STATE* pSoState)
+void SwrSetSoState(HANDLE hContext, SWR_STREAMOUT_STATE* pSoState)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->soState = *pSoState;
  }
  
-void SwrSetSoBuffers(
-    HANDLE hContext,
-    SWR_STREAMOUT_BUFFER* pSoBuffer,
-    uint32_t slot)
+void SwrSetSoBuffers(HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
@@ -614,168 +605,136 @@ void SwrSetSoBuffers(
      pState->soBuffer[slot] = *pSoBuffer;
  }
  
-void SwrSetVertexFunc(
-    HANDLE hContext,
-    PFN_VERTEX_FUNC pfnVertexFunc)
+void SwrSetVertexFunc(HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->pfnVertexFunc = pfnVertexFunc;
  }
  
-void SwrSetFrontendState(
-    HANDLE hContext,
-    SWR_FRONTEND_STATE *pFEState)
+void SwrSetFrontendState(HANDLE hContext, SWR_FRONTEND_STATE* pFEState)
  {
-    API_STATE* pState = GetDrawState(GetContext(hContext));
+    API_STATE* pState     = GetDrawState(GetContext(hContext));
      pState->frontendState = *pFEState;
  }
  
-void SwrSetGsState(
-    HANDLE hContext,
-    SWR_GS_STATE *pGSState)
+void SwrSetGsState(HANDLE hContext, SWR_GS_STATE* pGSState)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
-    pState->gsState = *pGSState;
+    pState->gsState   = *pGSState;
  }
  
-void SwrSetGsFunc(
-    HANDLE hContext,
-    PFN_GS_FUNC pfnGsFunc)
+void SwrSetGsFunc(HANDLE hContext, PFN_GS_FUNC pfnGsFunc)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
      pState->pfnGsFunc = pfnGsFunc;
  }
  
-void SwrSetCsFunc(
-    HANDLE hContext,
-    PFN_CS_FUNC pfnCsFunc,
-    uint32_t totalThreadsInGroup,
-    uint32_t totalSpillFillSize,
-    uint32_t scratchSpaceSizePerInstance,
-    uint32_t numInstances)
-{
-    API_STATE* pState = GetDrawState(GetContext(hContext));
-    pState->pfnCsFunc = pfnCsFunc;
-    pState->totalThreadsInGroup = totalThreadsInGroup;
-    pState->totalSpillFillSize = totalSpillFillSize;
-    pState->scratchSpaceSize = scratchSpaceSizePerInstance;
+void SwrSetCsFunc(HANDLE      hContext,
+                  PFN_CS_FUNC pfnCsFunc,
+                  uint32_t    totalThreadsInGroup,
+                  uint32_t    totalSpillFillSize,
+                  uint32_t    scratchSpaceSizePerInstance,
+                  uint32_t    numInstances)
+{
+    API_STATE* pState                = GetDrawState(GetContext(hContext));
+    pState->pfnCsFunc                = pfnCsFunc;
+    pState->totalThreadsInGroup      = totalThreadsInGroup;
+    pState->totalSpillFillSize       = totalSpillFillSize;
+    pState->scratchSpaceSize         = scratchSpaceSizePerInstance;
      pState->scratchSpaceNumInstances = numInstances;
  }
  
-void SwrSetTsState(
-    HANDLE hContext,
-    SWR_TS_STATE *pState)
+void SwrSetTsState(HANDLE hContext, SWR_TS_STATE* pState)
  {
      API_STATE* pApiState = GetDrawState(GetContext(hContext));
-    pApiState->tsState = *pState;
+    pApiState->tsState   = *pState;
  }
  
-void SwrSetHsFunc(
-    HANDLE hContext,
-    PFN_HS_FUNC pfnFunc)
+void SwrSetHsFunc(HANDLE hContext, PFN_HS_FUNC pfnFunc)
  {
      API_STATE* pApiState = GetDrawState(GetContext(hContext));
      pApiState->pfnHsFunc = pfnFunc;
  }
  
-void SwrSetDsFunc(
-    HANDLE hContext,
-    PFN_DS_FUNC pfnFunc)
+void SwrSetDsFunc(HANDLE hContext, PFN_DS_FUNC pfnFunc)
  {
      API_STATE* pApiState = GetDrawState(GetContext(hContext));
      pApiState->pfnDsFunc = pfnFunc;
  }
  
-void SwrSetDepthStencilState(
-    HANDLE hContext,
-    SWR_DEPTH_STENCIL_STATE *pDSState)
+void SwrSetDepthStencilState(HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pDSState)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->depthStencilState = *pDSState;
  }
  
-void SwrSetBackendState(
-    HANDLE hContext,
-    SWR_BACKEND_STATE *pBEState)
+void SwrSetBackendState(HANDLE hContext, SWR_BACKEND_STATE* pBEState)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->backendState = *pBEState;
  }
  
-void SwrSetDepthBoundsState(
-    HANDLE hContext,
-    SWR_DEPTH_BOUNDS_STATE *pDBState)
+void SwrSetDepthBoundsState(HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pDBState)
  {
      API_STATE* pState = GetDrawState(GetContext(hContext));
  
      pState->depthBoundsState = *pDBState;
  }
  
-void SwrSetPixelShaderState(
-    HANDLE hContext,
-    SWR_PS_STATE *pPSState)
+void SwrSetPixelShaderState(HANDLE hContext, SWR_PS_STATE* pPSState)
  {
-    API_STATE *pState = GetDrawState(GetContext(hContext));
-    pState->psState = *pPSState;
+    API_STATE* pState = GetDrawState(GetContext(hContext));
+    pState->psState   = *pPSState;
  }
  
-void SwrSetBlendState(
-    HANDLE hContext,
-    SWR_BLEND_STATE *pBlendState)
+void SwrSetBlendState(HANDLE hContext, SWR_BLEND_STATE* pBlendState)
  {
-    API_STATE *pState = GetDrawState(GetContext(hContext));
+    API_STATE* pState = GetDrawState(GetContext(hContext));
      memcpy(&pState->blendState, pBlendState, sizeof(SWR_BLEND_STATE));
  }
  
-void SwrSetBlendFunc(
-    HANDLE hContext,
-    uint32_t renderTarget,
-    PFN_BLEND_JIT_FUNC pfnBlendFunc)
+void SwrSetBlendFunc(HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc)
  {
      SWR_ASSERT(renderTarget < SWR_NUM_RENDERTARGETS);
-    API_STATE *pState = GetDrawState(GetContext(hContext));
+    API_STATE* pState                  = GetDrawState(GetContext(hContext));
      pState->pfnBlendFunc[renderTarget] = pfnBlendFunc;
  }
  
  // update guardband multipliers for the viewport
-void updateGuardbands(API_STATE *pState)
+void updateGuardbands(API_STATE* pState)
  {
      uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
  
-    for(uint32_t i = 0; i < numGbs; ++i)
+    for (uint32_t i = 0; i < numGbs; ++i)
      {
          // guardband center is viewport center
-        pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
-        pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
-        pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
+        pState->gbState.left[i]   = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
+        pState->gbState.right[i]  = KNOB_GUARDBAND_WIDTH / pState->vp[i].width;
+        pState->gbState.top[i]    = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
          pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height;
      }
  }
  
-void SwrSetRastState(
-    HANDLE hContext,
-    const SWR_RASTSTATE *pRastState)
+void SwrSetRastState(HANDLE hContext, const SWR_RASTSTATE* pRastState)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    API_STATE* pState = GetDrawState(pContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
+    API_STATE*   pState   = GetDrawState(pContext);
  
      memcpy(&pState->rastState, pRastState, sizeof(SWR_RASTSTATE));
  }
  
-void SwrSetViewports(
-    HANDLE hContext,
-    uint32_t numViewports,
-    const SWR_VIEWPORT* pViewports,
-    const SWR_VIEWPORT_MATRICES* pMatrices)
+void SwrSetViewports(HANDLE                       hContext,
+                     uint32_t                     numViewports,
+                     const SWR_VIEWPORT*          pViewports,
+                     const SWR_VIEWPORT_MATRICES* pMatrices)
  {
-    SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS,
-        "Invalid number of viewports.");
+    SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of viewports.");
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    API_STATE* pState = GetDrawState(pContext);
+    SWR_CONTEXT* pContext = GetContext(hContext);
+    API_STATE*   pState   = GetDrawState(pContext);
  
      memcpy(&pState->vp[0], pViewports, sizeof(SWR_VIEWPORT) * numViewports);
      // @todo Faster to copy portions of the SOA or just copy all of it?
@@ -784,27 +743,24 @@ void SwrSetViewports(
      updateGuardbands(pState);
  }
  
-void SwrSetScissorRects(
-    HANDLE hContext,
-    uint32_t numScissors,
-    const SWR_RECT* pScissors)
+void SwrSetScissorRects(HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors)
  {
-    SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
-        "Invalid number of scissor rects.");
+    SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, "Invalid number of scissor rects.");
  
      API_STATE* pState = GetDrawState(GetContext(hContext));
      memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
  };
  
-void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
+void SetupMacroTileScissors(DRAW_CONTEXT* pDC)
  {
-    API_STATE *pState = &pDC->pState->state;
-    uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
+    API_STATE* pState = &pDC->pState->state;
+    uint32_t numScissors =
+        pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1;
      pState->scissorsTileAligned = true;
  
      for (uint32_t index = 0; index < numScissors; ++index)
      {
-        SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index];
+        SWR_RECT& scissorInFixedPoint = pState->scissorsInFixedPoint[index];
  
          // Set up scissor dimensions based on scissor or viewport
          if (pState->rastState.scissorEnable)
@@ -813,8 +769,9 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
          }
          else
          {
-            // the vp width and height must be added to origin un-rounded then the result round to -inf.
-            // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
+            // the vp width and height must be added to origin un-rounded then the result round to
+            // -inf. The cast to int works for rounding assuming all [left, right, top, bottom] are
+            // positive.
              scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x;
              scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width);
              scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y;
@@ -826,7 +783,7 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
  
          // Test for tile alignment
          bool tileAligned;
-        tileAligned  = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
+        tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0;
          tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0;
          tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0;
          tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0;
@@ -848,12 +805,12 @@ void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
  
  // templated backend function tables
  
-void SetupPipeline(DRAW_CONTEXT *pDC)
+void SetupPipeline(DRAW_CONTEXT* pDC)
  {
-    DRAW_STATE* pState = pDC->pState;
-    const SWR_RASTSTATE &rastState = pState->state.rastState;
-    const SWR_PS_STATE &psState = pState->state.psState;
-    BACKEND_FUNCS& backendFuncs = pState->backendFuncs;
+    DRAW_STATE*          pState       = pDC->pState;
+    const SWR_RASTSTATE& rastState    = pState->state.rastState;
+    const SWR_PS_STATE&  psState      = pState->state.psState;
+    BACKEND_FUNCS&       backendFuncs = pState->backendFuncs;
  
      // setup backend
      if (psState.pfnPixelShader == nullptr)
@@ -863,35 +820,46 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
      else
      {
          const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0;
-        const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
-        const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
-        const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0;
+        const bool     bMultisampleEnable =
+            ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0;
+        const uint32_t centroid =
+            ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 1 : 0;
+        const uint32_t canEarlyZ =
+            (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0;
          SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask;
-        
+
          // select backend function
-        switch(psState.shadingRate)
+        switch (psState.shadingRate)
          {
          case SWR_SHADING_RATE_PIXEL:
-            if(bMultisampleEnable)
+            if (bMultisampleEnable)
              {
                  // always need to generate I & J per sample for Z interpolation
-                barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
-                backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage]
-                                                                [centroid][forcedSampleCount][canEarlyZ]
+                barycentricsMask =
+                    (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
+                backendFuncs.pfnBackend =
+                    gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern]
+                                          [psState.inputCoverage][centroid][forcedSampleCount]
+                                          [canEarlyZ]
                      ;
              }
              else
              {
                  // always need to generate I & J per pixel for Z interpolation
-                barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
-                backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
+                barycentricsMask =
+                    (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK);
+                backendFuncs.pfnBackend =
+                    gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ];
              }
              break;
          case SWR_SHADING_RATE_SAMPLE:
              SWR_ASSERT(rastState.bIsCenterPattern != true);
              // always need to generate I & J per sample for Z interpolation
-            barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
-            backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ];
+            barycentricsMask =
+                (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK);
+            backendFuncs.pfnBackend =
+                gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid]
+                                       [canEarlyZ];
              break;
          default:
              SWR_ASSERT(0 && "Invalid shading rate");
@@ -909,10 +877,10 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
      {
      case TOP_POINT_LIST:
          pState->pfnProcessPrims = ClipPoints;
-        pfnBinner = BinPoints;
+        pfnBinner               = BinPoints;
  #if USE_SIMD16_FRONTEND
          pState->pfnProcessPrims_simd16 = ClipPoints_simd16;
-        pfnBinner_simd16 = BinPoints_simd16;
+        pfnBinner_simd16               = BinPoints_simd16;
  #endif
          break;
      case TOP_LINE_LIST:
@@ -921,15 +889,15 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
      case TOP_LINE_LIST_ADJ:
      case TOP_LISTSTRIP_ADJ:
          pState->pfnProcessPrims = ClipLines;
-        pfnBinner = BinLines;
+        pfnBinner               = BinLines;
  #if USE_SIMD16_FRONTEND
          pState->pfnProcessPrims_simd16 = ClipLines_simd16;
-        pfnBinner_simd16 = BinLines_simd16;
+        pfnBinner_simd16               = BinLines_simd16;
  #endif
          break;
      default:
          pState->pfnProcessPrims = ClipTriangles;
-        pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0));
+        pfnBinner               = GetBinTrianglesFunc((rastState.conservativeRast > 0));
  #if USE_SIMD16_FRONTEND
          pState->pfnProcessPrims_simd16 = ClipTriangles_simd16;
          pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0));
@@ -971,14 +939,16 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
  
  
      // set up the frontend attribute count
-    pState->state.feNumAttributes = 0;
+    pState->state.feNumAttributes         = 0;
      const SWR_BACKEND_STATE& backendState = pState->state.backendState;
      if (backendState.swizzleEnable)
      {
          // attribute swizzling is enabled, iterate over the map and record the max attribute used
          for (uint32_t i = 0; i < backendState.numAttributes; ++i)
          {
-            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
+            pState->state.feNumAttributes =
+                std::max(pState->state.feNumAttributes,
+                         (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1);
          }
      }
      else
@@ -997,37 +967,44 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
          DWORD maxAttrib;
          if (_BitScanReverse64(&maxAttrib, streamMasks))
          {
-            pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
+            pState->state.feNumAttributes =
+                std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1));
          }
      }
  
      // complicated logic to test for cases where we don't need backing hottile memory for a draw
-    // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled.
-    pState->state.depthHottileEnable = ((!(pState->state.depthStencilState.depthTestEnable &&
-                                           !pState->state.depthStencilState.depthWriteEnable &&
-                                           !pState->state.depthBoundsState.depthBoundsTestEnable &&
-                                           pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) && 
-                                        (pState->state.depthStencilState.depthTestEnable || 
-                                         pState->state.depthStencilState.depthWriteEnable ||
-                                         pState->state.depthBoundsState.depthBoundsTestEnable)) ? true : false;
-
-    pState->state.stencilHottileEnable = (((!(pState->state.depthStencilState.stencilTestEnable &&
-                                             !pState->state.depthStencilState.stencilWriteEnable &&
-                                              pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
-                                          // for stencil we have to check the double sided state as well
-                                          (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
-                                             !pState->state.depthStencilState.stencilWriteEnable &&
-                                              pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) && 
-                                          (pState->state.depthStencilState.stencilTestEnable  ||
-                                           pState->state.depthStencilState.stencilWriteEnable)) ? true : false;
-
+    // have to check for the special case where depth/stencil test is enabled but depthwrite is
+    // disabled.
+    pState->state.depthHottileEnable =
+        ((!(pState->state.depthStencilState.depthTestEnable &&
+            !pState->state.depthStencilState.depthWriteEnable &&
+            !pState->state.depthBoundsState.depthBoundsTestEnable &&
+            pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) &&
+         (pState->state.depthStencilState.depthTestEnable ||
+          pState->state.depthStencilState.depthWriteEnable ||
+          pState->state.depthBoundsState.depthBoundsTestEnable))
+            ? true
+            : false;
+
+    pState->state.stencilHottileEnable =
+        (((!(pState->state.depthStencilState.stencilTestEnable &&
+             !pState->state.depthStencilState.stencilWriteEnable &&
+             pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) ||
+          // for stencil we have to check the double sided state as well
+          (!(pState->state.depthStencilState.doubleSidedStencilTestEnable &&
+             !pState->state.depthStencilState.stencilWriteEnable &&
+             pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) &&
+         (pState->state.depthStencilState.stencilTestEnable ||
+          pState->state.depthStencilState.stencilWriteEnable))
+            ? true
+            : false;
  
      uint32_t hotTileEnable = pState->state.psState.renderTargetMask;
  
      // Disable hottile for surfaces with no writes
      if (psState.pfnPixelShader != nullptr)
      {
-        DWORD rt;
+        DWORD    rt;
          uint32_t rtMask = pState->state.psState.renderTargetMask;
          while (_BitScanForward(&rt, rtMask))
          {
@@ -1045,33 +1022,39 @@ void SetupPipeline(DRAW_CONTEXT *pDC)
  
      pState->state.colorHottileEnable = hotTileEnable;
  
-
      // Setup depth quantization function
      if (pState->state.depthHottileEnable)
      {
          switch (pState->state.rastState.depthFormat)
          {
-        case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
-        case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
-        case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
-        case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
-        default: SWR_INVALID("Unsupported depth format for depth quantiztion.");
-            pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+        case R32_FLOAT_X8X24_TYPELESS:
+            pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT_X8X24_TYPELESS>;
+            break;
+        case R32_FLOAT:
+            pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
+            break;
+        case R24_UNORM_X8_TYPELESS:
+            pState->state.pfnQuantizeDepth = QuantizeDepth<R24_UNORM_X8_TYPELESS>;
+            break;
+        case R16_UNORM:
+            pState->state.pfnQuantizeDepth = QuantizeDepth<R16_UNORM>;
+            break;
+        default:
+            SWR_INVALID("Unsupported depth format for depth quantiztion.");
+            pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
          }
      }
      else
      {
          // set up pass-through quantize if depth isn't enabled
-        pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
+        pState->state.pfnQuantizeDepth = QuantizeDepth<R32_FLOAT>;
      }
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief InitDraw
  /// @param pDC - Draw context to initialize for this draw.
-void InitDraw(
-    DRAW_CONTEXT *pDC,
-    bool isSplitDraw)
+void InitDraw(DRAW_CONTEXT* pDC, bool isSplitDraw)
  {
      // We don't need to re-setup the scissors/pipeline state again for split draw.
      if (isSplitDraw == false)
@@ -1079,7 +1062,6 @@ void InitDraw(
          SetupMacroTileScissors(pDC);
          SetupPipeline(pDC);
      }
-    
  
  }
  
@@ -1087,10 +1069,7 @@ void InitDraw(
  /// @brief We can split the draw for certain topologies for better performance.
  /// @param totalVerts - Total vertices for draw
  /// @param topology - Topology used for draw
-uint32_t MaxVertsPerDraw(
-    DRAW_CONTEXT* pDC,
-    uint32_t totalVerts,
-    PRIMITIVE_TOPOLOGY topology)
+uint32_t MaxVertsPerDraw(DRAW_CONTEXT* pDC, uint32_t totalVerts, PRIMITIVE_TOPOLOGY topology)
  {
      API_STATE& state = pDC->pState->state;
  
@@ -1157,7 +1136,7 @@ uint32_t MaxVertsPerDraw(
          if (pDC->pState->state.tsState.tsEnable)
          {
              uint32_t vertsPerPrim = topology - TOP_PATCHLIST_BASE;
-            vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
+            vertsPerDraw          = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
          }
          break;
      default:
@@ -1168,7 +1147,6 @@ uint32_t MaxVertsPerDraw(
      return vertsPerDraw;
  }
  
-
  //////////////////////////////////////////////////////////////////////////
  /// @brief DrawInstanced
  /// @param hContext - Handle passed back from SwrCreateContext
@@ -1176,31 +1154,31 @@ uint32_t MaxVertsPerDraw(
  /// @param numVerts - How many vertices to read sequentially from vertex data (per instance).
  /// @param startVertex - Specifies start vertex for draw. (vertex data)
  /// @param numInstances - How many instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void DrawInstanced(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numVertices,
-    uint32_t startVertex,
-    uint32_t numInstances = 1,
-    uint32_t startInstance = 0)
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void DrawInstanced(HANDLE             hContext,
+                   PRIMITIVE_TOPOLOGY topology,
+                   uint32_t           numVertices,
+                   uint32_t           startVertex,
+                   uint32_t           numInstances  = 1,
+                   uint32_t           startInstance = 0)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      RDTSC_BEGIN(APIDraw, pDC->drawId);
  
      uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
-    uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
-    uint32_t remainingVerts = numVertices;
+    uint32_t primsPerDraw    = GetNumPrims(topology, maxVertsPerDraw);
+    uint32_t remainingVerts  = numVertices;
  
-    API_STATE    *pState = &pDC->pState->state;
-    pState->topology = topology;
+    API_STATE* pState  = &pDC->pState->state;
+    pState->topology   = topology;
      pState->forceFront = false;
  
      // disable culling for points/lines
@@ -1208,7 +1186,7 @@ void DrawInstanced(
      if (topology == TOP_POINT_LIST)
      {
          pState->rastState.cullMode = SWR_CULLMODE_NONE;
-        pState->forceFront = true;
+        pState->forceFront         = true;
      }
      else if (topology == TOP_RECT_LIST)
      {
@@ -1218,42 +1196,50 @@ void DrawInstanced(
      int draw = 0;
      while (remainingVerts)
      {
-        uint32_t numVertsForDraw = (remainingVerts < maxVertsPerDraw) ?
-        remainingVerts : maxVertsPerDraw;
+        uint32_t numVertsForDraw =
+            (remainingVerts < maxVertsPerDraw) ? remainingVerts : maxVertsPerDraw;
  
-        bool isSplitDraw = (draw > 0) ? true : false;
-        DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw);
+        bool          isSplitDraw = (draw > 0) ? true : false;
+        DRAW_CONTEXT* pDC         = GetDrawContext(pContext, isSplitDraw);
          InitDraw(pDC, isSplitDraw);
  
-        pDC->FeWork.type = DRAW;
-        pDC->FeWork.pfnWork = GetProcessDrawFunc(
-            false,  // IsIndexed
-            false, // bEnableCutIndex
-            pState->tsState.tsEnable,
-            pState->gsState.gsEnable,
-            pState->soState.soEnable,
-            pDC->pState->pfnProcessPrims != nullptr);
-        pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
-        pDC->FeWork.desc.draw.startVertex = startVertex;
-        pDC->FeWork.desc.draw.numInstances = numInstances;
+        pDC->FeWork.type                    = DRAW;
+        pDC->FeWork.pfnWork                 = GetProcessDrawFunc(false, // IsIndexed
+                                                 false, // bEnableCutIndex
+                                                 pState->tsState.tsEnable,
+                                                 pState->gsState.gsEnable,
+                                                 pState->soState.soEnable,
+                                                 pDC->pState->pfnProcessPrims != nullptr);
+        pDC->FeWork.desc.draw.numVerts      = numVertsForDraw;
+        pDC->FeWork.desc.draw.startVertex   = startVertex;
+        pDC->FeWork.desc.draw.numInstances  = numInstances;
          pDC->FeWork.desc.draw.startInstance = startInstance;
-        pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
+        pDC->FeWork.desc.draw.startPrimID   = draw * primsPerDraw;
          pDC->FeWork.desc.draw.startVertexID = draw * maxVertsPerDraw;
  
          pDC->cleanupState = (remainingVerts == numVertsForDraw);
  
-        //enqueue DC
+        // enqueue DC
          QueueDraw(pContext);
  
-        AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertsForDraw, startVertex, numInstances,
-            startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
+        AR_API_EVENT(DrawInstancedEvent(pDC->drawId,
+                                        topology,
+                                        numVertsForDraw,
+                                        startVertex,
+                                        numInstances,
+                                        startInstance,
+                                        pState->tsState.tsEnable,
+                                        pState->gsState.gsEnable,
+                                        pState->soState.soEnable,
+                                        pState->gsState.outputTopology,
+                                        draw));
  
          remainingVerts -= numVertsForDraw;
          draw++;
      }
  
      // restore culling state
-    pDC = GetDrawContext(pContext);
+    pDC                                   = GetDrawContext(pContext);
      pDC->pState->state.rastState.cullMode = oldCullMode;
  
      RDTSC_END(APIDraw, numVertices * numInstances);
@@ -1265,11 +1251,10 @@ void DrawInstanced(
  /// @param topology - Specifies topology for draw.
  /// @param startVertex - Specifies start vertex in vertex buffer for draw.
  /// @param primCount - Number of vertices.
-void SwrDraw(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t startVertex,
-    uint32_t numVertices)
+void SwrDraw(HANDLE             hContext,
+             PRIMITIVE_TOPOLOGY topology,
+             uint32_t           startVertex,
+             uint32_t           numVertices)
  {
      DrawInstanced(hContext, topology, numVertices, startVertex);
  }
@@ -1281,17 +1266,17 @@ void SwrDraw(
  /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
  /// @param numInstances - How many instances to render.
  /// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void SwrDrawInstanced(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numVertsPerInstance,
-    uint32_t numInstances,
-    uint32_t startVertex,
-    uint32_t startInstance
-    )
-{
-    DrawInstanced(hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void SwrDrawInstanced(HANDLE             hContext,
+                      PRIMITIVE_TOPOLOGY topology,
+                      uint32_t           numVertsPerInstance,
+                      uint32_t           numInstances,
+                      uint32_t           startVertex,
+                      uint32_t           startInstance)
+{
+    DrawInstanced(
+        hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
  }
  
  //////////////////////////////////////////////////////////////////////////
@@ -1302,46 +1287,52 @@ void SwrDrawInstanced(
  /// @param indexOffset - Starting index into index buffer.
  /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
  /// @param numInstances - Number of instances to render.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void DrawIndexedInstance(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numIndices,
-    uint32_t indexOffset,
-    int32_t baseVertex,
-    uint32_t numInstances = 1,
-    uint32_t startInstance = 0)
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void DrawIndexedInstance(HANDLE             hContext,
+                         PRIMITIVE_TOPOLOGY topology,
+                         uint32_t           numIndices,
+                         uint32_t           indexOffset,
+                         int32_t            baseVertex,
+                         uint32_t           numInstances  = 1,
+                         uint32_t           startInstance = 0)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-    API_STATE* pState = &pDC->pState->state;
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
+    API_STATE*    pState   = &pDC->pState->state;
  
      RDTSC_BEGIN(APIDrawIndexed, pDC->drawId);
  
      uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology);
-    uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw);
-    uint32_t remainingIndices = numIndices;
+    uint32_t primsPerDraw      = GetNumPrims(topology, maxIndicesPerDraw);
+    uint32_t remainingIndices  = numIndices;
  
      uint32_t indexSize = 0;
      switch (pState->indexBuffer.format)
      {
-    case R32_UINT: indexSize = sizeof(uint32_t); break;
-    case R16_UINT: indexSize = sizeof(uint16_t); break;
-    case R8_UINT: indexSize = sizeof(uint8_t); break;
+    case R32_UINT:
+        indexSize = sizeof(uint32_t);
+        break;
+    case R16_UINT:
+        indexSize = sizeof(uint16_t);
+        break;
+    case R8_UINT:
+        indexSize = sizeof(uint8_t);
+        break;
      default:
          SWR_INVALID("Invalid index buffer format: %d", pState->indexBuffer.format);
      }
  
-    int draw = 0;
+    int      draw = 0;
      gfxptr_t xpIB = pState->indexBuffer.xpIndices;
      xpIB += (uint64_t)indexOffset * (uint64_t)indexSize;
  
-    pState->topology = topology;
+    pState->topology   = topology;
      pState->forceFront = false;
  
      // disable culling for points/lines
@@ -1349,7 +1340,7 @@ void DrawIndexedInstance(
      if (topology == TOP_POINT_LIST)
      {
          pState->rastState.cullMode = SWR_CULLMODE_NONE;
-        pState->forceFront = true;
+        pState->forceFront         = true;
      }
      else if (topology == TOP_RECT_LIST)
      {
@@ -1358,8 +1349,8 @@ void DrawIndexedInstance(
  
      while (remainingIndices)
      {
-        uint32_t numIndicesForDraw = (remainingIndices < maxIndicesPerDraw) ?
-        remainingIndices : maxIndicesPerDraw;
+        uint32_t numIndicesForDraw =
+            (remainingIndices < maxIndicesPerDraw) ? remainingIndices : maxIndicesPerDraw;
  
          // When breaking up draw, we need to obtain new draw context for each iteration.
          bool isSplitDraw = (draw > 0) ? true : false;
@@ -1367,31 +1358,40 @@ void DrawIndexedInstance(
          pDC = GetDrawContext(pContext, isSplitDraw);
          InitDraw(pDC, isSplitDraw);
  
-        pDC->FeWork.type = DRAW;
-        pDC->FeWork.pfnWork = GetProcessDrawFunc(
-            true,   // IsIndexed
-            pState->frontendState.bEnableCutIndex,
-            pState->tsState.tsEnable,
-            pState->gsState.gsEnable,
-            pState->soState.soEnable,
-            pDC->pState->pfnProcessPrims != nullptr);
-        pDC->FeWork.desc.draw.pDC = pDC;
+        pDC->FeWork.type                 = DRAW;
+        pDC->FeWork.pfnWork              = GetProcessDrawFunc(true, // IsIndexed
+                                                 pState->frontendState.bEnableCutIndex,
+                                                 pState->tsState.tsEnable,
+                                                 pState->gsState.gsEnable,
+                                                 pState->soState.soEnable,
+                                                 pDC->pState->pfnProcessPrims != nullptr);
+        pDC->FeWork.desc.draw.pDC        = pDC;
          pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
-        pDC->FeWork.desc.draw.xpIB = xpIB;
-        pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
+        pDC->FeWork.desc.draw.xpIB       = xpIB;
+        pDC->FeWork.desc.draw.type       = pDC->pState->state.indexBuffer.format;
  
-        pDC->FeWork.desc.draw.numInstances = numInstances;
+        pDC->FeWork.desc.draw.numInstances  = numInstances;
          pDC->FeWork.desc.draw.startInstance = startInstance;
-        pDC->FeWork.desc.draw.baseVertex = baseVertex;
-        pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
+        pDC->FeWork.desc.draw.baseVertex    = baseVertex;
+        pDC->FeWork.desc.draw.startPrimID   = draw * primsPerDraw;
  
          pDC->cleanupState = (remainingIndices == numIndicesForDraw);
  
-        //enqueue DC
+        // enqueue DC
          QueueDraw(pContext);
  
-        AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndicesForDraw, indexOffset, baseVertex,
-            numInstances, startInstance, pState->tsState.tsEnable, pState->gsState.gsEnable, pState->soState.soEnable, pState->gsState.outputTopology, draw));
+        AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId,
+                                               topology,
+                                               numIndicesForDraw,
+                                               indexOffset,
+                                               baseVertex,
+                                               numInstances,
+                                               startInstance,
+                                               pState->tsState.tsEnable,
+                                               pState->gsState.gsEnable,
+                                               pState->soState.soEnable,
+                                               pState->gsState.outputTopology,
+                                               draw));
  
          xpIB += maxIndicesPerDraw * indexSize;
          remainingIndices -= numIndicesForDraw;
@@ -1399,13 +1399,12 @@ void DrawIndexedInstance(
      }
  
      // Restore culling state
-    pDC = GetDrawContext(pContext);
+    pDC                                   = GetDrawContext(pContext);
      pDC->pState->state.rastState.cullMode = oldCullMode;
- 
+
      RDTSC_END(APIDrawIndexed, numIndices * numInstances);
  }
  
-
  //////////////////////////////////////////////////////////////////////////
  /// @brief DrawIndexed
  /// @param hContext - Handle passed back from SwrCreateContext
@@ -1413,13 +1412,11 @@ void DrawIndexedInstance(
  /// @param numIndices - Number of indices to read sequentially from index buffer.
  /// @param indexOffset - Starting index into index buffer.
  /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-void SwrDrawIndexed(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numIndices,
-    uint32_t indexOffset,
-    int32_t baseVertex
-    )
+void SwrDrawIndexed(HANDLE             hContext,
+                    PRIMITIVE_TOPOLOGY topology,
+                    uint32_t           numIndices,
+                    uint32_t           indexOffset,
+                    int32_t            baseVertex)
  {
      DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex);
  }
@@ -1432,48 +1429,49 @@ void SwrDrawIndexed(
  /// @param numInstances - Number of instances to render.
  /// @param indexOffset - Starting index into index buffer.
  /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-void SwrDrawIndexedInstanced(
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numIndices,
-    uint32_t numInstances,
-    uint32_t indexOffset,
-    int32_t baseVertex,
-    uint32_t startInstance)
-{
-    DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+void SwrDrawIndexedInstanced(HANDLE             hContext,
+                             PRIMITIVE_TOPOLOGY topology,
+                             uint32_t           numIndices,
+                             uint32_t           numInstances,
+                             uint32_t           indexOffset,
+                             int32_t            baseVertex,
+                             uint32_t           startInstance)
+{
+    DrawIndexedInstance(
+        hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrInvalidateTiles
  /// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
+/// invalidate.
  /// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
  ///                         be hottile size-aligned.
-void SWR_API SwrInvalidateTiles(
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    const SWR_RECT& invalidateRect)
+void SWR_API SwrInvalidateTiles(HANDLE          hContext,
+                                uint32_t        attachmentMask,
+                                const SWR_RECT& invalidateRect)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
-    pDC->FeWork.type = DISCARDINVALIDATETILES;
-    pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
+    pDC->FeWork.type                                       = DISCARDINVALIDATETILES;
+    pDC->FeWork.pfnWork                                    = ProcessDiscardInvalidateTiles;
      pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
-    pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
+    pDC->FeWork.desc.discardInvalidateTiles.rect           = invalidateRect;
      pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
-    pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
+    pDC->FeWork.desc.discardInvalidateTiles.newTileState   = SWR_TILE_INVALID;
      pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
-    pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
+    pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly  = false;
  
-    //enqueue
+    // enqueue
      QueueDraw(pContext);
  
      AR_API_EVENT(SwrInvalidateTilesEvent(pDC->drawId));
@@ -1485,30 +1483,27 @@ void SWR_API SwrInvalidateTiles(
  /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
  /// @param rect - The pixel-coordinate rectangle to discard.  Only fully-covered hottiles will be
  ///               discarded.
-void SWR_API SwrDiscardRect(
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    const SWR_RECT& rect)
+void SWR_API SwrDiscardRect(HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      // Queue a load to the hottile
-    pDC->FeWork.type = DISCARDINVALIDATETILES;
-    pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
+    pDC->FeWork.type                                       = DISCARDINVALIDATETILES;
+    pDC->FeWork.pfnWork                                    = ProcessDiscardInvalidateTiles;
      pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
-    pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
+    pDC->FeWork.desc.discardInvalidateTiles.rect           = rect;
      pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
-    pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
+    pDC->FeWork.desc.discardInvalidateTiles.newTileState   = SWR_TILE_RESOLVED;
      pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
-    pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
+    pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly  = true;
  
-    //enqueue
+    // enqueue
      QueueDraw(pContext);
  
      AR_API_EVENT(SwrDiscardRectEvent(pDC->drawId));
@@ -1520,23 +1515,23 @@ void SWR_API SwrDiscardRect(
  /// @param threadGroupCountX - Number of thread groups dispatched in X direction
  /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
  /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-void SwrDispatch(
-    HANDLE hContext,
-    uint32_t threadGroupCountX,
-    uint32_t threadGroupCountY,
-    uint32_t threadGroupCountZ)
+void SwrDispatch(HANDLE   hContext,
+                 uint32_t threadGroupCountX,
+                 uint32_t threadGroupCountY,
+                 uint32_t threadGroupCountZ)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      RDTSC_BEGIN(APIDispatch, pDC->drawId);
-    AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
-    pDC->isCompute = true;      // This is a compute context.
+    AR_API_EVENT(
+        DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ));
+    pDC->isCompute = true; // This is a compute context.
  
      COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64);
  
@@ -1545,8 +1540,8 @@ void SwrDispatch(
      pTaskData->threadGroupCountZ = threadGroupCountZ;
  
      uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ;
-    uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
-    pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex];
+    uint32_t dcIndex           = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT;
+    pDC->pDispatch             = &pContext->pDispatchQueueArray[dcIndex];
      pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE);
  
      QueueDispatch(pContext);
@@ -1555,30 +1550,29 @@ void SwrDispatch(
  
  // Deswizzles, converts and stores current contents of the hot tiles to surface
  // described by pState
-void SWR_API SwrStoreTiles(
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    SWR_TILE_STATE postStoreTileState,
-    const SWR_RECT& storeRect)
+void SWR_API SwrStoreTiles(HANDLE          hContext,
+                           uint32_t        attachmentMask,
+                           SWR_TILE_STATE  postStoreTileState,
+                           const SWR_RECT& storeRect)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      RDTSC_BEGIN(APIStoreTiles, pDC->drawId);
  
-    pDC->FeWork.type = STORETILES;
-    pDC->FeWork.pfnWork = ProcessStoreTiles;
-    pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask;
+    pDC->FeWork.type                               = STORETILES;
+    pDC->FeWork.pfnWork                            = ProcessStoreTiles;
+    pDC->FeWork.desc.storeTiles.attachmentMask     = attachmentMask;
      pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
-    pDC->FeWork.desc.storeTiles.rect = storeRect;
+    pDC->FeWork.desc.storeTiles.rect               = storeRect;
      pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
  
-    //enqueue
+    // enqueue
      QueueDraw(pContext);
  
      AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId));
@@ -1595,37 +1589,36 @@ void SWR_API SwrStoreTiles(
  /// @param z - depth value use for clearing depth buffer
  /// @param stencil - stencil value used for clearing stencil buffer
  /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-void SWR_API SwrClearRenderTarget(
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    uint32_t renderTargetArrayIndex,
-    const float clearColor[4],
-    float z,
-    uint8_t stencil,
-    const SWR_RECT& clearRect)
+void SWR_API SwrClearRenderTarget(HANDLE          hContext,
+                                  uint32_t        attachmentMask,
+                                  uint32_t        renderTargetArrayIndex,
+                                  const float     clearColor[4],
+                                  float           z,
+                                  uint8_t         stencil,
+                                  const SWR_RECT& clearRect)
  {
      if (KNOB_TOSS_DRAW)
      {
          return;
      }
  
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      RDTSC_BEGIN(APIClearRenderTarget, pDC->drawId);
  
-    pDC->FeWork.type = CLEAR;
-    pDC->FeWork.pfnWork = ProcessClear;
+    pDC->FeWork.type            = CLEAR;
+    pDC->FeWork.pfnWork         = ProcessClear;
      pDC->FeWork.desc.clear.rect = clearRect;
      pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
-    pDC->FeWork.desc.clear.attachmentMask = attachmentMask;
+    pDC->FeWork.desc.clear.attachmentMask         = attachmentMask;
      pDC->FeWork.desc.clear.renderTargetArrayIndex = renderTargetArrayIndex;
-    pDC->FeWork.desc.clear.clearDepth = z;
-    pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
-    pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1];
-    pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2];
-    pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3];
-    pDC->FeWork.desc.clear.clearStencil = stencil;
+    pDC->FeWork.desc.clear.clearDepth             = z;
+    pDC->FeWork.desc.clear.clearRTColor[0]        = clearColor[0];
+    pDC->FeWork.desc.clear.clearRTColor[1]        = clearColor[1];
+    pDC->FeWork.desc.clear.clearRTColor[2]        = clearColor[2];
+    pDC->FeWork.desc.clear.clearRTColor[3]        = clearColor[3];
+    pDC->FeWork.desc.clear.clearStencil           = stencil;
  
      // enqueue draw
      QueueDraw(pContext);
@@ -1639,16 +1632,16 @@ void SWR_API SwrClearRenderTarget(
  ///        sampler.
  ///        SWR is responsible for the allocation of the private context state.
  /// @param hContext - Handle passed back from SwrCreateContext
-VOID* SwrGetPrivateContextState(
-    HANDLE hContext)
+VOID* SwrGetPrivateContextState(HANDLE hContext)
  {
-    SWR_CONTEXT* pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
-    DRAW_STATE* pState = pDC->pState;
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
+    DRAW_STATE*   pState   = pDC->pState;
  
      if (pState->pPrivateState == nullptr)
      {
-        pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize, KNOB_SIMD_WIDTH*sizeof(float));
+        pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize,
+                                                             KNOB_SIMD_WIDTH * sizeof(float));
      }
  
      return pState->pPrivateState;
@@ -1662,13 +1655,10 @@ VOID* SwrGetPrivateContextState(
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param size - Size of allocation
  /// @param align - Alignment needed for allocation.
-VOID* SwrAllocDrawContextMemory(
-    HANDLE hContext,
-    uint32_t size,
-    uint32_t align)
+VOID* SwrAllocDrawContextMemory(HANDLE hContext, uint32_t size, uint32_t align)
  {
-    SWR_CONTEXT* pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      return pDC->pState->pArena->AllocAligned(size, align);
  }
@@ -1677,12 +1667,10 @@ VOID* SwrAllocDrawContextMemory(
  /// @brief Enables stats counting
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param enable - If true then counts are incremented.
-void SwrEnableStatsFE(
-    HANDLE hContext,
-    bool enable)
+void SwrEnableStatsFE(HANDLE hContext, bool enable)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      pDC->pState->state.enableStatsFE = enable;
  }
@@ -1691,12 +1679,10 @@ void SwrEnableStatsFE(
  /// @brief Enables stats counting
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param enable - If true then counts are incremented.
-void SwrEnableStatsBE(
-    HANDLE hContext,
-    bool enable)
+void SwrEnableStatsBE(HANDLE hContext, bool enable)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
  
      pDC->pState->state.enableStatsBE = enable;
  }
@@ -1704,11 +1690,10 @@ void SwrEnableStatsBE(
  //////////////////////////////////////////////////////////////////////////
  /// @brief Mark end of frame - used for performance profiling
  /// @param hContext - Handle passed back from SwrCreateContext
-void SWR_API SwrEndFrame(
-    HANDLE hContext)
+void SWR_API SwrEndFrame(HANDLE hContext)
  {
-    SWR_CONTEXT *pContext = GetContext(hContext);
-    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
+    SWR_CONTEXT*  pContext = GetContext(hContext);
+    DRAW_CONTEXT* pDC      = GetDrawContext(pContext);
      (void)pDC; // var used
  
      RDTSC_ENDFRAME();
@@ -1733,55 +1718,55 @@ void SwrInit()
      InitRasterizerFunctions();
  }
  
-void SwrGetInterface(SWR_INTERFACE &out_funcs)
-{
-    out_funcs.pfnSwrCreateContext = SwrCreateContext;
-    out_funcs.pfnSwrDestroyContext = SwrDestroyContext;
-    out_funcs.pfnSwrBindApiThread = SwrBindApiThread;
-    out_funcs.pfnSwrSaveState = SwrSaveState;
-    out_funcs.pfnSwrRestoreState = SwrRestoreState;
-    out_funcs.pfnSwrSync = SwrSync;
-    out_funcs.pfnSwrStallBE = SwrStallBE;
-    out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle;
-    out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE;
-    out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers;
-    out_funcs.pfnSwrSetIndexBuffer = SwrSetIndexBuffer;
-    out_funcs.pfnSwrSetFetchFunc = SwrSetFetchFunc;
-    out_funcs.pfnSwrSetSoFunc = SwrSetSoFunc;
-    out_funcs.pfnSwrSetSoState = SwrSetSoState;
-    out_funcs.pfnSwrSetSoBuffers = SwrSetSoBuffers;
-    out_funcs.pfnSwrSetVertexFunc = SwrSetVertexFunc;
-    out_funcs.pfnSwrSetFrontendState = SwrSetFrontendState;
-    out_funcs.pfnSwrSetGsState = SwrSetGsState;
-    out_funcs.pfnSwrSetGsFunc = SwrSetGsFunc;
-    out_funcs.pfnSwrSetCsFunc = SwrSetCsFunc;
-    out_funcs.pfnSwrSetTsState = SwrSetTsState;
-    out_funcs.pfnSwrSetHsFunc = SwrSetHsFunc;
-    out_funcs.pfnSwrSetDsFunc = SwrSetDsFunc;
-    out_funcs.pfnSwrSetDepthStencilState = SwrSetDepthStencilState;
-    out_funcs.pfnSwrSetBackendState = SwrSetBackendState;
-    out_funcs.pfnSwrSetDepthBoundsState = SwrSetDepthBoundsState;
-    out_funcs.pfnSwrSetPixelShaderState = SwrSetPixelShaderState;
-    out_funcs.pfnSwrSetBlendState = SwrSetBlendState;
-    out_funcs.pfnSwrSetBlendFunc = SwrSetBlendFunc;
-    out_funcs.pfnSwrDraw = SwrDraw;
-    out_funcs.pfnSwrDrawInstanced = SwrDrawInstanced;
-    out_funcs.pfnSwrDrawIndexed = SwrDrawIndexed;
-    out_funcs.pfnSwrDrawIndexedInstanced = SwrDrawIndexedInstanced;
-    out_funcs.pfnSwrInvalidateTiles = SwrInvalidateTiles;
-    out_funcs.pfnSwrDiscardRect = SwrDiscardRect;
-    out_funcs.pfnSwrDispatch = SwrDispatch;
-    out_funcs.pfnSwrStoreTiles = SwrStoreTiles;
-    out_funcs.pfnSwrClearRenderTarget = SwrClearRenderTarget;
-    out_funcs.pfnSwrSetRastState = SwrSetRastState;
-    out_funcs.pfnSwrSetViewports = SwrSetViewports;
-    out_funcs.pfnSwrSetScissorRects = SwrSetScissorRects;
+void SwrGetInterface(SWR_INTERFACE& out_funcs)
+{
+    out_funcs.pfnSwrCreateContext          = SwrCreateContext;
+    out_funcs.pfnSwrDestroyContext         = SwrDestroyContext;
+    out_funcs.pfnSwrBindApiThread          = SwrBindApiThread;
+    out_funcs.pfnSwrSaveState              = SwrSaveState;
+    out_funcs.pfnSwrRestoreState           = SwrRestoreState;
+    out_funcs.pfnSwrSync                   = SwrSync;
+    out_funcs.pfnSwrStallBE                = SwrStallBE;
+    out_funcs.pfnSwrWaitForIdle            = SwrWaitForIdle;
+    out_funcs.pfnSwrWaitForIdleFE          = SwrWaitForIdleFE;
+    out_funcs.pfnSwrSetVertexBuffers       = SwrSetVertexBuffers;
+    out_funcs.pfnSwrSetIndexBuffer         = SwrSetIndexBuffer;
+    out_funcs.pfnSwrSetFetchFunc           = SwrSetFetchFunc;
+    out_funcs.pfnSwrSetSoFunc              = SwrSetSoFunc;
+    out_funcs.pfnSwrSetSoState             = SwrSetSoState;
+    out_funcs.pfnSwrSetSoBuffers           = SwrSetSoBuffers;
+    out_funcs.pfnSwrSetVertexFunc          = SwrSetVertexFunc;
+    out_funcs.pfnSwrSetFrontendState       = SwrSetFrontendState;
+    out_funcs.pfnSwrSetGsState             = SwrSetGsState;
+    out_funcs.pfnSwrSetGsFunc              = SwrSetGsFunc;
+    out_funcs.pfnSwrSetCsFunc              = SwrSetCsFunc;
+    out_funcs.pfnSwrSetTsState             = SwrSetTsState;
+    out_funcs.pfnSwrSetHsFunc              = SwrSetHsFunc;
+    out_funcs.pfnSwrSetDsFunc              = SwrSetDsFunc;
+    out_funcs.pfnSwrSetDepthStencilState   = SwrSetDepthStencilState;
+    out_funcs.pfnSwrSetBackendState        = SwrSetBackendState;
+    out_funcs.pfnSwrSetDepthBoundsState    = SwrSetDepthBoundsState;
+    out_funcs.pfnSwrSetPixelShaderState    = SwrSetPixelShaderState;
+    out_funcs.pfnSwrSetBlendState          = SwrSetBlendState;
+    out_funcs.pfnSwrSetBlendFunc           = SwrSetBlendFunc;
+    out_funcs.pfnSwrDraw                   = SwrDraw;
+    out_funcs.pfnSwrDrawInstanced          = SwrDrawInstanced;
+    out_funcs.pfnSwrDrawIndexed            = SwrDrawIndexed;
+    out_funcs.pfnSwrDrawIndexedInstanced   = SwrDrawIndexedInstanced;
+    out_funcs.pfnSwrInvalidateTiles        = SwrInvalidateTiles;
+    out_funcs.pfnSwrDiscardRect            = SwrDiscardRect;
+    out_funcs.pfnSwrDispatch               = SwrDispatch;
+    out_funcs.pfnSwrStoreTiles             = SwrStoreTiles;
+    out_funcs.pfnSwrClearRenderTarget      = SwrClearRenderTarget;
+    out_funcs.pfnSwrSetRastState           = SwrSetRastState;
+    out_funcs.pfnSwrSetViewports           = SwrSetViewports;
+    out_funcs.pfnSwrSetScissorRects        = SwrSetScissorRects;
      out_funcs.pfnSwrGetPrivateContextState = SwrGetPrivateContextState;
      out_funcs.pfnSwrAllocDrawContextMemory = SwrAllocDrawContextMemory;
-    out_funcs.pfnSwrEnableStatsFE = SwrEnableStatsFE;
-    out_funcs.pfnSwrEnableStatsBE = SwrEnableStatsBE;
-    out_funcs.pfnSwrEndFrame = SwrEndFrame;
-    out_funcs.pfnSwrInit = SwrInit;
+    out_funcs.pfnSwrEnableStatsFE          = SwrEnableStatsFE;
+    out_funcs.pfnSwrEnableStatsBE          = SwrEnableStatsBE;
+    out_funcs.pfnSwrEndFrame               = SwrEndFrame;
+    out_funcs.pfnSwrInit                   = SwrInit;
      out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile;
      out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
      out_funcs.pfnSwrStoreHotTileClear = SwrStoreHotTileClear;
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h b/src/gallium/drivers/swr/rasterizer/core/api.h

index b171188c927ee54f8a615570c9a7166711694fdd..9cc5292e7b03c26e909fe0ec78ae3f55a238683a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file api.h
-*
-* @brief API definitions
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file api.h
+ *
+ * @brief API definitions
+ *
+ ******************************************************************************/
  
  #ifndef __SWR_API_H__
  #define __SWR_API_H__
@@ -38,7 +38,7 @@
  #include "common/formats.h"
  #include "core/state.h"
  
-typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
+typedef void(SWR_API* PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Rectangle structure
@@ -47,20 +47,15 @@ struct SWR_RECT
      int32_t xmin; ///< inclusive
      int32_t ymin; ///< inclusive
      int32_t xmax; ///< exclusive
-    int32_t ymax; ///< exclusive 
+    int32_t ymax; ///< exclusive
  
-    bool operator == (const SWR_RECT& rhs)
+    bool operator==(const SWR_RECT& rhs)
      {
-        return (this->ymin == rhs.ymin &&
-            this->ymax == rhs.ymax &&
-            this->xmin == rhs.xmin &&
-            this->xmax == rhs.xmax);
+        return (this->ymin == rhs.ymin && this->ymax == rhs.ymax && this->xmin == rhs.xmin &&
+                this->xmax == rhs.xmax);
      }
  
-    bool operator != (const SWR_RECT& rhs)
-    {
-        return !(*this == rhs);
-    }
+    bool operator!=(const SWR_RECT& rhs) { return !(*this == rhs); }
  
      SWR_RECT& Intersect(const SWR_RECT& other)
      {
@@ -69,8 +64,7 @@ struct SWR_RECT
          this->xmax = std::min(this->xmax, other.xmax);
          this->ymax = std::min(this->ymax, other.ymax);
  
-        if (xmax - xmin < 0 ||
-            ymax - ymin < 0)
+        if (xmax - xmin < 0 || ymax - ymin < 0)
          {
              // Zero area
              ymin = ymax = xmin = xmax = 0;
@@ -78,10 +72,7 @@ struct SWR_RECT
  
          return *this;
      }
-    SWR_RECT& operator &= (const SWR_RECT& other)
-    {
-        return Intersect(other);
-    }
+    SWR_RECT& operator&=(const SWR_RECT& other) { return Intersect(other); }
  
      SWR_RECT& Union(const SWR_RECT& other)
      {
@@ -93,10 +84,7 @@ struct SWR_RECT
          return *this;
      }
  
-    SWR_RECT& operator |= (const SWR_RECT& other)
-    {
-        return Union(other);
-    }
+    SWR_RECT& operator|=(const SWR_RECT& other) { return Union(other); }
  
      void Translate(int32_t x, int32_t y)
      {
@@ -115,10 +103,14 @@ struct SWR_RECT
  /// @param x - destination x coordinate
  /// @param y - destination y coordinate
  /// @param pDstHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
-    SWR_FORMAT dstFormat,
-    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
+typedef void(SWR_API* PFN_LOAD_TILE)(HANDLE                      hPrivateContext,
+                                     HANDLE                      hWorkerPrivateData,
+                                     SWR_FORMAT                  dstFormat,
+                                     SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                                     uint32_t                    x,
+                                     uint32_t                    y,
+                                     uint32_t                    renderTargetArrayIndex,
+                                     uint8_t*                    pDstHotTile);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Function signature for store hot tiles
@@ -128,10 +120,14 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPriva
  /// @param x - destination x coordinate
  /// @param y - destination y coordinate
  /// @param pSrcHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
-    SWR_FORMAT srcFormat,
-    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
+typedef void(SWR_API* PFN_STORE_TILE)(HANDLE                      hPrivateContext,
+                                      HANDLE                      hWorkerPrivateData,
+                                      SWR_FORMAT                  srcFormat,
+                                      SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+                                      uint32_t                    x,
+                                      uint32_t                    y,
+                                      uint32_t                    renderTargetArrayIndex,
+                                      uint8_t*                    pSrcHotTile);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Function signature for clearing from the hot tiles clear value
@@ -141,9 +137,13 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPriv
  /// @param y - destination y coordinate
  /// @param renderTargetArrayIndex - render target array offset from arrayIndex
  /// @param pClearColor - pointer to the hot tile's clear value
-typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPrivateData,
-    SWR_RENDERTARGET_ATTACHMENT rtIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, const float* pClearColor);
+typedef void(SWR_API* PFN_CLEAR_TILE)(HANDLE                      hPrivateContext,
+                                      HANDLE                      hWorkerPrivateData,
+                                      SWR_RENDERTARGET_ATTACHMENT rtIndex,
+                                      uint32_t                    x,
+                                      uint32_t                    y,
+                                      uint32_t                    renderTargetArrayIndex,
+                                      const float*                pClearColor);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Callback to allow driver to update their copy of streamout write offset.
@@ -152,15 +152,15 @@ typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE hWorkerPriv
  /// @param hPrivateContext - handle to private data
  /// @param soBufferSlot - buffer slot for write offset
  /// @param soWriteOffset - update value for so write offset.
-typedef void(SWR_API *PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE hPrivateContext,
-    uint32_t soBufferSlot, uint32_t soWriteOffset);
+typedef void(SWR_API* PFN_UPDATE_SO_WRITE_OFFSET)(HANDLE   hPrivateContext,
+                                                  uint32_t soBufferSlot,
+                                                  uint32_t soWriteOffset);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Callback to allow driver to update their copy of stats.
  /// @param hPrivateContext - handle to private data
  /// @param pStats - pointer to draw stats
-typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
-    const SWR_STATS* pStats);
+typedef void(SWR_API* PFN_UPDATE_STATS)(HANDLE hPrivateContext, const SWR_STATS* pStats);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Callback to allow driver to update their copy of FE stats.
@@ -169,8 +169,7 @@ typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
  ///       to sum up the stats across all of the workers.
  /// @param hPrivateContext - handle to private data
  /// @param pStats - pointer to draw stats
-typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
-    const SWR_STATS_FE* pStats);
+typedef void(SWR_API* PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext, const SWR_STATS_FE* pStats);
  
  //////////////////////////////////////////////////////////////////////////
  /// BucketManager
@@ -183,14 +182,14 @@ class BucketManager;
  /////////////////////////////////////////////////////////////////////////
  struct SWR_THREADING_INFO
  {
-    uint32_t    BASE_NUMA_NODE;
-    uint32_t    BASE_CORE;
-    uint32_t    BASE_THREAD;
-    uint32_t    MAX_WORKER_THREADS;
-    uint32_t    MAX_NUMA_NODES;
-    uint32_t    MAX_CORES_PER_NUMA_NODE;
-    uint32_t    MAX_THREADS_PER_CORE;
-    bool        SINGLE_THREADED;
+    uint32_t BASE_NUMA_NODE;
+    uint32_t BASE_CORE;
+    uint32_t BASE_THREAD;
+    uint32_t MAX_WORKER_THREADS;
+    uint32_t MAX_NUMA_NODES;
+    uint32_t MAX_CORES_PER_NUMA_NODE;
+    uint32_t MAX_THREADS_PER_CORE;
+    bool     SINGLE_THREADED;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -206,8 +205,8 @@ struct SWR_API_THREADING_INFO
      uint32_t bindAPIThread0;        // Default is true if numAPIReservedThreads is > 0,
                                      // binds thread used in SwrCreateContext to API Reserved
                                      // thread 0
-    uint32_t numAPIThreadsPerCore;  // 0 - means use all threads per core, else clamp to this number.
-                                    // Independent of KNOB_MAX_THREADS_PER_CORE.
+    uint32_t numAPIThreadsPerCore; // 0 - means use all threads per core, else clamp to this number.
+                                   // Independent of KNOB_MAX_THREADS_PER_CORE.
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -217,13 +216,13 @@ struct SWR_API_THREADING_INFO
  /////////////////////////////////////////////////////////////////////////
  struct SWR_WORKER_PRIVATE_STATE
  {
-    typedef void (SWR_API *PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
+    typedef void(SWR_API* PFN_WORKER_DATA)(HANDLE hWorkerPrivateData, uint32_t iWorkerNum);
  
-    size_t              perWorkerPrivateStateSize;  ///< Amount of data to allocate per-worker
-    PFN_WORKER_DATA     pfnInitWorkerData;          ///< Init function for worker data.  If null
-                                                    ///< worker data will be initialized to 0.
-    PFN_WORKER_DATA     pfnFinishWorkerData;        ///< Finish / destroy function for worker data.
-                                                    ///< Can be null.
+    size_t          perWorkerPrivateStateSize; ///< Amount of data to allocate per-worker
+    PFN_WORKER_DATA pfnInitWorkerData;         ///< Init function for worker data.  If null
+                                               ///< worker data will be initialized to 0.
+    PFN_WORKER_DATA pfnFinishWorkerData;       ///< Finish / destroy function for worker data.
+                                               ///< Can be null.
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -233,198 +232,167 @@ struct SWR_CREATECONTEXT_INFO
  {
      // External functions (e.g. sampler) need per draw context state.
      // Use SwrGetPrivateContextState() to access private state.
-    size_t                      privateStateSize;
+    size_t privateStateSize;
  
      // Optional per-worker state, can be NULL for no worker-private data
-    SWR_WORKER_PRIVATE_STATE*   pWorkerPrivateState;
+    SWR_WORKER_PRIVATE_STATE* pWorkerPrivateState;
  
      // Callback functions
-    PFN_LOAD_TILE               pfnLoadTile;
-    PFN_STORE_TILE              pfnStoreTile;
-    PFN_CLEAR_TILE              pfnClearTile;
-    PFN_UPDATE_SO_WRITE_OFFSET  pfnUpdateSoWriteOffset;
-    PFN_UPDATE_STATS            pfnUpdateStats;
-    PFN_UPDATE_STATS_FE         pfnUpdateStatsFE;
+    PFN_LOAD_TILE              pfnLoadTile;
+    PFN_STORE_TILE             pfnStoreTile;
+    PFN_CLEAR_TILE             pfnClearTile;
+    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS           pfnUpdateStats;
+    PFN_UPDATE_STATS_FE        pfnUpdateStatsFE;
  
  
      // Pointer to rdtsc buckets mgr returned to the caller.
      // Only populated when KNOB_ENABLE_RDTSC is set
-    BucketManager*              pBucketMgr;
+    BucketManager* pBucketMgr;
  
      // Output: size required memory passed to for SwrSaveState / SwrRestoreState
-    size_t                      contextSaveSize;
+    size_t contextSaveSize;
  
      // ArchRast event manager.
-    HANDLE                      hArEventManager;
+    HANDLE hArEventManager;
  
      // Input (optional): Threading info that overrides any set KNOB values.
-    SWR_THREADING_INFO*         pThreadInfo;
+    SWR_THREADING_INFO* pThreadInfo;
  
      // Input (optional): Info for reserving API threads
-    SWR_API_THREADING_INFO*     pApiThreadInfo;
+    SWR_API_THREADING_INFO* pApiThreadInfo;
  
      // Input: if set to non-zero value, overrides KNOB value for maximum
      // number of draws in flight
-    uint32_t                    MAX_DRAWS_IN_FLIGHT;
+    uint32_t MAX_DRAWS_IN_FLIGHT;
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Create SWR Context.
  /// @param pCreateInfo - pointer to creation info.
-SWR_FUNC(HANDLE, SwrCreateContext,
-    SWR_CREATECONTEXT_INFO* pCreateInfo);
+SWR_FUNC(HANDLE, SwrCreateContext, SWR_CREATECONTEXT_INFO* pCreateInfo);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Destroys SWR Context.
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrDestroyContext,
-    HANDLE hContext);
+SWR_FUNC(void, SwrDestroyContext, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Bind current thread to an API reserved HW thread
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param apiThreadId - index of reserved HW thread to bind to.
-SWR_FUNC(void, SwrBindApiThread,
-    HANDLE hContext,
-    uint32_t apiThreadId);
+SWR_FUNC(void, SwrBindApiThread, HANDLE hContext, uint32_t apiThreadId);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Saves API state associated with hContext
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pOutputStateBlock - Memory block to receive API state data
  /// @param memSize - Size of memory pointed to by pOutputStateBlock
-SWR_FUNC(void, SwrSaveState,
-    HANDLE hContext,
-    void* pOutputStateBlock,
-    size_t memSize);
+SWR_FUNC(void, SwrSaveState, HANDLE hContext, void* pOutputStateBlock, size_t memSize);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Restores API state to hContext previously saved with SwrSaveState
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pStateBlock - Memory block to read API state data from
  /// @param memSize - Size of memory pointed to by pStateBlock
-SWR_FUNC(void, SwrRestoreState,
-    HANDLE hContext,
-    const void* pStateBlock,
-    size_t memSize);
+SWR_FUNC(void, SwrRestoreState, HANDLE hContext, const void* pStateBlock, size_t memSize);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Sync cmd. Executes the callback func when all rendering up to this sync
  ///        has been completed
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnFunc - pointer to callback function,
-/// @param userData - user data to pass back 
-SWR_FUNC(void, SwrSync,
-    HANDLE hContext,
-    PFN_CALLBACK_FUNC pfnFunc,
-    uint64_t userData,
-    uint64_t userData2,
-    uint64_t userData3);
+/// @param userData - user data to pass back
+SWR_FUNC(void,
+         SwrSync,
+         HANDLE            hContext,
+         PFN_CALLBACK_FUNC pfnFunc,
+         uint64_t          userData,
+         uint64_t          userData2,
+         uint64_t          userData3);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Stall cmd. Stalls the backend until all previous work has been completed.
  ///        Frontend work can continue to make progress
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrStallBE,
-    HANDLE hContext);
+SWR_FUNC(void, SwrStallBE, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Blocks until all rendering has been completed.
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdle,
-    HANDLE hContext);
+SWR_FUNC(void, SwrWaitForIdle, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Blocks until all FE rendering has been completed.
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrWaitForIdleFE,
-    HANDLE hContext);
+SWR_FUNC(void, SwrWaitForIdleFE, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set vertex buffer state.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param numBuffers - Number of vertex buffer state descriptors.
  /// @param pVertexBuffers - Array of vertex buffer state descriptors.
-SWR_FUNC(void, SwrSetVertexBuffers,
-    HANDLE hContext,
-    uint32_t numBuffers,
-    const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
+SWR_FUNC(void,
+         SwrSetVertexBuffers,
+         HANDLE                         hContext,
+         uint32_t                       numBuffers,
+         const SWR_VERTEX_BUFFER_STATE* pVertexBuffers);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set index buffer
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pIndexBuffer - Index buffer.
-SWR_FUNC(void, SwrSetIndexBuffer,
-    HANDLE hContext,
-    const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
+SWR_FUNC(void, SwrSetIndexBuffer, HANDLE hContext, const SWR_INDEX_BUFFER_STATE* pIndexBuffer);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set fetch shader pointer.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnFetchFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetFetchFunc,
-    HANDLE hContext,
-    PFN_FETCH_FUNC    pfnFetchFunc);
+SWR_FUNC(void, SwrSetFetchFunc, HANDLE hContext, PFN_FETCH_FUNC pfnFetchFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set streamout shader pointer.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnSoFunc - Pointer to shader.
  /// @param streamIndex - specifies stream
-SWR_FUNC(void, SwrSetSoFunc,
-    HANDLE hContext,
-    PFN_SO_FUNC    pfnSoFunc,
-    uint32_t streamIndex);
+SWR_FUNC(void, SwrSetSoFunc, HANDLE hContext, PFN_SO_FUNC pfnSoFunc, uint32_t streamIndex);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set streamout state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pSoState - Pointer to streamout state.
-SWR_FUNC(void, SwrSetSoState,
-    HANDLE hContext,
-    SWR_STREAMOUT_STATE* pSoState);
+SWR_FUNC(void, SwrSetSoState, HANDLE hContext, SWR_STREAMOUT_STATE* pSoState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set streamout buffer state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pSoBuffer - Pointer to streamout buffer.
  /// @param slot - Slot to bind SO buffer to.
-SWR_FUNC(void, SwrSetSoBuffers,
-    HANDLE hContext,
-    SWR_STREAMOUT_BUFFER* pSoBuffer,
-    uint32_t slot);
+SWR_FUNC(void, SwrSetSoBuffers, HANDLE hContext, SWR_STREAMOUT_BUFFER* pSoBuffer, uint32_t slot);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set vertex shader pointer.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnVertexFunc - Pointer to shader.
-SWR_FUNC(void, SwrSetVertexFunc,
-    HANDLE hContext,
-    PFN_VERTEX_FUNC pfnVertexFunc);
+SWR_FUNC(void, SwrSetVertexFunc, HANDLE hContext, PFN_VERTEX_FUNC pfnVertexFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set frontend state.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetFrontendState,
-    HANDLE hContext,
-    SWR_FRONTEND_STATE *pState);
+SWR_FUNC(void, SwrSetFrontendState, HANDLE hContext, SWR_FRONTEND_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set geometry shader state.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetGsState,
-    HANDLE hContext,
-    SWR_GS_STATE *pState);
+SWR_FUNC(void, SwrSetGsState, HANDLE hContext, SWR_GS_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set geometry shader
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to geometry shader function
-SWR_FUNC(void, SwrSetGsFunc,
-    HANDLE hContext,
-    PFN_GS_FUNC pfnGsFunc);
+SWR_FUNC(void, SwrSetGsFunc, HANDLE hContext, PFN_GS_FUNC pfnGsFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set compute shader
@@ -434,88 +402,70 @@ SWR_FUNC(void, SwrSetGsFunc,
  /// @param totalSpillFillSize - size in bytes needed for spill/fill.
  /// @param scratchSpaceSizePerInstance - size of the scratch space needed per simd instance
  /// @param numInstances - number of simd instances that are run per execution of the shader
-SWR_FUNC(void, SwrSetCsFunc,
-    HANDLE hContext,
-    PFN_CS_FUNC pfnCsFunc,
-    uint32_t totalThreadsInGroup,
-    uint32_t totalSpillFillSize,
-    uint32_t scratchSpaceSizePerInstance,
-    uint32_t numInstances
-    );
+SWR_FUNC(void,
+         SwrSetCsFunc,
+         HANDLE      hContext,
+         PFN_CS_FUNC pfnCsFunc,
+         uint32_t    totalThreadsInGroup,
+         uint32_t    totalSpillFillSize,
+         uint32_t    scratchSpaceSizePerInstance,
+         uint32_t    numInstances);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set tessellation state.
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state
-SWR_FUNC(void, SwrSetTsState,
-    HANDLE hContext,
-    SWR_TS_STATE *pState);
+SWR_FUNC(void, SwrSetTsState, HANDLE hContext, SWR_TS_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set hull shader
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetHsFunc,
-    HANDLE hContext,
-    PFN_HS_FUNC pfnFunc);
+SWR_FUNC(void, SwrSetHsFunc, HANDLE hContext, PFN_HS_FUNC pfnFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set domain shader
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pfnFunc - Pointer to shader function
-SWR_FUNC(void, SwrSetDsFunc,
-    HANDLE hContext,
-    PFN_DS_FUNC pfnFunc);
+SWR_FUNC(void, SwrSetDsFunc, HANDLE hContext, PFN_DS_FUNC pfnFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set depth stencil state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthStencilState,
-    HANDLE hContext,
-    SWR_DEPTH_STENCIL_STATE *pState);
+SWR_FUNC(void, SwrSetDepthStencilState, HANDLE hContext, SWR_DEPTH_STENCIL_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set backend state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBackendState,
-    HANDLE hContext,
-    SWR_BACKEND_STATE *pState);
+SWR_FUNC(void, SwrSetBackendState, HANDLE hContext, SWR_BACKEND_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set depth bounds state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetDepthBoundsState,
-    HANDLE hContext,
-    SWR_DEPTH_BOUNDS_STATE *pState);
+SWR_FUNC(void, SwrSetDepthBoundsState, HANDLE hContext, SWR_DEPTH_BOUNDS_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set pixel shader state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetPixelShaderState,
-    HANDLE hContext,
-    SWR_PS_STATE *pState);
+SWR_FUNC(void, SwrSetPixelShaderState, HANDLE hContext, SWR_PS_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set blend state
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pState - Pointer to state.
-SWR_FUNC(void, SwrSetBlendState,
-    HANDLE hContext,
-    SWR_BLEND_STATE *pState);
+SWR_FUNC(void, SwrSetBlendState, HANDLE hContext, SWR_BLEND_STATE* pState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Set blend function
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param renderTarget - render target index
  /// @param pfnBlendFunc - function pointer
-SWR_FUNC(void, SwrSetBlendFunc,
-    HANDLE hContext,
-    uint32_t renderTarget,
-    PFN_BLEND_JIT_FUNC pfnBlendFunc);
+SWR_FUNC(
+    void, SwrSetBlendFunc, HANDLE hContext, uint32_t renderTarget, PFN_BLEND_JIT_FUNC pfnBlendFunc);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrDraw
@@ -523,11 +473,12 @@ SWR_FUNC(void, SwrSetBlendFunc,
  /// @param topology - Specifies topology for draw.
  /// @param startVertex - Specifies start vertex in vertex buffer for draw.
  /// @param primCount - Number of vertices.
-SWR_FUNC(void, SwrDraw,
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t startVertex,
-    uint32_t primCount);
+SWR_FUNC(void,
+         SwrDraw,
+         HANDLE             hContext,
+         PRIMITIVE_TOPOLOGY topology,
+         uint32_t           startVertex,
+         uint32_t           primCount);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrDrawInstanced
@@ -536,14 +487,16 @@ SWR_FUNC(void, SwrDraw,
  /// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
  /// @param numInstances - How many instances to render.
  /// @param startVertex - Specifies start vertex for draw. (vertex data)
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-SWR_FUNC(void, SwrDrawInstanced,
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numVertsPerInstance,
-    uint32_t numInstances,
-    uint32_t startVertex,
-    uint32_t startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+SWR_FUNC(void,
+         SwrDrawInstanced,
+         HANDLE             hContext,
+         PRIMITIVE_TOPOLOGY topology,
+         uint32_t           numVertsPerInstance,
+         uint32_t           numInstances,
+         uint32_t           startVertex,
+         uint32_t           startInstance);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief DrawIndexed
@@ -552,12 +505,13 @@ SWR_FUNC(void, SwrDrawInstanced,
  /// @param numIndices - Number of indices to read sequentially from index buffer.
  /// @param indexOffset - Starting index into index buffer.
  /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-SWR_FUNC(void, SwrDrawIndexed,
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numIndices,
-    uint32_t indexOffset,
-    int32_t baseVertex);
+SWR_FUNC(void,
+         SwrDrawIndexed,
+         HANDLE             hContext,
+         PRIMITIVE_TOPOLOGY topology,
+         uint32_t           numIndices,
+         uint32_t           indexOffset,
+         int32_t            baseVertex);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrDrawIndexedInstanced
@@ -567,26 +521,30 @@ SWR_FUNC(void, SwrDrawIndexed,
  /// @param numInstances - Number of instances to render.
  /// @param indexOffset - Starting index into index buffer.
  /// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
-/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
-SWR_FUNC(void, SwrDrawIndexedInstanced,
-    HANDLE hContext,
-    PRIMITIVE_TOPOLOGY topology,
-    uint32_t numIndices,
-    uint32_t numInstances,
-    uint32_t indexOffset,
-    int32_t baseVertex,
-    uint32_t startInstance);
+/// @param startInstance - Which instance to start sequentially fetching from in each buffer
+/// (instanced data)
+SWR_FUNC(void,
+         SwrDrawIndexedInstanced,
+         HANDLE             hContext,
+         PRIMITIVE_TOPOLOGY topology,
+         uint32_t           numIndices,
+         uint32_t           numInstances,
+         uint32_t           indexOffset,
+         int32_t            baseVertex,
+         uint32_t           startInstance);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrInvalidateTiles
  /// @param hContext - Handle passed back from SwrCreateContext
-/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to
+/// invalidate.
  /// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
  ///                         be hottile size-aligned.
-SWR_FUNC(void, SwrInvalidateTiles,
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    const SWR_RECT& invalidateRect);
+SWR_FUNC(void,
+         SwrInvalidateTiles,
+         HANDLE          hContext,
+         uint32_t        attachmentMask,
+         const SWR_RECT& invalidateRect);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrDiscardRect
@@ -594,10 +552,7 @@ SWR_FUNC(void, SwrInvalidateTiles,
  /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
  /// @param rect - The pixel-coordinate rectangle to discard.  Only fully-covered hottiles will be
  ///               discarded.
-SWR_FUNC(void, SwrDiscardRect,
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    const SWR_RECT& rect);
+SWR_FUNC(void, SwrDiscardRect, HANDLE hContext, uint32_t attachmentMask, const SWR_RECT& rect);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrDispatch
@@ -605,27 +560,29 @@ SWR_FUNC(void, SwrDiscardRect,
  /// @param threadGroupCountX - Number of thread groups dispatched in X direction
  /// @param threadGroupCountY - Number of thread groups dispatched in Y direction
  /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction
-SWR_FUNC(void, SwrDispatch,
-    HANDLE hContext,
-    uint32_t threadGroupCountX,
-    uint32_t threadGroupCountY,
-    uint32_t threadGroupCountZ);
-
+SWR_FUNC(void,
+         SwrDispatch,
+         HANDLE   hContext,
+         uint32_t threadGroupCountX,
+         uint32_t threadGroupCountY,
+         uint32_t threadGroupCountZ);
  
  enum SWR_TILE_STATE
  {
-    SWR_TILE_INVALID    = 0,    // tile is in unitialized state and should be loaded with surface contents before rendering
-    SWR_TILE_DIRTY      = 2,    // tile contains newer data than surface it represents
-    SWR_TILE_RESOLVED   = 3,    // is in sync with surface it represents
+    SWR_TILE_INVALID = 0, // tile is in uninitialized state and should be loaded with surface contents
+                          // before rendering
+    SWR_TILE_DIRTY    = 2, // tile contains newer data than surface it represents
+    SWR_TILE_RESOLVED = 3, // is in sync with surface it represents
  };
  
-/// @todo Add a good description for what attachments are and when and why you would use the different SWR_TILE_STATEs.
-SWR_FUNC(void, SwrStoreTiles,
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    SWR_TILE_STATE postStoreTileState,
-    const SWR_RECT& storeRect);
-
+/// @todo Add a good description for what attachments are and when and why you would use the
+/// different SWR_TILE_STATEs.
+SWR_FUNC(void,
+         SwrStoreTiles,
+         HANDLE          hContext,
+         uint32_t        attachmentMask,
+         SWR_TILE_STATE  postStoreTileState,
+         const SWR_RECT& storeRect);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
@@ -636,22 +593,21 @@ SWR_FUNC(void, SwrStoreTiles,
  /// @param z - depth value use for clearing depth buffer
  /// @param stencil - stencil value used for clearing stencil buffer
  /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
-SWR_FUNC(void, SwrClearRenderTarget,
-    HANDLE hContext,
-    uint32_t attachmentMask,
-    uint32_t renderTargetArrayIndex,
-    const float clearColor[4],
-    float z,
-    uint8_t stencil,
-    const SWR_RECT& clearRect);
+SWR_FUNC(void,
+         SwrClearRenderTarget,
+         HANDLE          hContext,
+         uint32_t        attachmentMask,
+         uint32_t        renderTargetArrayIndex,
+         const float     clearColor[4],
+         float           z,
+         uint8_t         stencil,
+         const SWR_RECT& clearRect);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrSetRastState
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
-SWR_FUNC(void, SwrSetRastState,
-    HANDLE hContext,
-    const SWR_RASTSTATE *pRastState);
+SWR_FUNC(void, SwrSetRastState, HANDLE hContext, const SWR_RASTSTATE* pRastState);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrSetViewports
@@ -659,21 +615,20 @@ SWR_FUNC(void, SwrSetRastState,
  /// @param numViewports - number of viewports passed in
  /// @param pViewports - Specifies extents of viewport.
  /// @param pMatrices - If not specified then SWR computes a default one.
-SWR_FUNC(void, SwrSetViewports,
-    HANDLE hContext,
-    uint32_t numViewports,
-    const SWR_VIEWPORT* pViewports,
-    const SWR_VIEWPORT_MATRICES* pMatrices);
+SWR_FUNC(void,
+         SwrSetViewports,
+         HANDLE                       hContext,
+         uint32_t                     numViewports,
+         const SWR_VIEWPORT*          pViewports,
+         const SWR_VIEWPORT_MATRICES* pMatrices);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief SwrSetScissorRects
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param numScissors - number of scissors passed in
  /// @param pScissors - array of scissors
-SWR_FUNC(void, SwrSetScissorRects,
-    HANDLE hContext,
-    uint32_t numScissors,
-    const SWR_RECT* pScissors);
+SWR_FUNC(
+    void, SwrSetScissorRects, HANDLE hContext, uint32_t numScissors, const SWR_RECT* pScissors);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Returns a pointer to the private context state for the current
@@ -683,8 +638,7 @@ SWR_FUNC(void, SwrSetScissorRects,
  /// @note  Client needs to resend private state prior to each draw call.
  ///        Also, SWR is responsible for the private state memory.
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void*, SwrGetPrivateContextState,
-    HANDLE hContext);
+SWR_FUNC(void*, SwrGetPrivateContextState, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Clients can use this to allocate memory for draw/dispatch
@@ -694,32 +648,24 @@ SWR_FUNC(void*, SwrGetPrivateContextState,
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param size - Size of allocation
  /// @param align - Alignment needed for allocation.
-SWR_FUNC(void*, SwrAllocDrawContextMemory,
-    HANDLE hContext,
-    uint32_t size,
-    uint32_t align);
+SWR_FUNC(void*, SwrAllocDrawContextMemory, HANDLE hContext, uint32_t size, uint32_t align);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Enables stats counting
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsFE,
-    HANDLE hContext,
-    bool enable);
+SWR_FUNC(void, SwrEnableStatsFE, HANDLE hContext, bool enable);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Enables stats counting
  /// @param hContext - Handle passed back from SwrCreateContext
  /// @param enable - If true then counts are incremented.
-SWR_FUNC(void, SwrEnableStatsBE,
-    HANDLE hContext,
-    bool enable);
+SWR_FUNC(void, SwrEnableStatsBE, HANDLE hContext, bool enable);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Mark end of frame - used for performance profiling
  /// @param hContext - Handle passed back from SwrCreateContext
-SWR_FUNC(void, SwrEndFrame,
-    HANDLE hContext);
+SWR_FUNC(void, SwrEndFrame, HANDLE hContext);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Initialize swr backend and memory internal tables
@@ -733,13 +679,16 @@ SWR_FUNC(void, SwrInit);
  /// @param renderTargetIndex - Index to src render target
  /// @param x, y - Coordinates to raster tile.
  /// @param pDstHotTile - Pointer to Hot Tile
-SWR_FUNC(void, SwrLoadHotTile,
-    HANDLE hWorkerPrivateData,
-    const SWR_SURFACE_STATE *pSrcSurface,
-    SWR_FORMAT dstFormat,
-    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
-    uint8_t *pDstHotTile);
+SWR_FUNC(void,
+         SwrLoadHotTile,
+         HANDLE                      hWorkerPrivateData,
+         const SWR_SURFACE_STATE*    pSrcSurface,
+         SWR_FORMAT                  dstFormat,
+         SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+         uint32_t                    x,
+         uint32_t                    y,
+         uint32_t                    renderTargetArrayIndex,
+         uint8_t*                    pDstHotTile);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Deswizzles and stores a full hottile to a render surface
@@ -748,13 +697,16 @@ SWR_FUNC(void, SwrLoadHotTile,
  /// @param renderTargetIndex - Index to destination render target
  /// @param x, y - Coordinates to raster tile.
  /// @param pSrcHotTile - Pointer to Hot Tile
-SWR_FUNC(void, SwrStoreHotTileToSurface,
-    HANDLE hWorkerPrivateData,
-    SWR_SURFACE_STATE *pDstSurface,
-    SWR_FORMAT srcFormat,
-    SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-    uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex,
-    uint8_t *pSrcHotTile);
+SWR_FUNC(void,
+         SwrStoreHotTileToSurface,
+         HANDLE                      hWorkerPrivateData,
+         SWR_SURFACE_STATE*          pDstSurface,
+         SWR_FORMAT                  srcFormat,
+         SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
+         uint32_t                    x,
+         uint32_t                    y,
+         uint32_t                    renderTargetArrayIndex,
+         uint8_t*                    pSrcHotTile);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Writes clear color to every pixel of a render surface
@@ -762,72 +714,73 @@ SWR_FUNC(void, SwrStoreHotTileToSurface,
  /// @param renderTargetIndex - Index to destination render target
  /// @param x, y - Coordinates to raster tile.
  /// @param pClearColor - Pointer to clear color
-SWR_FUNC(void, SwrStoreHotTileClear,
-         HANDLE hWorkerPrivateData,
-         SWR_SURFACE_STATE *pDstSurface,
+SWR_FUNC(void,
+         SwrStoreHotTileClear,
+         HANDLE                      hWorkerPrivateData,
+         SWR_SURFACE_STATE*          pDstSurface,
           SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
-         uint32_t x,
-         uint32_t y,
-         uint32_t renderTargetArrayIndex,
-         const float* pClearColor);
+         uint32_t                    x,
+         uint32_t                    y,
+         uint32_t                    renderTargetArrayIndex,
+         const float*                pClearColor);
  
  struct SWR_INTERFACE
  {
-    PFNSwrCreateContext pfnSwrCreateContext;
-    PFNSwrDestroyContext pfnSwrDestroyContext;
-    PFNSwrBindApiThread pfnSwrBindApiThread;
-    PFNSwrSaveState pfnSwrSaveState;
-    PFNSwrRestoreState pfnSwrRestoreState;
-    PFNSwrSync pfnSwrSync;
-    PFNSwrStallBE pfnSwrStallBE;
-    PFNSwrWaitForIdle pfnSwrWaitForIdle;
-    PFNSwrWaitForIdleFE pfnSwrWaitForIdleFE;
-    PFNSwrSetVertexBuffers pfnSwrSetVertexBuffers;
-    PFNSwrSetIndexBuffer pfnSwrSetIndexBuffer;
-    PFNSwrSetFetchFunc pfnSwrSetFetchFunc;
-    PFNSwrSetSoFunc pfnSwrSetSoFunc;
-    PFNSwrSetSoState pfnSwrSetSoState;
-    PFNSwrSetSoBuffers pfnSwrSetSoBuffers;
-    PFNSwrSetVertexFunc pfnSwrSetVertexFunc;
-    PFNSwrSetFrontendState pfnSwrSetFrontendState;
-    PFNSwrSetGsState pfnSwrSetGsState;
-    PFNSwrSetGsFunc pfnSwrSetGsFunc;
-    PFNSwrSetCsFunc pfnSwrSetCsFunc;
-    PFNSwrSetTsState pfnSwrSetTsState;
-    PFNSwrSetHsFunc pfnSwrSetHsFunc;
-    PFNSwrSetDsFunc pfnSwrSetDsFunc;
-    PFNSwrSetDepthStencilState pfnSwrSetDepthStencilState;
-    PFNSwrSetBackendState pfnSwrSetBackendState;
-    PFNSwrSetDepthBoundsState pfnSwrSetDepthBoundsState;
-    PFNSwrSetPixelShaderState pfnSwrSetPixelShaderState;
-    PFNSwrSetBlendState pfnSwrSetBlendState;
-    PFNSwrSetBlendFunc pfnSwrSetBlendFunc;
-    PFNSwrDraw pfnSwrDraw;
-    PFNSwrDrawInstanced pfnSwrDrawInstanced;
-    PFNSwrDrawIndexed pfnSwrDrawIndexed;
-    PFNSwrDrawIndexedInstanced pfnSwrDrawIndexedInstanced;
-    PFNSwrInvalidateTiles pfnSwrInvalidateTiles;
-    PFNSwrDiscardRect pfnSwrDiscardRect;
-    PFNSwrDispatch pfnSwrDispatch;
-    PFNSwrStoreTiles pfnSwrStoreTiles;
-    PFNSwrClearRenderTarget pfnSwrClearRenderTarget;
-    PFNSwrSetRastState pfnSwrSetRastState;
-    PFNSwrSetViewports pfnSwrSetViewports;
-    PFNSwrSetScissorRects pfnSwrSetScissorRects;
+    PFNSwrCreateContext          pfnSwrCreateContext;
+    PFNSwrDestroyContext         pfnSwrDestroyContext;
+    PFNSwrBindApiThread          pfnSwrBindApiThread;
+    PFNSwrSaveState              pfnSwrSaveState;
+    PFNSwrRestoreState           pfnSwrRestoreState;
+    PFNSwrSync                   pfnSwrSync;
+    PFNSwrStallBE                pfnSwrStallBE;
+    PFNSwrWaitForIdle            pfnSwrWaitForIdle;
+    PFNSwrWaitForIdleFE          pfnSwrWaitForIdleFE;
+    PFNSwrSetVertexBuffers       pfnSwrSetVertexBuffers;
+    PFNSwrSetIndexBuffer         pfnSwrSetIndexBuffer;
+    PFNSwrSetFetchFunc           pfnSwrSetFetchFunc;
+    PFNSwrSetSoFunc              pfnSwrSetSoFunc;
+    PFNSwrSetSoState             pfnSwrSetSoState;
+    PFNSwrSetSoBuffers           pfnSwrSetSoBuffers;
+    PFNSwrSetVertexFunc          pfnSwrSetVertexFunc;
+    PFNSwrSetFrontendState       pfnSwrSetFrontendState;
+    PFNSwrSetGsState             pfnSwrSetGsState;
+    PFNSwrSetGsFunc              pfnSwrSetGsFunc;
+    PFNSwrSetCsFunc              pfnSwrSetCsFunc;
+    PFNSwrSetTsState             pfnSwrSetTsState;
+    PFNSwrSetHsFunc              pfnSwrSetHsFunc;
+    PFNSwrSetDsFunc              pfnSwrSetDsFunc;
+    PFNSwrSetDepthStencilState   pfnSwrSetDepthStencilState;
+    PFNSwrSetBackendState        pfnSwrSetBackendState;
+    PFNSwrSetDepthBoundsState    pfnSwrSetDepthBoundsState;
+    PFNSwrSetPixelShaderState    pfnSwrSetPixelShaderState;
+    PFNSwrSetBlendState          pfnSwrSetBlendState;
+    PFNSwrSetBlendFunc           pfnSwrSetBlendFunc;
+    PFNSwrDraw                   pfnSwrDraw;
+    PFNSwrDrawInstanced          pfnSwrDrawInstanced;
+    PFNSwrDrawIndexed            pfnSwrDrawIndexed;
+    PFNSwrDrawIndexedInstanced   pfnSwrDrawIndexedInstanced;
+    PFNSwrInvalidateTiles        pfnSwrInvalidateTiles;
+    PFNSwrDiscardRect            pfnSwrDiscardRect;
+    PFNSwrDispatch               pfnSwrDispatch;
+    PFNSwrStoreTiles             pfnSwrStoreTiles;
+    PFNSwrClearRenderTarget      pfnSwrClearRenderTarget;
+    PFNSwrSetRastState           pfnSwrSetRastState;
+    PFNSwrSetViewports           pfnSwrSetViewports;
+    PFNSwrSetScissorRects        pfnSwrSetScissorRects;
      PFNSwrGetPrivateContextState pfnSwrGetPrivateContextState;
      PFNSwrAllocDrawContextMemory pfnSwrAllocDrawContextMemory;
-    PFNSwrEnableStatsFE pfnSwrEnableStatsFE;
-    PFNSwrEnableStatsBE pfnSwrEnableStatsBE;
-    PFNSwrEndFrame pfnSwrEndFrame;
-    PFNSwrInit pfnSwrInit;
-    PFNSwrLoadHotTile pfnSwrLoadHotTile;
+    PFNSwrEnableStatsFE          pfnSwrEnableStatsFE;
+    PFNSwrEnableStatsBE          pfnSwrEnableStatsBE;
+    PFNSwrEndFrame               pfnSwrEndFrame;
+    PFNSwrInit                   pfnSwrInit;
+    PFNSwrLoadHotTile           pfnSwrLoadHotTile;
      PFNSwrStoreHotTileToSurface pfnSwrStoreHotTileToSurface;
-    PFNSwrStoreHotTileClear pfnSwrStoreHotTileClear;
+    PFNSwrStoreHotTileClear     pfnSwrStoreHotTileClear;
  };
  
  extern "C" {
-typedef void (SWR_API * PFNSwrGetInterface)(SWR_INTERFACE &out_funcs);
-SWR_VISIBLE void SWR_API SwrGetInterface(SWR_INTERFACE &out_funcs);
+typedef void(SWR_API* PFNSwrGetInterface)(SWR_INTERFACE& out_funcs);
+SWR_VISIBLE void SWR_API SwrGetInterface(SWR_INTERFACE& out_funcs);
  }
  
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/arena.h b/src/gallium/drivers/swr/rasterizer/core/arena.h

index 1db09726cb7e3a11174c4a9f0ec383486046c156..a3cfdb47818fe0528291d1dd5738e8cd12c32a9f 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/arena.h
+++ b/src/gallium/drivers/swr/rasterizer/core/arena.h
@@ -1,35 +1,35 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file arena.h
-*
-* @brief Arena memory manager
-*        The arena is convenient and fast for managing allocations for any of
-*        our allocations that are associated with operations and can all be freed
-*        once when their operation has completed. Allocations are cheap since
-*        most of the time its simply an increment of an offset. Also, no need to
-*        free individual allocations. All of the arena memory can be freed at once.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file arena.h
+ *
+ * @brief Arena memory manager
+ *        The arena is convenient and fast for managing any of our allocations
+ *        that are associated with operations and can all be freed
+ *        once when their operation has completed. Allocations are cheap since
+ *        most of the time it's simply an increment of an offset. Also, no need to
+ *        free individual allocations. All of the arena memory can be freed at once.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <mutex>
@@ -42,10 +42,9 @@ static const size_t ARENA_BLOCK_ALIGN = 64;
  struct ArenaBlock
  {
      size_t      blockSize = 0;
-    ArenaBlock* pNext = nullptr;
+    ArenaBlock* pNext     = nullptr;
  };
-static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN,
-    "Increase BLOCK_ALIGN size");
+static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");
  
  class DefaultAllocator
  {
@@ -55,7 +54,7 @@ public:
          SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
  
          ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
-        p->blockSize = size;
+        p->blockSize  = size;
          return p;
      }
  
@@ -70,7 +69,7 @@ public:
  };
  
  // Caching Allocator for Arena
-template<uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
+template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
  struct CachingAllocatorT : DefaultAllocator
  {
      ArenaBlock* AllocateAligned(size_t size, size_t align)
@@ -83,8 +82,8 @@ struct CachingAllocatorT : DefaultAllocator
          {
              // search cached blocks
              std::lock_guard<std::mutex> l(m_mutex);
-            ArenaBlock* pPrevBlock = &m_cachedBlocks[bucket];
-            ArenaBlock* pBlock = SearchBlocks(pPrevBlock, size, align);
+            ArenaBlock*                 pPrevBlock = &m_cachedBlocks[bucket];
+            ArenaBlock*                 pBlock     = SearchBlocks(pPrevBlock, size, align);
  
              if (pBlock)
              {
@@ -97,7 +96,7 @@ struct CachingAllocatorT : DefaultAllocator
              else
              {
                  pPrevBlock = &m_oldCachedBlocks[bucket];
-                pBlock = SearchBlocks(pPrevBlock, size, align);
+                pBlock     = SearchBlocks(pPrevBlock, size, align);
  
                  if (pBlock)
                  {
@@ -113,7 +112,7 @@ struct CachingAllocatorT : DefaultAllocator
              {
                  SWR_ASSUME_ASSERT(pPrevBlock && pPrevBlock->pNext == pBlock);
                  pPrevBlock->pNext = pBlock->pNext;
-                pBlock->pNext = nullptr;
+                pBlock->pNext     = nullptr;
  
                  return pBlock;
              }
@@ -150,7 +149,10 @@ struct CachingAllocatorT : DefaultAllocator
  
      void FreeOldBlocks()
      {
-        if (!m_cachedSize) { return; }
+        if (!m_cachedSize)
+        {
+            return;
+        }
          std::lock_guard<std::mutex> l(m_mutex);
  
          bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);
@@ -169,7 +171,7 @@ struct CachingAllocatorT : DefaultAllocator
                      pBlock = pNext;
                  }
                  m_oldCachedBlocks[i].pNext = nullptr;
-                m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
+                m_pOldLastCachedBlocks[i]  = &m_oldCachedBlocks[i];
              }
  
              if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
@@ -179,8 +181,8 @@ struct CachingAllocatorT : DefaultAllocator
                      // We know that all blocks are the same size.
                      // Just move the list over.
                      m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
-                    m_oldCachedBlocks[i].pNext = m_cachedBlocks[i].pNext;
-                    m_cachedBlocks[i].pNext = nullptr;
+                    m_oldCachedBlocks[i].pNext    = m_cachedBlocks[i].pNext;
+                    m_cachedBlocks[i].pNext       = nullptr;
                      if (m_pOldLastCachedBlocks[i]->pNext)
                      {
                          m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
@@ -195,13 +197,13 @@ struct CachingAllocatorT : DefaultAllocator
                      while (pBlock)
                      {
                          ArenaBlock* pNext = pBlock->pNext;
-                        pBlock->pNext = nullptr;
+                        pBlock->pNext     = nullptr;
                          m_cachedSize -= pBlock->blockSize;
                          InsertCachedBlock<true>(i, pBlock);
                          pBlock = pNext;
                      }
  
-                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+                    m_pLastCachedBlocks[i]  = &m_cachedBlocks[i];
                      m_cachedBlocks[i].pNext = nullptr;
                  }
              }
@@ -215,7 +217,7 @@ struct CachingAllocatorT : DefaultAllocator
      {
          for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
          {
-            m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
+            m_pLastCachedBlocks[i]    = &m_cachedBlocks[i];
              m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
          }
      }
@@ -260,7 +262,8 @@ private:
      {
          SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);
  
-        ArenaBlock* pPrevBlock = OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
+        ArenaBlock* pPrevBlock =
+            OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
          ArenaBlock* pBlock = pPrevBlock->pNext;
  
          while (pBlock)
@@ -271,13 +274,13 @@ private:
                  break;
              }
              pPrevBlock = pBlock;
-            pBlock = pBlock->pNext;
+            pBlock     = pBlock->pNext;
          }
  
          // Insert into list
          SWR_ASSUME_ASSERT(pPrevBlock);
          pPrevBlock->pNext = pNewBlock;
-        pNewBlock->pNext = pBlock;
+        pNewBlock->pNext  = pBlock;
  
          if (OldBlockT)
          {
@@ -301,9 +304,9 @@ private:
  
      static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
      {
-        ArenaBlock* pBlock = pPrevBlock->pNext;
+        ArenaBlock* pBlock          = pPrevBlock->pNext;
          ArenaBlock* pPotentialBlock = nullptr;
-        ArenaBlock* pPotentialPrev = nullptr;
+        ArenaBlock* pPotentialPrev  = nullptr;
  
          while (pBlock)
          {
@@ -320,26 +323,26 @@ private:
                      // We could use this as it is larger than we wanted, but
                      // continue to search for a better match
                      pPotentialBlock = pBlock;
-                    pPotentialPrev = pPrevBlock;
+                    pPotentialPrev  = pPrevBlock;
                  }
              }
              else
              {
                  // Blocks are sorted by size (biggest first)
-                // So, if we get here, there are no blocks 
+                // So, if we get here, there are no blocks
                  // large enough, fall through to allocation.
                  pBlock = nullptr;
                  break;
              }
  
              pPrevBlock = pBlock;
-            pBlock = pBlock->pNext;
+            pBlock     = pBlock->pNext;
          }
  
          if (!pBlock)
          {
              // Couldn't find an exact match, use next biggest size
-            pBlock = pPotentialBlock;
+            pBlock     = pPotentialBlock;
              pPrevBlock = pPotentialPrev;
          }
  
@@ -347,35 +350,32 @@ private:
      }
  
      // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
-    static const uint32_t   CACHE_NUM_BUCKETS       = NumBucketsT;
-    static const uint32_t   CACHE_START_BUCKET_BIT  = StartBucketBitT;
-    static const size_t     MAX_UNUSED_SIZE         = sizeof(MEGABYTE);
+    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
+    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
+    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);
  
-    ArenaBlock              m_cachedBlocks[CACHE_NUM_BUCKETS];
-    ArenaBlock*             m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
-    ArenaBlock              m_oldCachedBlocks[CACHE_NUM_BUCKETS];
-    ArenaBlock*             m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
-    std::mutex              m_mutex;
+    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
+    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
+    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
+    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
+    std::mutex  m_mutex;
  
-    size_t                  m_totalAllocated = 0;
+    size_t m_totalAllocated = 0;
  
-    size_t                  m_cachedSize = 0;
-    size_t                  m_oldCachedSize = 0;
+    size_t m_cachedSize    = 0;
+    size_t m_oldCachedSize = 0;
  };
  typedef CachingAllocatorT<> CachingAllocator;
  
-template<typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
+template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
  class TArena
  {
  public:
-    TArena(T& in_allocator)  : m_allocator(in_allocator) {}
-    TArena()                 : m_allocator(m_defAllocator) {}
-    ~TArena()
-    {
-        Reset(true);
-    }
+    TArena(T& in_allocator) : m_allocator(in_allocator) {}
+    TArena() : m_allocator(m_defAllocator) {}
+    ~TArena() { Reset(true); }
  
-    void* AllocAligned(size_t size, size_t  align)
+    void* AllocAligned(size_t size, size_t align)
      {
          if (0 == size)
          {
@@ -387,12 +387,12 @@ public:
          if (m_pCurBlock)
          {
              ArenaBlock* pCurBlock = m_pCurBlock;
-            size_t offset = AlignUp(m_offset, align);
+            size_t      offset    = AlignUp(m_offset, align);
  
              if ((offset + size) <= pCurBlock->blockSize)
              {
                  void* pMem = PtrAdd(pCurBlock, offset);
-                m_offset = offset + size;
+                m_offset   = offset + size;
                  return pMem;
              }
  
@@ -401,17 +401,18 @@ public:
          }
  
          static const size_t ArenaBlockSize = BlockSizeT;
-        size_t blockSize = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
+        size_t              blockSize      = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);
  
          // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
          blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);
  
-        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(blockSize, ARENA_BLOCK_ALIGN);    // Arena blocks are always simd byte aligned.
+        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
+            blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
          SWR_ASSERT(pNewBlock != nullptr);
  
          if (pNewBlock != nullptr)
          {
-            m_offset = ARENA_BLOCK_ALIGN;
+            m_offset         = ARENA_BLOCK_ALIGN;
              pNewBlock->pNext = m_pCurBlock;
  
              m_pCurBlock = pNewBlock;
@@ -420,10 +421,7 @@ public:
          return AllocAligned(size, align);
      }
  
-    void* Alloc(size_t  size)
-    {
-        return AllocAligned(size, 1);
-    }
+    void* Alloc(size_t size) { return AllocAligned(size, 1); }
  
      void* AllocAlignedSync(size_t size, size_t align)
      {
@@ -453,12 +451,12 @@ public:
  
          if (m_pCurBlock)
          {
-            ArenaBlock *pUsedBlocks = m_pCurBlock->pNext;
-            m_pCurBlock->pNext = nullptr;
+            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
+            m_pCurBlock->pNext      = nullptr;
              while (pUsedBlocks)
              {
                  ArenaBlock* pBlock = pUsedBlocks;
-                pUsedBlocks = pBlock->pNext;
+                pUsedBlocks        = pBlock->pNext;
  
                  m_allocator.Free(pBlock);
              }
@@ -473,20 +471,20 @@ public:
  
      bool IsEmpty()
      {
-        return (m_pCurBlock == nullptr) || (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
+        return (m_pCurBlock == nullptr) ||
+               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
      }
  
  private:
-
-    ArenaBlock*         m_pCurBlock = nullptr;
-    size_t              m_offset    = ARENA_BLOCK_ALIGN;
+    ArenaBlock* m_pCurBlock = nullptr;
+    size_t      m_offset    = ARENA_BLOCK_ALIGN;
  
      /// @note Mutex is only used by sync allocation functions.
-    std::mutex          m_mutex;
+    std::mutex m_mutex;
  
-    DefaultAllocator    m_defAllocator;
-    T&                  m_allocator;
+    DefaultAllocator m_defAllocator;
+    T&               m_allocator;
  };
  
-using StdArena      = TArena<DefaultAllocator>;
-using CachingArena  = TArena<CachingAllocator>;
+using StdArena     = TArena<DefaultAllocator>;
+using CachingArena = TArena<CachingAllocator>;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp b/src/gallium/drivers/swr/rasterizer/core/backend.cpp

index 5ac9ceb165e2e25cf47c340f28d8cfba6d71c64d..8f8dbcf7884373441582373ed36c382c3e10cd2d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  
  #include <smmintrin.h>
  
@@ -44,9 +44,13 @@
  /// @param pDC - pointer to draw context (dispatch).
  /// @param workerId - The unique worker ID that is assigned to this thread.
  /// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
+void ProcessComputeBE(DRAW_CONTEXT* pDC,
+                      uint32_t      workerId,
+                      uint32_t      threadGroupId,
+                      void*&        pSpillFillBuffer,
+                      void*&        pScratchSpace)
  {
-    SWR_CONTEXT *pContext = pDC->pContext;
+    SWR_CONTEXT* pContext = pDC->pContext;
  
      RDTSC_BEGIN(BEDispatch, pDC->drawId);
  
@@ -59,8 +63,9 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
      {
          pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD_BYTES);
      }
-    
-    size_t scratchSpaceSize = pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
+
+    size_t scratchSpaceSize =
+        pDC->pState->state.scratchSpaceSize * pDC->pState->state.scratchSpaceNumInstances;
      if (scratchSpaceSize && pScratchSpace == nullptr)
      {
          pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD_BYTES);
@@ -68,17 +73,19 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
  
      const API_STATE& state = GetApiState(pDC);
  
-    SWR_CS_CONTEXT csContext{ 0 };
-    csContext.tileCounter = threadGroupId;
-    csContext.dispatchDims[0] = pTaskData->threadGroupCountX;
-    csContext.dispatchDims[1] = pTaskData->threadGroupCountY;
-    csContext.dispatchDims[2] = pTaskData->threadGroupCountZ;
-    csContext.pTGSM = pContext->ppScratch[workerId];
-    csContext.pSpillFillBuffer = (uint8_t*)pSpillFillBuffer;
-    csContext.pScratchSpace = (uint8_t*)pScratchSpace;
+    SWR_CS_CONTEXT csContext{0};
+    csContext.tileCounter         = threadGroupId;
+    csContext.dispatchDims[0]     = pTaskData->threadGroupCountX;
+    csContext.dispatchDims[1]     = pTaskData->threadGroupCountY;
+    csContext.dispatchDims[2]     = pTaskData->threadGroupCountZ;
+    csContext.pTGSM               = pContext->ppScratch[workerId];
+    csContext.pSpillFillBuffer    = (uint8_t*)pSpillFillBuffer;
+    csContext.pScratchSpace       = (uint8_t*)pScratchSpace;
      csContext.scratchSpacePerSimd = pDC->pState->state.scratchSpaceSize;
  
-    state.pfnCsFunc(GetPrivateState(pDC), pContext->threadPool.pThreadData[workerId].pWorkerPrivateData, &csContext);
+    state.pfnCsFunc(GetPrivateState(pDC),
+                    pContext->threadPool.pThreadData[workerId].pWorkerPrivateData,
+                    &csContext);
  
      UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
      AR_EVENT(CSStats(csContext.stats.numInstExecuted));
@@ -91,23 +98,26 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroup
  /// @param pDC - pointer to draw context (dispatch).
  /// @param workerId - The unique worker ID that is assigned to this thread.
  /// @param threadGroupId - the linear index for the thread group within the dispatch.
-void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
  {
      // Dummy function
  }
  
-void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
  {
      uint32_t x, y;
      MacroTileMgr::getTileIndices(macroTile, x, y);
      SWR_ASSERT(x == 0 && y == 0);
  }
  
-void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, STORE_TILES_DESC* pDesc, 
-    SWR_RENDERTARGET_ATTACHMENT attachment)
+void ProcessStoreTileBE(DRAW_CONTEXT*               pDC,
+                        uint32_t                    workerId,
+                        uint32_t                    macroTile,
+                        STORE_TILES_DESC*           pDesc,
+                        SWR_RENDERTARGET_ATTACHMENT attachment)
  {
-    SWR_CONTEXT *pContext = pDC->pContext;
-    HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+    SWR_CONTEXT* pContext           = pDC->pContext;
+    HANDLE       hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
      RDTSC_BEGIN(BEStoreTiles, pDC->drawId);
  
@@ -121,17 +131,27 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
      case SWR_ATTACHMENT_COLOR4:
      case SWR_ATTACHMENT_COLOR5:
      case SWR_ATTACHMENT_COLOR6:
-    case SWR_ATTACHMENT_COLOR7: srcFormat = KNOB_COLOR_HOT_TILE_FORMAT; break;
-    case SWR_ATTACHMENT_DEPTH: srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT; break;
-    case SWR_ATTACHMENT_STENCIL: srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT; break;
-    default: SWR_INVALID("Unknown attachment: %d", attachment); srcFormat = KNOB_COLOR_HOT_TILE_FORMAT; break;
+    case SWR_ATTACHMENT_COLOR7:
+        srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
+        break;
+    case SWR_ATTACHMENT_DEPTH:
+        srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT;
+        break;
+    case SWR_ATTACHMENT_STENCIL:
+        srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT;
+        break;
+    default:
+        SWR_INVALID("Unknown attachment: %d", attachment);
+        srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
+        break;
      }
  
      uint32_t x, y;
      MacroTileMgr::getTileIndices(macroTile, x, y);
  
      // Only need to store the hottile if it's been rendered to...
-    HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
+    HOTTILE* pHotTile =
+        pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
      if (pHotTile)
      {
          // clear if clear is pending (i.e., not rendered to), then mark as dirty for store.
@@ -140,22 +160,35 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
              PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
              SWR_ASSERT(pfnClearTiles != nullptr);
  
-            pfnClearTiles(pDC, hWorkerPrivateData, attachment, macroTile, pHotTile->renderTargetArrayIndex, pHotTile->clearData, pDesc->rect);
+            pfnClearTiles(pDC,
+                          hWorkerPrivateData,
+                          attachment,
+                          macroTile,
+                          pHotTile->renderTargetArrayIndex,
+                          pHotTile->clearData,
+                          pDesc->rect);
          }
  
-        if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
+        if (pHotTile->state == HOTTILE_DIRTY ||
+            pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
          {
              int32_t destX = KNOB_MACROTILE_X_DIM * x;
              int32_t destY = KNOB_MACROTILE_Y_DIM * y;
  
-            pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, srcFormat,
-                attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+            pContext->pfnStoreTile(GetPrivateState(pDC),
+                                   hWorkerPrivateData,
+                                   srcFormat,
+                                   attachment,
+                                   destX,
+                                   destY,
+                                   pHotTile->renderTargetArrayIndex,
+                                   pHotTile->pBuffer);
          }
-        
  
          if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED)
          {
-            if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY && pHotTile->state == HOTTILE_RESOLVED))
+            if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
+                  pHotTile->state == HOTTILE_RESOLVED))
              {
                  pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
              }
@@ -164,12 +197,12 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
      RDTSC_END(BEStoreTiles, 1);
  }
  
-void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
  {
-    STORE_TILES_DESC *pDesc = (STORE_TILES_DESC*)pData;
+    STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pData;
  
-    unsigned long rt = 0;
-    uint32_t mask = pDesc->attachmentMask;
+    unsigned long rt   = 0;
+    uint32_t      mask = pDesc->attachmentMask;
      while (_BitScanForward(&rt, mask))
      {
          mask &= ~(1 << rt);
@@ -177,10 +210,13 @@ void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTil
      }
  }
  
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
+                                     uint32_t      workerId,
+                                     uint32_t      macroTile,
+                                     void*         pData)
  {
-    DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
-    SWR_CONTEXT *pContext = pDC->pContext;
+    DISCARD_INVALIDATE_TILES_DESC* pDesc    = (DISCARD_INVALIDATE_TILES_DESC*)pData;
+    SWR_CONTEXT*                   pContext = pDC->pContext;
  
      const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
  
@@ -188,8 +224,13 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
      {
          if (pDesc->attachmentMask & (1 << i))
          {
-            HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTileNoLoad(
-                pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)i, pDesc->createNewTiles, numSamples);
+            HOTTILE* pHotTile =
+                pContext->pHotTileMgr->GetHotTileNoLoad(pContext,
+                                                        pDC,
+                                                        macroTile,
+                                                        (SWR_RENDERTARGET_ATTACHMENT)i,
+                                                        pDesc->createNewTiles,
+                                                        numSamples);
              if (pHotTile)
              {
                  pHotTile->state = (HOTTILE_STATE)pDesc->newTileState;
@@ -198,14 +239,19 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
      }
  }
  
-template<uint32_t sampleCountT>
-void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <uint32_t sampleCountT>
+void BackendNullPS(DRAW_CONTEXT*        pDC,
+                   uint32_t             workerId,
+                   uint32_t             x,
+                   uint32_t             y,
+                   SWR_TRIANGLE_DESC&   work,
+                   RenderOutputBuffers& renderBuffers)
  {
      RDTSC_BEGIN(BENullBackend, pDC->drawId);
      ///@todo: handle center multisample pattern
      RDTSC_BEGIN(BESetup, pDC->drawId);
  
-    const API_STATE &state = GetApiState(pDC);
+    const API_STATE& state = GetApiState(pDC);
  
      BarycentricCoeffs coeffs;
      SetupBarycentricCoeffs(&coeffs, work);
@@ -220,7 +266,7 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
  
      simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
  
-    const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
+    const simdscalar           dy        = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
      const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
      for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
      {
@@ -231,8 +277,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
          for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
          {
              // iterate over active samples
-            unsigned long sample = 0;
-            uint32_t sampleMask = state.blendState.sampleMask;
+            unsigned long sample     = 0;
+            uint32_t      sampleMask = state.blendState.sampleMask;
              while (_BitScanForward(&sample, sampleMask))
              {
                  sampleMask &= ~(1 << sample);
@@ -242,14 +288,16 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                  if (coverageMask)
                  {
                      // offset depth/stencil buffers current sample
-                    uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
-                    uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+                    uint8_t* pDepthSample   = pDepthBuffer + RasterTileDepthOffset(sample);
+                    uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
  
                      if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
                      {
-                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+                                      "Unsupported depth hot tile format");
  
-                        const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+                        const simdscalar z =
+                            _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
  
                          const float minz = state.depthBoundsState.depthBoundsTestMinValue;
                          const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
@@ -266,7 +314,11 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                      CalcSampleBarycentrics(coeffs, psContext);
  
                      // interpolate and quantize z
-                    psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                    psContext.vZ = vplaneps(coeffs.vZa,
+                                            coeffs.vZb,
+                                            coeffs.vZc,
+                                            psContext.vI.sample,
+                                            psContext.vJ.sample);
                      psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
  
                      RDTSC_END(BEBarycentric, 0);
@@ -274,21 +326,39 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
                      // interpolate user clip distance if available
                      if (state.backendState.clipDistanceMask)
                      {
-                        coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+                        coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+                                                             work.pUserClipBuffer,
+                                                             psContext.vI.sample,
+                                                             psContext.vJ.sample);
                      }
  
-                    simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
+                    simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
                      simdscalar stencilPassMask = vCoverageMask;
  
                      RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
-                    simdscalar depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                        psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
-                    AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
-                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                        pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+                    simdscalar depthPassMask = DepthStencilTest(&state,
+                                                                work.triFlags.frontFacing,
+                                                                work.triFlags.viewportIndex,
+                                                                psContext.vZ,
+                                                                pDepthSample,
+                                                                vCoverageMask,
+                                                                pStencilSample,
+                                                                &stencilPassMask);
+                    AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask),
+                                                         _simd_movemask_ps(stencilPassMask),
+                                                         _simd_movemask_ps(vCoverageMask)));
+                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                      &state.depthStencilState,
+                                      work.triFlags.frontFacing,
+                                      psContext.vZ,
+                                      pDepthSample,
+                                      depthPassMask,
+                                      vCoverageMask,
+                                      pStencilSample,
+                                      stencilPassMask);
                      RDTSC_END(BEEarlyDepthTest, 0);
  
-                    uint32_t statMask = _simd_movemask_ps(depthPassMask);
+                    uint32_t statMask  = _simd_movemask_ps(depthPassMask);
                      uint32_t statCount = _mm_popcnt_u32(statMask);
                      UPDATE_STAT_BE(DepthPassCount, statCount);
                  }
@@ -299,7 +369,8 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
              }
  
              pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
-            pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+            pStencilBuffer +=
+                (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
  
              vXSamplePosUL = _simd_add_ps(vXSamplePosUL, dx);
          }
@@ -310,34 +381,30 @@ void BackendNullPS(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y,
      RDTSC_END(BENullBackend, 0);
  }
  
-PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS] = {};
+PFN_CLEAR_TILES  gClearTilesTable[NUM_SWR_FORMATS] = {};
  PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
-                                     [2] // centroid
-                                     [2] // canEarlyZ
-                                     = {};
-PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                       [2] // isCenterPattern
-                                       [SWR_INPUT_COVERAGE_COUNT]
-                                       [2] // centroid
-                                       [2] // forcedSampleCount
-                                       [2] // canEarlyZ
-                                       = {};
-PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                        [SWR_INPUT_COVERAGE_COUNT]
+PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
+                                     [2]                           // canEarlyZ
+    = {};
+PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
+                                       [SWR_INPUT_COVERAGE_COUNT][2]   // centroid
+                                       [2]                             // forcedSampleCount
+                                       [2]                             // canEarlyZ
+    = {};
+PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT]
                                          [2] // centroid
                                          [2] // canEarlyZ
-                                        = {};
+    = {};
  
  void InitBackendFuncTables()
-{    
+{
      InitBackendPixelRate();
      InitBackendSingleFuncTable(gBackendSingleSample);
      InitBackendSampleFuncTable(gBackendSampleRateTable);
  
-    gBackendNullPs[SWR_MULTISAMPLE_1X] = &BackendNullPS < SWR_MULTISAMPLE_1X > ;
-    gBackendNullPs[SWR_MULTISAMPLE_2X] = &BackendNullPS < SWR_MULTISAMPLE_2X > ;
-    gBackendNullPs[SWR_MULTISAMPLE_4X] = &BackendNullPS < SWR_MULTISAMPLE_4X > ;
-    gBackendNullPs[SWR_MULTISAMPLE_8X] = &BackendNullPS < SWR_MULTISAMPLE_8X > ;
-    gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS < SWR_MULTISAMPLE_16X > ;
+    gBackendNullPs[SWR_MULTISAMPLE_1X]  = &BackendNullPS<SWR_MULTISAMPLE_1X>;
+    gBackendNullPs[SWR_MULTISAMPLE_2X]  = &BackendNullPS<SWR_MULTISAMPLE_2X>;
+    gBackendNullPs[SWR_MULTISAMPLE_4X]  = &BackendNullPS<SWR_MULTISAMPLE_4X>;
+    gBackendNullPs[SWR_MULTISAMPLE_8X]  = &BackendNullPS<SWR_MULTISAMPLE_8X>;
+    gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS<SWR_MULTISAMPLE_16X>;
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.h b/src/gallium/drivers/swr/rasterizer/core/backend.h

index 7a842fe0e20904a151b285d0b6e6b851e1bdcf28..79d9007bee64699f38e7b8b490e9e1f9bbae92df 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.h
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.h
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.h
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/os.h"
@@ -34,29 +34,37 @@
  #include "depthstencil.h"
  #include "rdtsc_core.h"
  
-void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
-void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
-void ProcessStoreTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void ProcessShutdownBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData);
+void ProcessComputeBE(DRAW_CONTEXT* pDC,
+                      uint32_t      workerId,
+                      uint32_t      threadGroupId,
+                      void*&        pSpillFillBuffer,
+                      void*&        pScratchSpace);
+void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
+void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
+void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
+                                     uint32_t      workerId,
+                                     uint32_t      macroTile,
+                                     void*         pData);
+void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData);
  
-typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, HANDLE hWorkerData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, uint32_t, DWORD[4], const SWR_RECT& rect);
+typedef void (*PFN_CLEAR_TILES)(DRAW_CONTEXT*,
+                                HANDLE                      hWorkerData,
+                                SWR_RENDERTARGET_ATTACHMENT rt,
+                                uint32_t,
+                                uint32_t,
+                                DWORD[4],
+                                const SWR_RECT& rect);
  
-extern PFN_CLEAR_TILES gClearTilesTable[NUM_SWR_FORMATS];
+extern PFN_CLEAR_TILES  gClearTilesTable[NUM_SWR_FORMATS];
  extern PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
-extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT]
-                                     [2]  // centroid
-                                     [2]; // canEarlyZ
-extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                       [2] // isCenterPattern
-                                       [SWR_INPUT_COVERAGE_COUNT]
-                                       [2] // centroid
-                                       [2] // forcedSampleCount
-                                       [2] // canEarlyZ
-                                       ;
+extern PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2]     // centroid
+                                            [2];                              // canEarlyZ
+extern PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
+                                              [SWR_INPUT_COVERAGE_COUNT][2]   // centroid
+                                              [2]                             // forcedSampleCount
+                                              [2]                             // canEarlyZ
+    ;
  extern PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT]
-                                        [SWR_INPUT_COVERAGE_COUNT]
-                                        [2]  // centroid
-                                        [2]; // canEarlyZ
-
+                                               [SWR_INPUT_COVERAGE_COUNT][2] // centroid
+                                               [2];                          // canEarlyZ
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp

index af031f9f9d71b9b23894d0542c1b2b514f65d0c4..0b14ca09f4cd488efe5404033822c427ff5d14b1 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  
  #include <smmintrin.h>
  
@@ -37,17 +37,17 @@
  
  #include <algorithm>
  
-template<SWR_FORMAT format>
-void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
+template <SWR_FORMAT format>
+void ClearRasterTile(uint8_t* pTileBuffer, simdvector& value)
  {
-    auto lambda = [&](int32_t comp)
-    {
+    auto lambda = [&](int32_t comp) {
          FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
  
          pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
      };
  
-    const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
+    const uint32_t numIter =
+        (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
  
      for (uint32_t i = 0; i < numIter; ++i)
      {
@@ -56,17 +56,17 @@ void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
  }
  
  #if USE_8x2_TILE_BACKEND
-template<SWR_FORMAT format>
-void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
+template <SWR_FORMAT format>
+void ClearRasterTile(uint8_t* pTileBuffer, simd16vector& value)
  {
-    auto lambda = [&](int32_t comp)
-    {
+    auto lambda = [&](int32_t comp) {
          FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
  
          pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
      };
  
-    const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
+    const uint32_t numIter =
+        (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);
  
      for (uint32_t i = 0; i < numIter; ++i)
      {
@@ -75,8 +75,14 @@ void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
  }
  
  #endif
-template<SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
+template <SWR_FORMAT format>
+INLINE void ClearMacroTile(DRAW_CONTEXT*               pDC,
+                           HANDLE                      hWorkerPrivateData,
+                           SWR_RENDERTARGET_ATTACHMENT rt,
+                           uint32_t                    macroTile,
+                           uint32_t                    renderTargetArrayIndex,
+                           DWORD                       clear[4],
+                           const SWR_RECT&             rect)
  {
      // convert clear color to hottile format
      // clear color is in RGBA float/uint32
@@ -91,7 +97,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
              vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
              vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
          }
-        vComp = FormatTraits<format>::pack(comp, vComp);
+        vComp                                         = FormatTraits<format>::pack(comp, vComp);
          vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
      }
  
@@ -106,7 +112,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
              vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
              vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
          }
-        vComp = FormatTraits<format>::pack(comp, vComp);
+        vComp                                         = FormatTraits<format>::pack(comp, vComp);
          vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
      }
  
@@ -115,8 +121,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
      MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
  
      // Init to full macrotile
-    SWR_RECT clearTile =
-    {
+    SWR_RECT clearTile = {
          KNOB_MACROTILE_X_DIM * int32_t(tileX),
          KNOB_MACROTILE_Y_DIM * int32_t(tileY),
          KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
@@ -127,7 +132,8 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
      clearTile &= rect;
  
      // translate to local hottile origin
-    clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
+    clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM,
+                        -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
  
      // Make maximums inclusive (needed for convert to raster tiles)
      clearTile.xmax -= 1;
@@ -141,14 +147,29 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
  
      const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
      // compute steps between raster tile samples / raster tiles / macro tile rows
-    const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
-    const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
+    const uint32_t rasterTileSampleStep =
+        KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
+    const uint32_t rasterTileStep =
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
      const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
-    const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
-
-    HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, hWorkerPrivateData, macroTile, rt, true, numSamples, renderTargetArrayIndex);
-    uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
-    uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
+    const uint32_t pitch            = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
+
+    HOTTILE* pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext,
+                                                               pDC,
+                                                               hWorkerPrivateData,
+                                                               macroTile,
+                                                               rt,
+                                                               true,
+                                                               numSamples,
+                                                               renderTargetArrayIndex);
+    uint32_t rasterTileStartOffset =
+        (ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp>>(
+            pitch, clearTile.xmin, clearTile.ymin)) *
+        numSamples;
+    uint8_t* pRasterTileRow =
+        pHotTile->pBuffer +
+        rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ,
+                               // FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
  
      // loop over all raster tiles in the current hot tile
      for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
@@ -156,7 +177,7 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
          uint8_t* pRasterTile = pRasterTileRow;
          for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
          {
-            for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
+            for (int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
              {
                  ClearRasterTile<format>(pRasterTile, vClear);
                  pRasterTile += rasterTileSampleStep;
@@ -168,17 +189,16 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, HANDLE hWorkerPrivateData, SWR_REN
      pHotTile->state = HOTTILE_DIRTY;
  }
  
-
-void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
+void ProcessClearBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
  {
-    SWR_CONTEXT *pContext = pDC->pContext;
-    HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+    SWR_CONTEXT* pContext           = pDC->pContext;
+    HANDLE       hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
      if (KNOB_FAST_CLEAR)
      {
-        CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+        CLEAR_DESC*           pClear      = (CLEAR_DESC*)pUserData;
          SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
-        uint32_t numSamples = GetNumSamples(sampleCount);
+        uint32_t              numSamples  = GetNumSamples(sampleCount);
  
          SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
  
@@ -186,36 +206,58 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
          {
-            unsigned long rt = 0;
-            uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
+            unsigned long rt   = 0;
+            uint32_t      mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
              while (_BitScanForward(&rt, mask))
              {
                  mask &= ~(1 << rt);
  
-                HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
+                HOTTILE* pHotTile =
+                    pContext->pHotTileMgr->GetHotTile(pContext,
+                                                      pDC,
+                                                      hWorkerPrivateData,
+                                                      macroTile,
+                                                      (SWR_RENDERTARGET_ATTACHMENT)rt,
+                                                      true,
+                                                      numSamples,
+                                                      pClear->renderTargetArrayIndex);
  
                  // All we want to do here is to mark the hot tile as being in a "needs clear" state.
                  pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
                  pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
                  pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
                  pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
-                pHotTile->state = HOTTILE_CLEAR;
+                pHotTile->state        = HOTTILE_CLEAR;
              }
          }
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
          {
-            HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
+            HOTTILE* pHotTile      = pContext->pHotTileMgr->GetHotTile(pContext,
+                                                                  pDC,
+                                                                  hWorkerPrivateData,
+                                                                  macroTile,
+                                                                  SWR_ATTACHMENT_DEPTH,
+                                                                  true,
+                                                                  numSamples,
+                                                                  pClear->renderTargetArrayIndex);
              pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
-            pHotTile->state = HOTTILE_CLEAR;
+            pHotTile->state        = HOTTILE_CLEAR;
          }
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
          {
-            HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
+            HOTTILE* pHotTile = pContext->pHotTileMgr->GetHotTile(pContext,
+                                                                  pDC,
+                                                                  hWorkerPrivateData,
+                                                                  macroTile,
+                                                                  SWR_ATTACHMENT_STENCIL,
+                                                                  true,
+                                                                  numSamples,
+                                                                  pClear->renderTargetArrayIndex);
  
              pHotTile->clearData[0] = pClear->clearStencil;
-            pHotTile->state = HOTTILE_CLEAR;
+            pHotTile->state        = HOTTILE_CLEAR;
          }
  
          RDTSC_END(BEClear, 1);
@@ -223,7 +265,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
      else
      {
          // Legacy clear
-        CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+        CLEAR_DESC* pClear = (CLEAR_DESC*)pUserData;
          RDTSC_BEGIN(BEClear, pDC->drawId);
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
@@ -237,33 +279,51 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
              PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
              SWR_ASSERT(pfnClearTiles != nullptr);
  
-            unsigned long rt = 0;
-            uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
+            unsigned long rt   = 0;
+            uint32_t      mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
              while (_BitScanForward(&rt, mask))
              {
                  mask &= ~(1 << rt);
  
-                pfnClearTiles(pDC, hWorkerPrivateData, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+                pfnClearTiles(pDC,
+                              hWorkerPrivateData,
+                              (SWR_RENDERTARGET_ATTACHMENT)rt,
+                              macroTile,
+                              pClear->renderTargetArrayIndex,
+                              clearData,
+                              pClear->rect);
              }
          }
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
          {
              DWORD clearData[4];
-            clearData[0] = *(DWORD*)&pClear->clearDepth;
+            clearData[0]                  = *(DWORD*)&pClear->clearDepth;
              PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
              SWR_ASSERT(pfnClearTiles != nullptr);
  
-            pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+            pfnClearTiles(pDC,
+                          hWorkerPrivateData,
+                          SWR_ATTACHMENT_DEPTH,
+                          macroTile,
+                          pClear->renderTargetArrayIndex,
+                          clearData,
+                          pClear->rect);
          }
  
          if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
          {
              DWORD clearData[4];
-            clearData[0] = pClear->clearStencil;
+            clearData[0]                  = pClear->clearStencil;
              PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
  
-            pfnClearTiles(pDC, hWorkerPrivateData, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
+            pfnClearTiles(pDC,
+                          hWorkerPrivateData,
+                          SWR_ATTACHMENT_STENCIL,
+                          macroTile,
+                          pClear->renderTargetArrayIndex,
+                          clearData,
+                          pClear->rect);
          }
  
          RDTSC_END(BEClear, 1);
@@ -274,9 +334,9 @@ void InitClearTilesTable()
  {
      memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
  
-    gClearTilesTable[R8G8B8A8_UNORM]        = ClearMacroTile<R8G8B8A8_UNORM>;
-    gClearTilesTable[B8G8R8A8_UNORM]        = ClearMacroTile<B8G8R8A8_UNORM>;
-    gClearTilesTable[R32_FLOAT]             = ClearMacroTile<R32_FLOAT>;
-    gClearTilesTable[R32G32B32A32_FLOAT]    = ClearMacroTile<R32G32B32A32_FLOAT>;
-    gClearTilesTable[R8_UINT]               = ClearMacroTile<R8_UINT>;
+    gClearTilesTable[R8G8B8A8_UNORM]     = ClearMacroTile<R8G8B8A8_UNORM>;
+    gClearTilesTable[B8G8R8A8_UNORM]     = ClearMacroTile<B8G8R8A8_UNORM>;
+    gClearTilesTable[R32_FLOAT]          = ClearMacroTile<R32_FLOAT>;
+    gClearTilesTable[R32G32B32A32_FLOAT] = ClearMacroTile<R32G32B32A32_FLOAT>;
+    gClearTilesTable[R8_UINT]            = ClearMacroTile<R8_UINT>;
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h

index 05234c21822a2cc0d36836fe0468f3ca7a88b65f..1798dad7bc5ea200bff0b6817c8c74611e704379 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -1,37 +1,39 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.h
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.h
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  #pragma once
  
-void InitBackendSingleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
+void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2]);
+void InitBackendSampleFuncTable(
+    PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2]);
  
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext);
+static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
+                                          SWR_PS_CONTEXT&          psContext);
  
  
  enum SWR_BACKEND_FUNCS
@@ -45,15 +47,18 @@ enum SWR_BACKEND_FUNCS
  #if KNOB_SIMD_WIDTH == 8
  static const __m256 vCenterOffsetsX = __m256{0.5, 1.5, 0.5, 1.5, 2.5, 3.5, 2.5, 3.5};
  static const __m256 vCenterOffsetsY = __m256{0.5, 0.5, 1.5, 1.5, 0.5, 0.5, 1.5, 1.5};
-static const __m256 vULOffsetsX = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
-static const __m256 vULOffsetsY = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
+static const __m256 vULOffsetsX     = __m256{0.0, 1.0, 0.0, 1.0, 2.0, 3.0, 2.0, 3.0};
+static const __m256 vULOffsetsY     = __m256{0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0};
  #define MASK 0xff
  #endif
  
-static INLINE simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuffer, simdscalar const &vI, simdscalar const &vJ)
+static INLINE simdmask ComputeUserClipMask(uint8_t           clipMask,
+                                           float*            pUserClipBuffer,
+                                           simdscalar const& vI,
+                                           simdscalar const& vJ)
  {
-    simdscalar vClipMask = _simd_setzero_ps();
-    uint32_t numClipDistance = _mm_popcnt_u32(clipMask);
+    simdscalar vClipMask       = _simd_setzero_ps();
+    uint32_t   numClipDistance = _mm_popcnt_u32(clipMask);
  
      for (uint32_t i = 0; i < numClipDistance; ++i)
      {
@@ -76,23 +81,29 @@ static INLINE simdmask ComputeUserClipMask(uint8_t clipMask, float* pUserClipBuf
  
  INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
  {
-    static const uint32_t RasterTileColorOffsets[16]
-    { 0,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 10,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 11,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 12,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 13,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 14,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 15,
+    static const uint32_t RasterTileColorOffsets[16]{
+        0,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8),
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 2,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 3,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 4,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 5,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 6,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 7,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 8,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) * 9,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            10,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            11,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            12,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            13,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            14,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8) *
+            15,
      };
      assert(sampleNum < 16);
      return RasterTileColorOffsets[sampleNum];
@@ -100,23 +111,29 @@ INLINE static uint32_t RasterTileColorOffset(uint32_t sampleNum)
  
  INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
  {
-    static const uint32_t RasterTileDepthOffsets[16]
-    { 0,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 10,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 11,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 12,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 13,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 14,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 15,
+    static const uint32_t RasterTileDepthOffsets[16]{
+        0,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8),
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 2,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 3,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 4,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 5,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 6,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 7,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 8,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) * 9,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            10,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            11,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            12,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            13,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            14,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8) *
+            15,
      };
      assert(sampleNum < 16);
      return RasterTileDepthOffsets[sampleNum];
@@ -124,60 +141,78 @@ INLINE static uint32_t RasterTileDepthOffset(uint32_t sampleNum)
  
  INLINE static uint32_t RasterTileStencilOffset(uint32_t sampleNum)
  {
-    static const uint32_t RasterTileStencilOffsets[16]
-    { 0,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 2,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 3,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 4,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 5,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 6,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 7,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 8,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 9,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 10,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 11,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 12,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 13,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 14,
-      (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) * 15,
+    static const uint32_t RasterTileStencilOffsets[16]{
+        0,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8),
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            2,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            3,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            4,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            5,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            6,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            7,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            8,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            9,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            10,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            11,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            12,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            13,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            14,
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8) *
+            15,
      };
      assert(sampleNum < 16);
      return RasterTileStencilOffsets[sampleNum];
  }
  
-template<typename T, uint32_t InputCoverage>
+template <typename T, uint32_t InputCoverage>
  struct generateInputCoverage
  {
-    INLINE generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
+    INLINE generateInputCoverage(const uint64_t* const coverageMask,
+                                 uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
+                                 const uint32_t sampleMask)
      {
          // will need to update for avx512
          assert(KNOB_SIMD_WIDTH == 8);
  
          simdscalari mask[2];
          simdscalari sampleCoverage[2];
-        
-        if(T::bIsCenterPattern)
+
+        if (T::bIsCenterPattern)
          {
              // center coverage is the same for all samples; just broadcast to the sample slots
              uint32_t centerCoverage = ((uint32_t)(*coverageMask) & MASK);
-            if(T::MultisampleT::numSamples == 1)
+            if (T::MultisampleT::numSamples == 1)
              {
                  sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, centerCoverage);
              }
-            else if(T::MultisampleT::numSamples == 2)
+            else if (T::MultisampleT::numSamples == 2)
              {
-                sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
+                sampleCoverage[0] =
+                    _simd_set_epi32(0, 0, 0, 0, 0, 0, centerCoverage, centerCoverage);
              }
-            else if(T::MultisampleT::numSamples == 4)
+            else if (T::MultisampleT::numSamples == 4)
              {
-                sampleCoverage[0] = _simd_set_epi32(0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
+                sampleCoverage[0] = _simd_set_epi32(
+                    0, 0, 0, 0, centerCoverage, centerCoverage, centerCoverage, centerCoverage);
              }
-            else if(T::MultisampleT::numSamples == 8)
+            else if (T::MultisampleT::numSamples == 8)
              {
                  sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
              }
-            else if(T::MultisampleT::numSamples == 16)
+            else if (T::MultisampleT::numSamples == 16)
              {
                  sampleCoverage[0] = _simd_set1_epi32(centerCoverage);
                  sampleCoverage[1] = _simd_set1_epi32(centerCoverage);
@@ -185,80 +220,127 @@ struct generateInputCoverage
          }
          else
          {
-            simdscalari src = _simd_set1_epi32(0);
+            simdscalari src    = _simd_set1_epi32(0);
              simdscalari index0 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0), index1;
  
-            if(T::MultisampleT::numSamples == 1)
+            if (T::MultisampleT::numSamples == 1)
              {
                  mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, 0, -1);
              }
-            else if(T::MultisampleT::numSamples == 2)
+            else if (T::MultisampleT::numSamples == 2)
              {
                  mask[0] = _simd_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
              }
-            else if(T::MultisampleT::numSamples == 4)
+            else if (T::MultisampleT::numSamples == 4)
              {
                  mask[0] = _simd_set_epi32(0, 0, 0, 0, -1, -1, -1, -1);
              }
-            else if(T::MultisampleT::numSamples == 8)
+            else if (T::MultisampleT::numSamples == 8)
              {
                  mask[0] = _simd_set1_epi32(-1);
              }
-            else if(T::MultisampleT::numSamples == 16)
+            else if (T::MultisampleT::numSamples == 16)
              {
                  mask[0] = _simd_set1_epi32(-1);
                  mask[1] = _simd_set1_epi32(-1);
-                index1 = _simd_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
+                index1  = _simd_set_epi32(15, 14, 13, 12, 11, 10, 9, 8);
              }
  
              // gather coverage for samples 0-7
-            sampleCoverage[0] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index0, _mm256_castsi256_ps(mask[0]), 8));
-            if(T::MultisampleT::numSamples > 8)
+            sampleCoverage[0] =
+                _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
+                                                            (const float*)coverageMask,
+                                                            index0,
+                                                            _mm256_castsi256_ps(mask[0]),
+                                                            8));
+            if (T::MultisampleT::numSamples > 8)
              {
                  // gather coverage for samples 8-15
-                sampleCoverage[1] = _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src), (const float*)coverageMask, index1, _mm256_castsi256_ps(mask[1]), 8));
+                sampleCoverage[1] =
+                    _mm256_castps_si256(_simd_mask_i32gather_ps(_mm256_castsi256_ps(src),
+                                                                (const float*)coverageMask,
+                                                                index1,
+                                                                _mm256_castsi256_ps(mask[1]),
+                                                                8));
              }
          }
  
-        mask[0] = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0,
-                                  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xC, 0x8, 0x4, 0x0);
+        mask[0] = _mm256_set_epi8(-1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  0xC,
+                                  0x8,
+                                  0x4,
+                                  0x0,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  -1,
+                                  0xC,
+                                  0x8,
+                                  0x4,
+                                  0x0);
          // pull out the 8bit 4x2 coverage for samples 0-7 into the lower 32 bits of each 128bit lane
          simdscalari packedCoverage0 = _simd_shuffle_epi8(sampleCoverage[0], mask[0]);
  
          simdscalari packedCoverage1;
-        if(T::MultisampleT::numSamples > 8)
+        if (T::MultisampleT::numSamples > 8)
          {
-            // pull out the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit lane
+            // pull out the 8bit 4x2 coverage for samples 8-15 into the lower 32 bits of each 128bit
+            // lane
              packedCoverage1 = _simd_shuffle_epi8(sampleCoverage[1], mask[0]);
          }
  
-    #if (KNOB_ARCH == KNOB_ARCH_AVX)
-        // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane 
+#if (KNOB_ARCH == KNOB_ARCH_AVX)
+        // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
          simdscalari hiToLow = _mm256_permute2f128_si256(packedCoverage0, packedCoverage0, 0x83);
-        simdscalar shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
-        packedCoverage0 = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
+        simdscalar  shufRes = _mm256_shuffle_ps(
+            _mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
+        packedCoverage0 = _mm256_castps_si256(
+            _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), shufRes, 0xFE));
  
          simdscalari packedSampleCoverage;
-        if(T::MultisampleT::numSamples > 8)
+        if (T::MultisampleT::numSamples > 8)
          {
              // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
-            hiToLow = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
-            shufRes = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow), _mm256_castsi256_ps(hiToLow), _MM_SHUFFLE(1, 1, 0, 1));
-            shufRes = _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage1), shufRes, 0xFE);
-            packedCoverage1 = _mm256_castps_si256(_mm256_castpd_ps(_mm256_shuffle_pd(_mm256_castps_pd(shufRes), _mm256_castps_pd(shufRes), 0x01)));
-            packedSampleCoverage = _mm256_castps_si256(_mm256_blend_ps(_mm256_castsi256_ps(packedCoverage0), _mm256_castsi256_ps(packedCoverage1), 0xFC));
+            hiToLow         = _mm256_permute2f128_si256(packedCoverage1, packedCoverage1, 0x83);
+            shufRes         = _mm256_shuffle_ps(_mm256_castsi256_ps(hiToLow),
+                                        _mm256_castsi256_ps(hiToLow),
+                                        _MM_SHUFFLE(1, 1, 0, 1));
+            shufRes         = _mm256_blend_ps(_mm256_castsi256_ps(packedCoverage1), shufRes, 0xFE);
+            packedCoverage1 = _mm256_castps_si256(_mm256_castpd_ps(
+                _mm256_shuffle_pd(_mm256_castps_pd(shufRes), _mm256_castps_pd(shufRes), 0x01)));
+            packedSampleCoverage = _mm256_castps_si256(_mm256_blend_ps(
+                _mm256_castsi256_ps(packedCoverage0), _mm256_castsi256_ps(packedCoverage1), 0xFC));
          }
          else
          {
              packedSampleCoverage = packedCoverage0;
          }
-    #else
+#else
          simdscalari permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x4, 0x0);
-        // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane 
+        // pack lower 32 bits of each 128 bit lane into lower 64 bits of single 128 bit lane
          packedCoverage0 = _mm256_permutevar8x32_epi32(packedCoverage0, permMask);
  
          simdscalari packedSampleCoverage;
-        if(T::MultisampleT::numSamples > 8)
+        if (T::MultisampleT::numSamples > 8)
          {
              permMask = _simd_set_epi32(0x7, 0x7, 0x7, 0x7, 0x4, 0x0, 0x7, 0x7);
              // pack lower 32 bits of each 128 bit lane into upper 64 bits of single 128 bit lane
@@ -271,14 +353,15 @@ struct generateInputCoverage
          {
              packedSampleCoverage = packedCoverage0;
          }
-    #endif
+#endif
  
-        for(int32_t i = KNOB_SIMD_WIDTH - 1; i >= 0; i--)
+        for (int32_t i = KNOB_SIMD_WIDTH - 1; i >= 0; i--)
          {
-            // convert packed sample coverage masks into single coverage masks for all samples for each pixel in the 4x2
+            // convert packed sample coverage masks into single coverage masks for all samples for
+            // each pixel in the 4x2
              inputMask[i] = _simd_movemask_epi8(packedSampleCoverage);
  
-            if(!T::bForcedSampleCount)
+            if (!T::bForcedSampleCount)
              {
                  // input coverage has to be anded with sample mask if MSAA isn't forced on
                  inputMask[i] &= sampleMask;
@@ -289,35 +372,47 @@ struct generateInputCoverage
          }
      }
  
-    INLINE generateInputCoverage(const uint64_t *const coverageMask, simdscalar &inputCoverage, const uint32_t sampleMask)
+    INLINE generateInputCoverage(const uint64_t* const coverageMask,
+                                 simdscalar&           inputCoverage,
+                                 const uint32_t        sampleMask)
      {
          uint32_t inputMask[KNOB_SIMD_WIDTH];
          generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
-        inputCoverage = _simd_castsi_ps(_simd_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]));
+        inputCoverage = _simd_castsi_ps(_simd_set_epi32(inputMask[7],
+                                                        inputMask[6],
+                                                        inputMask[5],
+                                                        inputMask[4],
+                                                        inputMask[3],
+                                                        inputMask[2],
+                                                        inputMask[1],
+                                                        inputMask[0]));
      }
-
  };
  
-template<typename T>
+template <typename T>
  struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
  {
-    INLINE generateInputCoverage(const uint64_t *const coverageMask, simdscalar &inputCoverage, const uint32_t sampleMask)
+    INLINE generateInputCoverage(const uint64_t* const coverageMask,
+                                 simdscalar&           inputCoverage,
+                                 const uint32_t        sampleMask)
      {
          // will need to update for avx512
          assert(KNOB_SIMD_WIDTH == 8);
-        simdscalari vec = _simd_set1_epi32(coverageMask[0]);
+        simdscalari       vec = _simd_set1_epi32(coverageMask[0]);
          const simdscalari bit = _simd_set_epi32(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
-        vec = _simd_and_si(vec, bit);
-        vec = _simd_cmplt_epi32(_simd_setzero_si(), vec);
-        vec = _simd_blendv_epi32(_simd_setzero_si(), _simd_set1_epi32(1), vec);
-        inputCoverage = _simd_castsi_ps(vec);
+        vec                   = _simd_and_si(vec, bit);
+        vec                   = _simd_cmplt_epi32(_simd_setzero_si(), vec);
+        vec                   = _simd_blendv_epi32(_simd_setzero_si(), _simd_set1_epi32(1), vec);
+        inputCoverage         = _simd_castsi_ps(vec);
      }
  
-    INLINE generateInputCoverage(const uint64_t *const coverageMask, uint32_t (&inputMask)[KNOB_SIMD_WIDTH], const uint32_t sampleMask)
+    INLINE generateInputCoverage(const uint64_t* const coverageMask,
+                                 uint32_t (&inputMask)[KNOB_SIMD_WIDTH],
+                                 const uint32_t sampleMask)
      {
-        uint32_t simdCoverage = (coverageMask[0] & MASK);
+        uint32_t              simdCoverage     = (coverageMask[0] & MASK);
          static const uint32_t FullCoverageMask = (1 << T::MultisampleT::numSamples) - 1;
-        for(int i = 0; i < KNOB_SIMD_WIDTH; i++)
+        for (int i = 0; i < KNOB_SIMD_WIDTH; i++)
          {
              // set all samples to covered if conservative coverage mask is set for that pixel
              inputMask[i] = (((1 << i) & simdCoverage) > 0) ? FullCoverageMask : 0;
@@ -327,18 +422,25 @@ struct generateInputCoverage<T, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
  
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Centroid behaves exactly as follows :
-// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center (even if the sample pattern does not happen to 
+// (1) If all samples in the primitive are covered, the attribute is evaluated at the pixel center
+// (even if the sample pattern does not happen to
  //     have a sample location there).
-// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample index, where sample coverage is after ANDing the 
+// (2) Else the attribute is evaluated at the first covered sample, in increasing order of sample
+// index, where sample coverage is after ANDing the
  //     coverage with the SampleMask Rasterizer State.
-// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to fill out 2x2 pixel stamps, the attribute is 
-//     evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the pixel, then the first sample covered by the 
-//     SampleMask Rasterizer State is the evaluation point.Otherwise (full SampleMask), the pixel center is the evaluation point.
+// (3) If no samples are covered, such as on helper pixels executed off the bounds of a primitive to
+// fill out 2x2 pixel stamps, the attribute is
+//     evaluated as follows : If the SampleMask Rasterizer state is a subset of the samples in the
+//     pixel, then the first sample covered by the SampleMask Rasterizer State is the evaluation
+//     point.Otherwise (full SampleMask), the pixel center is the evaluation point.
  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-template<typename T>
-INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS& samplePos,
-                            const uint64_t *const coverageMask, const uint32_t sampleMask,
-                            simdscalar const &vXSamplePosUL, simdscalar const &vYSamplePosUL)
+template <typename T>
+INLINE void CalcCentroidPos(SWR_PS_CONTEXT&            psContext,
+                            const SWR_MULTISAMPLE_POS& samplePos,
+                            const uint64_t* const      coverageMask,
+                            const uint32_t             sampleMask,
+                            simdscalar const&          vXSamplePosUL,
+                            simdscalar const&          vYSamplePosUL)
  {
      uint32_t inputMask[KNOB_SIMD_WIDTH];
      generateInputCoverage<T, T::InputCoverage>(coverageMask, inputMask, sampleMask);
@@ -356,50 +458,60 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS
      (inputMask[6] > 0) ? (_BitScanForward(&sampleNum[6], inputMask[6])) : (sampleNum[6] = 0);
      (inputMask[7] > 0) ? (_BitScanForward(&sampleNum[7], inputMask[7])) : (sampleNum[7] = 0);
  
-    // look up and set the sample offsets from UL pixel corner for first covered sample 
+    // look up and set the sample offsets from UL pixel corner for first covered sample
      simdscalar vXSample = _simd_set_ps(samplePos.X(sampleNum[7]),
-                                    samplePos.X(sampleNum[6]),
-                                    samplePos.X(sampleNum[5]),
-                                    samplePos.X(sampleNum[4]),
-                                    samplePos.X(sampleNum[3]),
-                                    samplePos.X(sampleNum[2]),
-                                    samplePos.X(sampleNum[1]),
-                                    samplePos.X(sampleNum[0]));
+                                       samplePos.X(sampleNum[6]),
+                                       samplePos.X(sampleNum[5]),
+                                       samplePos.X(sampleNum[4]),
+                                       samplePos.X(sampleNum[3]),
+                                       samplePos.X(sampleNum[2]),
+                                       samplePos.X(sampleNum[1]),
+                                       samplePos.X(sampleNum[0]));
  
      simdscalar vYSample = _simd_set_ps(samplePos.Y(sampleNum[7]),
-                                    samplePos.Y(sampleNum[6]),
-                                    samplePos.Y(sampleNum[5]),
-                                    samplePos.Y(sampleNum[4]),
-                                    samplePos.Y(sampleNum[3]),
-                                    samplePos.Y(sampleNum[2]),
-                                    samplePos.Y(sampleNum[1]),
-                                    samplePos.Y(sampleNum[0]));
+                                       samplePos.Y(sampleNum[6]),
+                                       samplePos.Y(sampleNum[5]),
+                                       samplePos.Y(sampleNum[4]),
+                                       samplePos.Y(sampleNum[3]),
+                                       samplePos.Y(sampleNum[2]),
+                                       samplePos.Y(sampleNum[1]),
+                                       samplePos.Y(sampleNum[0]));
      // add sample offset to UL pixel corner
      vXSample = _simd_add_ps(vXSamplePosUL, vXSample);
      vYSample = _simd_add_ps(vYSamplePosUL, vYSample);
  
      // Case (1) and case (3b) - All samples covered or not covered with full SampleMask
      static const simdscalari vFullyCoveredMask = T::MultisampleT::FullSampleMask();
-    simdscalari vInputCoveragei =  _simd_set_epi32(inputMask[7], inputMask[6], inputMask[5], inputMask[4], inputMask[3], inputMask[2], inputMask[1], inputMask[0]);
+    simdscalari              vInputCoveragei   = _simd_set_epi32(inputMask[7],
+                                                  inputMask[6],
+                                                  inputMask[5],
+                                                  inputMask[4],
+                                                  inputMask[3],
+                                                  inputMask[2],
+                                                  inputMask[1],
+                                                  inputMask[0]);
      simdscalari vAllSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vFullyCoveredMask);
  
      static const simdscalari vZero = _simd_setzero_si();
-    const simdscalari vSampleMask = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
-    simdscalari vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
-    simdscalari vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
-    simdscalari vCase3b = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
+    const simdscalari vSampleMask  = _simd_and_si(_simd_set1_epi32(sampleMask), vFullyCoveredMask);
+    simdscalari       vNoSamplesCovered = _simd_cmpeq_epi32(vInputCoveragei, vZero);
+    simdscalari       vIsFullSampleMask = _simd_cmpeq_epi32(vSampleMask, vFullyCoveredMask);
+    simdscalari       vCase3b           = _simd_and_si(vNoSamplesCovered, vIsFullSampleMask);
  
      simdscalari vEvalAtCenter = _simd_or_si(vAllSamplesCovered, vCase3b);
  
      // set the centroid position based on results from above
-    psContext.vX.centroid = _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
-    psContext.vY.centroid = _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
+    psContext.vX.centroid =
+        _simd_blendv_ps(vXSample, psContext.vX.center, _simd_castsi_ps(vEvalAtCenter));
+    psContext.vY.centroid =
+        _simd_blendv_ps(vYSample, psContext.vY.center, _simd_castsi_ps(vEvalAtCenter));
  
      // Case (3a) No samples covered and partial sample mask
      simdscalari vSomeSampleMaskSamples = _simd_cmplt_epi32(vSampleMask, vFullyCoveredMask);
      // sample mask should never be all 0's for this case, but handle it anyways
      unsigned long firstCoveredSampleMaskSample = 0;
-    (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask)) : (firstCoveredSampleMaskSample = 0);
+    (sampleMask > 0) ? (_BitScanForward(&firstCoveredSampleMaskSample, sampleMask))
+                     : (firstCoveredSampleMaskSample = 0);
  
      simdscalari vCase3a = _simd_and_si(vNoSamplesCovered, vSomeSampleMaskSamples);
  
@@ -407,24 +519,34 @@ INLINE void CalcCentroidPos(SWR_PS_CONTEXT &psContext, const SWR_MULTISAMPLE_POS
      vYSample = _simd_set1_ps(samplePos.Y(firstCoveredSampleMaskSample));
  
      // blend in case 3a pixel locations
-    psContext.vX.centroid = _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
-    psContext.vY.centroid = _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
+    psContext.vX.centroid =
+        _simd_blendv_ps(psContext.vX.centroid, vXSample, _simd_castsi_ps(vCase3a));
+    psContext.vY.centroid =
+        _simd_blendv_ps(psContext.vY.centroid, vYSample, _simd_castsi_ps(vCase3a));
  }
  
-INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext,
-                                     const simdscalar &vXSamplePosUL, const simdscalar &vYSamplePosUL)
+INLINE void CalcCentroidBarycentrics(const BarycentricCoeffs& coeffs,
+                                     SWR_PS_CONTEXT&          psContext,
+                                     const simdscalar&        vXSamplePosUL,
+                                     const simdscalar&        vYSamplePosUL)
  {
      // evaluate I,J
-    psContext.vI.centroid = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
-    psContext.vJ.centroid = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
+    psContext.vI.centroid =
+        vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.centroid, psContext.vY.centroid);
+    psContext.vJ.centroid =
+        vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.centroid, psContext.vY.centroid);
      psContext.vI.centroid = _simd_mul_ps(psContext.vI.centroid, coeffs.vRecipDet);
      psContext.vJ.centroid = _simd_mul_ps(psContext.vJ.centroid, coeffs.vRecipDet);
  
      // interpolate 1/w
-    psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.centroid, psContext.vJ.centroid);
+    psContext.vOneOverW.centroid = vplaneps(coeffs.vAOneOverW,
+                                            coeffs.vBOneOverW,
+                                            coeffs.vCOneOverW,
+                                            psContext.vI.centroid,
+                                            psContext.vJ.centroid);
  }
  
-INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const &z, float minz, float maxz)
+INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const& z, float minz, float maxz)
  {
      const simdscalar minzMask = _simd_cmpge_ps(z, _simd_set1_ps(minz));
      const simdscalar maxzMask = _simd_cmple_ps(z, _simd_set1_ps(maxz));
@@ -432,16 +554,17 @@ INLINE simdmask CalcDepthBoundsAcceptMask(simdscalar const &z, float minz, float
      return _simd_movemask_ps(_simd_and_ps(minzMask, maxzMask));
  }
  
-template<typename T>
+template <typename T>
  INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
  {
      // RT has to be single sample if we're in forcedMSAA mode
-    if(T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
+    if (T::bForcedSampleCount && (T::MultisampleT::sampleCount > SWR_MULTISAMPLE_1X))
      {
          return 1;
      }
-    // unless we're forced to single sample, in which case we run the OM at the sample count of the RT
-    else if(T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
+    // unless we're forced to single sample, in which case we run the OM at the sample count of the
+    // RT
+    else if (T::bForcedSampleCount && (T::MultisampleT::sampleCount == SWR_MULTISAMPLE_1X))
      {
          return GetNumSamples(blendSampleCount);
      }
@@ -452,7 +575,7 @@ INLINE uint32_t GetNumOMSamples(SWR_MULTISAMPLE_COUNT blendSampleCount)
      }
  }
  
-inline void SetupBarycentricCoeffs(BarycentricCoeffs *coeffs, const SWR_TRIANGLE_DESC &work)
+inline void SetupBarycentricCoeffs(BarycentricCoeffs* coeffs, const SWR_TRIANGLE_DESC& work)
  {
      // broadcast scalars
  
@@ -475,9 +598,12 @@ inline void SetupBarycentricCoeffs(BarycentricCoeffs *coeffs, const SWR_TRIANGLE
      coeffs->vCOneOverW = _simd_broadcast_ss(&work.OneOverW[2]);
  }
  
-inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uint8_t **pDepthBuffer, uint8_t **pStencilBuffer, uint32_t colorHotTileMask, RenderOutputBuffers &renderBuffers)
+inline void SetupRenderBuffers(uint8_t*             pColorBuffer[SWR_NUM_RENDERTARGETS],
+                               uint8_t**            pDepthBuffer,
+                               uint8_t**            pStencilBuffer,
+                               uint32_t             colorHotTileMask,
+                               RenderOutputBuffers& renderBuffers)
  {
-    
      DWORD index;
      while (_BitScanForward(&index, colorHotTileMask))
      {
@@ -493,41 +619,51 @@ inline void SetupRenderBuffers(uint8_t *pColorBuffer[SWR_NUM_RENDERTARGETS], uin
  
      if (pStencilBuffer)
      {
-        *pStencilBuffer = renderBuffers.pStencil;;
+        *pStencilBuffer = renderBuffers.pStencil;
+        ;
      }
  }
  
-template<typename T>
-void SetupPixelShaderContext(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos, SWR_TRIANGLE_DESC &work)
+template <typename T>
+void SetupPixelShaderContext(SWR_PS_CONTEXT*            psContext,
+                             const SWR_MULTISAMPLE_POS& samplePos,
+                             SWR_TRIANGLE_DESC&         work)
  {
-    psContext->pAttribs = work.pAttribs;
-    psContext->pPerspAttribs = work.pPerspAttribs;
-    psContext->frontFace = work.triFlags.frontFacing;
+    psContext->pAttribs               = work.pAttribs;
+    psContext->pPerspAttribs          = work.pPerspAttribs;
+    psContext->frontFace              = work.triFlags.frontFacing;
      psContext->renderTargetArrayIndex = work.triFlags.renderTargetArrayIndex;
  
-    // save Ia/Ib/Ic and Ja/Jb/Jc if we need to reevaluate i/j/k in the shader because of pull attribs
+    // save Ia/Ib/Ic and Ja/Jb/Jc if we need to reevaluate i/j/k in the shader because of pull
+    // attribs
      psContext->I = work.I;
      psContext->J = work.J;
  
      psContext->recipDet = work.recipDet;
-    psContext->pRecipW = work.pRecipW;
-    psContext->pSamplePosX = samplePos.X();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
-    psContext->pSamplePosY = samplePos.Y();//reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
+    psContext->pRecipW  = work.pRecipW;
+    psContext->pSamplePosX =
+        samplePos.X(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosX);
+    psContext->pSamplePosY =
+        samplePos.Y(); // reinterpret_cast<const float *>(&T::MultisampleT::samplePosY);
      psContext->rasterizerSampleCount = T::MultisampleT::numSamples;
-    psContext->sampleIndex = 0;
+    psContext->sampleIndex           = 0;
  }
  
-template<typename T, bool IsSingleSample>
-void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePos,
-                  const BarycentricCoeffs &coeffs, const uint64_t * const coverageMask, uint32_t sampleMask)
+template <typename T, bool IsSingleSample>
+void CalcCentroid(SWR_PS_CONTEXT*            psContext,
+                  const SWR_MULTISAMPLE_POS& samplePos,
+                  const BarycentricCoeffs&   coeffs,
+                  const uint64_t* const      coverageMask,
+                  uint32_t                   sampleMask)
  {
-    if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid positions are still different
+    if (IsSingleSample) // if (T::MultisampleT::numSamples == 1) // doesn't cut it, the centroid
+                        // positions are still different
      {
          // for 1x case, centroid is pixel center
-        psContext->vX.centroid = psContext->vX.center;
-        psContext->vY.centroid = psContext->vY.center;
-        psContext->vI.centroid = psContext->vI.center;
-        psContext->vJ.centroid = psContext->vJ.center;
+        psContext->vX.centroid        = psContext->vX.center;
+        psContext->vY.centroid        = psContext->vY.center;
+        psContext->vI.centroid        = psContext->vI.center;
+        psContext->vJ.centroid        = psContext->vJ.center;
          psContext->vOneOverW.centroid = psContext->vOneOverW.center;
      }
      else
@@ -542,8 +678,14 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePo
              }
              else
              {
-                // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate coverage 2X'..
-                CalcCentroidPos<T>(*psContext, samplePos, coverageMask, sampleMask, psContext->vX.UL, psContext->vY.UL);
+                // add param: const uint32_t inputMask[KNOB_SIMD_WIDTH] to eliminate 'generate
+                // coverage 2X'..
+                CalcCentroidPos<T>(*psContext,
+                                   samplePos,
+                                   coverageMask,
+                                   sampleMask,
+                                   psContext->vX.UL,
+                                   psContext->vY.UL);
              }
  
              CalcCentroidBarycentrics(coeffs, *psContext, psContext->vX.UL, psContext->vY.UL);
@@ -556,47 +698,61 @@ void CalcCentroid(SWR_PS_CONTEXT *psContext, const SWR_MULTISAMPLE_POS& samplePo
      }
  }
  
-template<typename T>
+template <typename T>
  struct PixelRateZTestLoop
  {
-    PixelRateZTestLoop(DRAW_CONTEXT *DC, uint32_t _workerId, const SWR_TRIANGLE_DESC &Work, const BarycentricCoeffs& Coeffs, const API_STATE& apiState,
-                       uint8_t*& depthBuffer, uint8_t*& stencilBuffer, const uint8_t ClipDistanceMask) :
-                       pDC(DC), workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
-                       samplePos(state.rastState.samplePositions),
-                       clipDistanceMask(ClipDistanceMask), pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
+    PixelRateZTestLoop(DRAW_CONTEXT*            DC,
+                       uint32_t                 _workerId,
+                       const SWR_TRIANGLE_DESC& Work,
+                       const BarycentricCoeffs& Coeffs,
+                       const API_STATE&         apiState,
+                       uint8_t*&                depthBuffer,
+                       uint8_t*&                stencilBuffer,
+                       const uint8_t            ClipDistanceMask) :
+        pDC(DC),
+        workerId(_workerId), work(Work), coeffs(Coeffs), state(apiState), psState(apiState.psState),
+        samplePos(state.rastState.samplePositions), clipDistanceMask(ClipDistanceMask),
+        pDepthBuffer(depthBuffer), pStencilBuffer(stencilBuffer){};
  
      INLINE
-    uint32_t operator()(simdscalar& activeLanes, SWR_PS_CONTEXT& psContext, 
-                        const CORE_BUCKETS BEDepthBucket, uint32_t currentSimdIn8x8 = 0)
+    uint32_t operator()(simdscalar&        activeLanes,
+                        SWR_PS_CONTEXT&    psContext,
+                        const CORE_BUCKETS BEDepthBucket,
+                        uint32_t           currentSimdIn8x8 = 0)
      {
  
-        uint32_t statCount = 0;
+        uint32_t   statCount            = 0;
          simdscalar anyDepthSamplePassed = _simd_setzero_ps();
-        for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
+        for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
          {
-            const uint8_t *pCoverageMask = (uint8_t*)&work.coverageMask[sample];
-            vCoverageMask[sample] = _simd_and_ps(activeLanes, _simd_vmask_ps(pCoverageMask[currentSimdIn8x8] & MASK));
+            const uint8_t* pCoverageMask = (uint8_t*)&work.coverageMask[sample];
+            vCoverageMask[sample] =
+                _simd_and_ps(activeLanes, _simd_vmask_ps(pCoverageMask[currentSimdIn8x8] & MASK));
  
-            if(!_simd_movemask_ps(vCoverageMask[sample]))
+            if (!_simd_movemask_ps(vCoverageMask[sample]))
              {
-                vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] = _simd_setzero_ps();
+                vCoverageMask[sample] = depthPassMask[sample] = stencilPassMask[sample] =
+                    _simd_setzero_ps();
                  continue;
              }
  
              // offset depth/stencil buffers current sample
-            uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
-            uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+            uint8_t* pDepthSample   = pDepthBuffer + RasterTileDepthOffset(sample);
+            uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
  
              if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
              {
-                static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+                static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+                              "Unsupported depth hot tile format");
  
-                const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+                const simdscalar z = _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
  
                  const float minz = state.depthBoundsState.depthBoundsTestMinValue;
                  const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
  
-                vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
+                vCoverageMask[sample] =
+                    _simd_and_ps(vCoverageMask[sample],
+                                 _simd_vmask_ps(CalcDepthBoundsAcceptMask(z, minz, maxz)));
              }
  
              RDTSC_BEGIN(BEBarycentric, pDC->drawId);
@@ -608,7 +764,7 @@ struct PixelRateZTestLoop
              // calc I & J per sample
              CalcSampleBarycentrics(coeffs, psContext);
  
-            if(psState.writesODepth)
+            if (psState.writesODepth)
              {
                  {
                      // broadcast and test oDepth(psContext.vZ) written from the PS for each sample
@@ -617,7 +773,8 @@ struct PixelRateZTestLoop
              }
              else
              {
-                vZ[sample] = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                vZ[sample] = vplaneps(
+                    coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
                  vZ[sample] = state.pfnQuantizeDepth(vZ[sample]);
              }
  
@@ -625,36 +782,52 @@ struct PixelRateZTestLoop
  
              ///@todo: perspective correct vs non-perspective correct clipping?
              // if clip distances are enabled, we need to interpolate for each sample
-            if(clipDistanceMask)
+            if (clipDistanceMask)
              {
-                uint8_t clipMask = ComputeUserClipMask(clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+                uint8_t clipMask = ComputeUserClipMask(clipDistanceMask,
+                                                       work.pUserClipBuffer,
+                                                       psContext.vI.sample,
+                                                       psContext.vJ.sample);
  
-                vCoverageMask[sample] = _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(~clipMask));
+                vCoverageMask[sample] =
+                    _simd_and_ps(vCoverageMask[sample], _simd_vmask_ps(~clipMask));
              }
  
              // ZTest for this sample
              ///@todo Need to uncomment out this bucket.
-            //RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
-            depthPassMask[sample] = vCoverageMask[sample];
+            // RDTSC_BEGIN(BEDepthBucket, pDC->drawId);
+            depthPassMask[sample]   = vCoverageMask[sample];
              stencilPassMask[sample] = vCoverageMask[sample];
-            depthPassMask[sample] = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                                                     vZ[sample], pDepthSample, vCoverageMask[sample], 
-                                                     pStencilSample, &stencilPassMask[sample]);
-            //RDTSC_END(BEDepthBucket, 0);
+            depthPassMask[sample]   = DepthStencilTest(&state,
+                                                     work.triFlags.frontFacing,
+                                                     work.triFlags.viewportIndex,
+                                                     vZ[sample],
+                                                     pDepthSample,
+                                                     vCoverageMask[sample],
+                                                     pStencilSample,
+                                                     &stencilPassMask[sample]);
+            // RDTSC_END(BEDepthBucket, 0);
  
              // early-exit if no pixels passed depth or earlyZ is forced on
-            if(psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
+            if (psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask[sample]))
              {
-                DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, vZ[sample],
-                                  pDepthSample, depthPassMask[sample], vCoverageMask[sample], pStencilSample, stencilPassMask[sample]);
-
-                if(!_simd_movemask_ps(depthPassMask[sample]))
+                DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                  &state.depthStencilState,
+                                  work.triFlags.frontFacing,
+                                  vZ[sample],
+                                  pDepthSample,
+                                  depthPassMask[sample],
+                                  vCoverageMask[sample],
+                                  pStencilSample,
+                                  stencilPassMask[sample]);
+
+                if (!_simd_movemask_ps(depthPassMask[sample]))
                  {
                      continue;
                  }
              }
              anyDepthSamplePassed = _simd_or_ps(anyDepthSamplePassed, depthPassMask[sample]);
-            uint32_t statMask = _simd_movemask_ps(depthPassMask[sample]);
+            uint32_t statMask    = _simd_movemask_ps(depthPassMask[sample]);
              statCount += _mm_popcnt_u32(statMask);
          }
  
@@ -672,106 +845,129 @@ struct PixelRateZTestLoop
  private:
      // functor inputs
      DRAW_CONTEXT* pDC;
-    uint32_t workerId;
+    uint32_t      workerId;
  
-    const SWR_TRIANGLE_DESC& work;
-    const BarycentricCoeffs& coeffs;
-    const API_STATE& state;
-    const SWR_PS_STATE& psState;
+    const SWR_TRIANGLE_DESC&   work;
+    const BarycentricCoeffs&   coeffs;
+    const API_STATE&           state;
+    const SWR_PS_STATE&        psState;
      const SWR_MULTISAMPLE_POS& samplePos;
-    const uint8_t clipDistanceMask;
-    uint8_t*& pDepthBuffer;
-    uint8_t*& pStencilBuffer;
+    const uint8_t              clipDistanceMask;
+    uint8_t*&                  pDepthBuffer;
+    uint8_t*&                  pStencilBuffer;
  };
  
-INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
+INLINE void CalcPixelBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT& psContext)
  {
      // evaluate I,J
-    psContext.vI.center = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
-    psContext.vJ.center = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
+    psContext.vI.center =
+        vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.center, psContext.vY.center);
+    psContext.vJ.center =
+        vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.center, psContext.vY.center);
      psContext.vI.center = _simd_mul_ps(psContext.vI.center, coeffs.vRecipDet);
      psContext.vJ.center = _simd_mul_ps(psContext.vJ.center, coeffs.vRecipDet);
  
      // interpolate 1/w
-    psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.center, psContext.vJ.center);
+    psContext.vOneOverW.center = vplaneps(coeffs.vAOneOverW,
+                                          coeffs.vBOneOverW,
+                                          coeffs.vCOneOverW,
+                                          psContext.vI.center,
+                                          psContext.vJ.center);
  }
  
-static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs, SWR_PS_CONTEXT &psContext)
+static INLINE void CalcSampleBarycentrics(const BarycentricCoeffs& coeffs,
+                                          SWR_PS_CONTEXT&          psContext)
  {
      // evaluate I,J
-    psContext.vI.sample = vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
-    psContext.vJ.sample = vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
+    psContext.vI.sample =
+        vplaneps(coeffs.vIa, coeffs.vIb, coeffs.vIc, psContext.vX.sample, psContext.vY.sample);
+    psContext.vJ.sample =
+        vplaneps(coeffs.vJa, coeffs.vJb, coeffs.vJc, psContext.vX.sample, psContext.vY.sample);
      psContext.vI.sample = _simd_mul_ps(psContext.vI.sample, coeffs.vRecipDet);
      psContext.vJ.sample = _simd_mul_ps(psContext.vJ.sample, coeffs.vRecipDet);
  
      // interpolate 1/w
-    psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW, coeffs.vBOneOverW, coeffs.vCOneOverW, psContext.vI.sample, psContext.vJ.sample);
+    psContext.vOneOverW.sample = vplaneps(coeffs.vAOneOverW,
+                                          coeffs.vBOneOverW,
+                                          coeffs.vCOneOverW,
+                                          psContext.vI.sample,
+                                          psContext.vJ.sample);
  }
  
  // Merge Output to 4x2 SIMD Tile Format
-INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, uint32_t workerId)
+INLINE void OutputMerger4x2(DRAW_CONTEXT*   pDC,
+                            SWR_PS_CONTEXT& psContext,
+                            uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS],
+                            uint32_t               sample,
+                            const SWR_BLEND_STATE* pBlendState,
+                            const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS],
+                            simdscalar&       coverageMask,
+                            simdscalar const& depthPassMask,
+                            uint32_t          renderTargetMask,
+                            uint32_t          workerId)
  {
      // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
      const uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
-    simdvector blendOut;
+    simdvector     blendOut;
  
      DWORD rt = 0;
      while (_BitScanForward(&rt, renderTargetMask))
      {
          renderTargetMask &= ~(1 << rt);
-        uint8_t *pColorSample = pColorBase[rt] + rasterTileColorOffset;
+        uint8_t* pColorSample = pColorBase[rt] + rasterTileColorOffset;
  
-        const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
+        const SWR_RENDER_TARGET_BLEND_STATE* pRTBlend = &pBlendState->renderTarget[rt];
  
-        SWR_BLEND_CONTEXT blendContext = { 0 };
+        SWR_BLEND_CONTEXT blendContext = {0};
          {
              // pfnBlendFunc may not update all channels.  Initialize with PS output.
              /// TODO: move this into the blend JIT.
              blendOut = psContext.shaded[rt];
  
              blendContext.pBlendState = pBlendState;
-            blendContext.src = &psContext.shaded[rt];
-            blendContext.src1 = &psContext.shaded[1];
-            blendContext.src0alpha = reinterpret_cast<simdvector *>(&psContext.shaded[0].w);
-            blendContext.sampleNum = sample;
-            blendContext.pDst = (simdvector *) &pColorSample;
-            blendContext.result = &blendOut;
-            blendContext.oMask = &psContext.oMask;
-            blendContext.pMask = reinterpret_cast<simdscalari *>(&coverageMask);
+            blendContext.src         = &psContext.shaded[rt];
+            blendContext.src1        = &psContext.shaded[1];
+            blendContext.src0alpha   = reinterpret_cast<simdvector*>(&psContext.shaded[0].w);
+            blendContext.sampleNum   = sample;
+            blendContext.pDst        = (simdvector*)&pColorSample;
+            blendContext.result      = &blendOut;
+            blendContext.oMask       = &psContext.oMask;
+            blendContext.pMask       = reinterpret_cast<simdscalari*>(&coverageMask);
  
              // Blend outputs and update coverage mask for alpha test
-            if(pfnBlendFunc[rt] != nullptr)
+            if (pfnBlendFunc[rt] != nullptr)
              {
                  pfnBlendFunc[rt](&blendContext);
              }
          }
  
          // Track alpha events
-        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+        AR_EVENT(
+            AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
  
-        // final write mask 
+        // final write mask
          simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
  
          ///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
-        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+                      "Unsupported hot tile format");
  
          const uint32_t simd = KNOB_SIMD_WIDTH * sizeof(float);
  
          // store with color mask
-        if(!pRTBlend->writeDisableRed)
+        if (!pRTBlend->writeDisableRed)
          {
              _simd_maskstore_ps((float*)pColorSample, outputMask, blendOut.x);
          }
-        if(!pRTBlend->writeDisableGreen)
+        if (!pRTBlend->writeDisableGreen)
          {
              _simd_maskstore_ps((float*)(pColorSample + simd), outputMask, blendOut.y);
          }
-        if(!pRTBlend->writeDisableBlue)
+        if (!pRTBlend->writeDisableBlue)
          {
              _simd_maskstore_ps((float*)(pColorSample + simd * 2), outputMask, blendOut.z);
          }
-        if(!pRTBlend->writeDisableAlpha)
+        if (!pRTBlend->writeDisableAlpha)
          {
              _simd_maskstore_ps((float*)(pColorSample + simd * 3), outputMask, blendOut.w);
          }
@@ -780,8 +976,17 @@ INLINE void OutputMerger4x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_
  
  #if USE_8x2_TILE_BACKEND
  // Merge Output to 8x2 SIMD16 Tile Format
-INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS], uint32_t sample, const SWR_BLEND_STATE *pBlendState,
-    const PFN_BLEND_JIT_FUNC(&pfnBlendFunc)[SWR_NUM_RENDERTARGETS], simdscalar &coverageMask, simdscalar const &depthPassMask, uint32_t renderTargetMask, bool useAlternateOffset, uint32_t workerId)
+INLINE void OutputMerger8x2(DRAW_CONTEXT*   pDC,
+                            SWR_PS_CONTEXT& psContext,
+                            uint8_t* (&pColorBase)[SWR_NUM_RENDERTARGETS],
+                            uint32_t               sample,
+                            const SWR_BLEND_STATE* pBlendState,
+                            const PFN_BLEND_JIT_FUNC (&pfnBlendFunc)[SWR_NUM_RENDERTARGETS],
+                            simdscalar&       coverageMask,
+                            simdscalar const& depthPassMask,
+                            uint32_t          renderTargetMask,
+                            bool              useAlternateOffset,
+                            uint32_t          workerId)
  {
      // type safety guaranteed from template instantiation in BEChooser<>::GetFunc
      uint32_t rasterTileColorOffset = RasterTileColorOffset(sample);
@@ -799,150 +1004,180 @@ INLINE void OutputMerger8x2(DRAW_CONTEXT *pDC, SWR_PS_CONTEXT &psContext, uint8_
      {
          renderTargetMask &= ~(1 << rt);
  
-        const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = &pBlendState->renderTarget[rt];
+        const SWR_RENDER_TARGET_BLEND_STATE* pRTBlend = &pBlendState->renderTarget[rt];
  
          simdscalar* pColorSample;
-        bool hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed || !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
+        bool        hotTileEnable = !pRTBlend->writeDisableAlpha || !pRTBlend->writeDisableRed ||
+                             !pRTBlend->writeDisableGreen || !pRTBlend->writeDisableBlue;
          if (hotTileEnable)
          {
-            pColorSample = reinterpret_cast<simdscalar *>(pColorBase[rt] + rasterTileColorOffset);
-            blendSrc[0] = pColorSample[0];
-            blendSrc[1] = pColorSample[2];
-            blendSrc[2] = pColorSample[4];
-            blendSrc[3] = pColorSample[6];
+            pColorSample = reinterpret_cast<simdscalar*>(pColorBase[rt] + rasterTileColorOffset);
+            blendSrc[0]  = pColorSample[0];
+            blendSrc[1]  = pColorSample[2];
+            blendSrc[2]  = pColorSample[4];
+            blendSrc[3]  = pColorSample[6];
          }
          else
          {
              pColorSample = nullptr;
          }
  
-        SWR_BLEND_CONTEXT blendContext = { 0 };
+        SWR_BLEND_CONTEXT blendContext = {0};
          {
              // pfnBlendFunc may not update all channels.  Initialize with PS output.
              /// TODO: move this into the blend JIT.
              blendOut = psContext.shaded[rt];
  
-            blendContext.pBlendState    = pBlendState;
-            blendContext.src            = &psContext.shaded[rt];
-            blendContext.src1           = &psContext.shaded[1];
-            blendContext.src0alpha      = reinterpret_cast<simdvector *>(&psContext.shaded[0].w);
-            blendContext.sampleNum      = sample;
-            blendContext.pDst           = &blendSrc;
-            blendContext.result         = &blendOut;
-            blendContext.oMask          = &psContext.oMask;
-            blendContext.pMask          = reinterpret_cast<simdscalari *>(&coverageMask);
+            blendContext.pBlendState = pBlendState;
+            blendContext.src         = &psContext.shaded[rt];
+            blendContext.src1        = &psContext.shaded[1];
+            blendContext.src0alpha   = reinterpret_cast<simdvector*>(&psContext.shaded[0].w);
+            blendContext.sampleNum   = sample;
+            blendContext.pDst        = &blendSrc;
+            blendContext.result      = &blendOut;
+            blendContext.oMask       = &psContext.oMask;
+            blendContext.pMask       = reinterpret_cast<simdscalari*>(&coverageMask);
  
              // Blend outputs and update coverage mask for alpha test
-            if(pfnBlendFunc[rt] != nullptr)
+            if (pfnBlendFunc[rt] != nullptr)
              {
                  pfnBlendFunc[rt](&blendContext);
              }
          }
  
          // Track alpha events
-        AR_EVENT(AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
+        AR_EVENT(
+            AlphaInfoEvent(pDC->drawId, blendContext.isAlphaTested, blendContext.isAlphaBlended));
  
-        // final write mask 
+        // final write mask
          simdscalari outputMask = _simd_castps_si(_simd_and_ps(coverageMask, depthPassMask));
  
          ///@todo can only use maskstore fast path if bpc is 32. Assuming hot tile is RGBA32_FLOAT.
-        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+                      "Unsupported hot tile format");
  
          // store with color mask
          if (!pRTBlend->writeDisableRed)
          {
-            _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[0]), outputMask, blendOut.x);
+            _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[0]), outputMask, blendOut.x);
          }
          if (!pRTBlend->writeDisableGreen)
          {
-            _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[2]), outputMask, blendOut.y);
+            _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[2]), outputMask, blendOut.y);
          }
          if (!pRTBlend->writeDisableBlue)
          {
-            _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[4]), outputMask, blendOut.z);
+            _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[4]), outputMask, blendOut.z);
          }
          if (!pRTBlend->writeDisableAlpha)
          {
-            _simd_maskstore_ps(reinterpret_cast<float *>(&pColorSample[6]), outputMask, blendOut.w);
+            _simd_maskstore_ps(reinterpret_cast<float*>(&pColorSample[6]), outputMask, blendOut.w);
          }
      }
  }
  
  #endif
  
-template<typename T>
-void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendPixelRate(DRAW_CONTEXT*        pDC,
+                      uint32_t             workerId,
+                      uint32_t             x,
+                      uint32_t             y,
+                      SWR_TRIANGLE_DESC&   work,
+                      RenderOutputBuffers& renderBuffers)
  {
-    ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the backend
+    ///@todo: Need to move locals off stack to prevent __chkstk's from being generated for the
+    /// backend
  
  
      RDTSC_BEGIN(BEPixelRateBackend, pDC->drawId);
      RDTSC_BEGIN(BESetup, pDC->drawId);
  
-    const API_STATE &state = GetApiState(pDC);
+    const API_STATE& state = GetApiState(pDC);
  
      BarycentricCoeffs coeffs;
      SetupBarycentricCoeffs(&coeffs, work);
  
-    SWR_CONTEXT *pContext = pDC->pContext;
-    void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+    SWR_CONTEXT* pContext    = pDC->pContext;
+    void*        pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
-    SWR_PS_CONTEXT psContext;
+    SWR_PS_CONTEXT             psContext;
      const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
      SetupPixelShaderContext<T>(&psContext, samplePos, work);
  
      uint8_t *pDepthBuffer, *pStencilBuffer;
-    SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+    SetupRenderBuffers(psContext.pColorBuffer,
+                       &pDepthBuffer,
+                       &pStencilBuffer,
+                       state.colorHottileEnable,
+                       renderBuffers);
  
      RDTSC_END(BESetup, 0);
  
-    PixelRateZTestLoop<T> PixelRateZTest(pDC, workerId, work, coeffs, state, pDepthBuffer, pStencilBuffer, state.backendState.clipDistanceMask);
+    PixelRateZTestLoop<T> PixelRateZTest(pDC,
+                                         workerId,
+                                         work,
+                                         coeffs,
+                                         state,
+                                         pDepthBuffer,
+                                         pStencilBuffer,
+                                         state.backendState.clipDistanceMask);
  
-    psContext.vY.UL     = _simd_add_ps(vULOffsetsY,     _simd_set1_ps(static_cast<float>(y)));
+    psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
      psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
  
      const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
  
-    for(uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
+    for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
      {
-        psContext.vX.UL     = _simd_add_ps(vULOffsetsX,     _simd_set1_ps(static_cast<float>(x)));
+        psContext.vX.UL     = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
          psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
  
          const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
  
-        for(uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
+        for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
          {
  #if USE_8x2_TILE_BACKEND
              const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
  #endif
              simdscalar activeLanes;
-            if(!(work.anyCoveredSamples & MASK)) {goto Endtile;};
+            if (!(work.anyCoveredSamples & MASK))
+            {
+                goto Endtile;
+            };
              activeLanes = _simd_vmask_ps(work.anyCoveredSamples & MASK);
  
              if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
              {
-                const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+                const uint64_t* pCoverageMask =
+                    (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+                        ? &work.innerCoverageMask
+                        : &work.coverageMask[0];
  
-                generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+                generateInputCoverage<T, T::InputCoverage>(
+                    pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
              }
  
              RDTSC_BEGIN(BEBarycentric, pDC->drawId);
  
              CalcPixelBarycentrics(coeffs, psContext);
  
-            CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+            CalcCentroid<T, false>(
+                &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
  
              RDTSC_END(BEBarycentric, 0);
  
-            if(T::bForcedSampleCount)
+            if (T::bForcedSampleCount)
              {
-                // candidate pixels (that passed coverage) will cause shader invocation if any bits in the samplemask are set
-                const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(_simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
-                activeLanes = _simd_and_ps(activeLanes, vSampleMask);
+                // candidate pixels (that passed coverage) will cause shader invocation if any bits
+                // in the samplemask are set
+                const simdscalar vSampleMask = _simd_castsi_ps(_simd_cmpgt_epi32(
+                    _simd_set1_epi32(state.blendState.sampleMask), _simd_setzero_si()));
+                activeLanes                  = _simd_and_ps(activeLanes, vSampleMask);
              }
  
              // Early-Z?
-            if(T::bCanEarlyZ && !T::bForcedSampleCount)
+            if (T::bCanEarlyZ && !T::bForcedSampleCount)
              {
                  uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BEEarlyDepthTest);
                  UPDATE_STAT_BE(DepthPassCount, depthPassCount);
@@ -950,20 +1185,24 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
              }
  
              // if we have no covered samples that passed depth at this point, go to next tile
-            if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+            if (!_simd_movemask_ps(activeLanes))
+            {
+                goto Endtile;
+            };
  
-            if(state.psState.usesSourceDepth)
+            if (state.psState.usesSourceDepth)
              {
                  RDTSC_BEGIN(BEBarycentric, pDC->drawId);
                  // interpolate and quantize z
-                psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+                psContext.vZ = vplaneps(
+                    coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
                  psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
                  RDTSC_END(BEBarycentric, 0);
              }
  
              // pixels that are currently active
              psContext.activeMask = _simd_castps_si(activeLanes);
-            psContext.oMask = T::MultisampleT::FullSampleMask();
+            psContext.oMask      = T::MultisampleT::FullSampleMask();
  
              // execute pixel shader
              RDTSC_BEGIN(BEPixelShader, pDC->drawId);
@@ -976,29 +1215,39 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
              AR_EVENT(PSStats(psContext.stats.numInstExecuted));
  
              // update active lanes to remove any discarded or oMask'd pixels
-            activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
-            if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+            activeLanes = _simd_castsi_ps(_simd_and_si(
+                psContext.activeMask, _simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si())));
+            if (!_simd_movemask_ps(activeLanes))
+            {
+                goto Endtile;
+            };
  
              // late-Z
-            if(!T::bCanEarlyZ && !T::bForcedSampleCount)
+            if (!T::bCanEarlyZ && !T::bForcedSampleCount)
              {
                  uint32_t depthPassCount = PixelRateZTest(activeLanes, psContext, BELateDepthTest);
                  UPDATE_STAT_BE(DepthPassCount, depthPassCount);
                  AR_EVENT(LateDepthInfoPixelRate(depthPassCount, _simd_movemask_ps(activeLanes)));
              }
  
-            // if we have no covered samples that passed depth at this point, skip OM and go to next tile
-            if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
+            // if we have no covered samples that passed depth at this point, skip OM and go to next
+            // tile
+            if (!_simd_movemask_ps(activeLanes))
+            {
+                goto Endtile;
+            };
  
              // output merger
              // loop over all samples, broadcasting the results of the PS to all passing pixels
-            for(uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount); sample++)
+            for (uint32_t sample = 0; sample < GetNumOMSamples<T>(state.blendState.sampleCount);
+                 sample++)
              {
                  RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
-                // center pattern does a single coverage/depth/stencil test, standard pattern tests all samples
-                uint32_t coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
+                // center pattern does a single coverage/depth/stencil test, standard pattern tests
+                // all samples
+                uint32_t   coverageSampleNum = (T::bIsCenterPattern) ? 0 : sample;
                  simdscalar coverageMask, depthMask;
-                if(T::bForcedSampleCount)
+                if (T::bForcedSampleCount)
                  {
                      coverageMask = depthMask = activeLanes;
                  }
@@ -1006,40 +1255,66 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t
                  {
                      coverageMask = PixelRateZTest.vCoverageMask[coverageSampleNum];
                      depthMask = PixelRateZTest.depthPassMask[coverageSampleNum];
-                    if(!_simd_movemask_ps(depthMask))
+                    if (!_simd_movemask_ps(depthMask))
                      {
                          // stencil should already have been written in early/lateZ tests
                          RDTSC_END(BEOutputMerger, 0);
                          continue;
                      }
                  }
-                
+
                  // broadcast the results of the PS to all passing pixels
  #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState,state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
-#else // USE_8x2_TILE_BACKEND
-                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, coverageMask, depthMask, state.psState.renderTargetMask, workerId);
+                OutputMerger8x2(pDC,
+                                psContext,
+                                psContext.pColorBuffer,
+                                sample,
+                                &state.blendState,
+                                state.pfnBlendFunc,
+                                coverageMask,
+                                depthMask,
+                                state.psState.renderTargetMask,
+                                useAlternateOffset,
+                                workerId);
+#else  // USE_8x2_TILE_BACKEND
+                OutputMerger4x2(pDC,
+                                psContext,
+                                psContext.pColorBuffer,
+                                sample,
+                                &state.blendState,
+                                state.pfnBlendFunc,
+                                coverageMask,
+                                depthMask,
+                                state.psState.renderTargetMask,
+                                workerId);
  #endif // USE_8x2_TILE_BACKEND
  
-                if(!state.psState.forceEarlyZ && !T::bForcedSampleCount)
+                if (!state.psState.forceEarlyZ && !T::bForcedSampleCount)
                  {
-                    uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
-                    uint8_t * pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
-
-                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, PixelRateZTest.vZ[coverageSampleNum],
-                                      pDepthSample, depthMask, coverageMask, pStencilSample, PixelRateZTest.stencilPassMask[coverageSampleNum]);
+                    uint8_t* pDepthSample   = pDepthBuffer + RasterTileDepthOffset(sample);
+                    uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+
+                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                      &state.depthStencilState,
+                                      work.triFlags.frontFacing,
+                                      PixelRateZTest.vZ[coverageSampleNum],
+                                      pDepthSample,
+                                      depthMask,
+                                      coverageMask,
+                                      pStencilSample,
+                                      PixelRateZTest.stencilPassMask[coverageSampleNum]);
                  }
                  RDTSC_END(BEOutputMerger, 0);
              }
-Endtile:
+        Endtile:
              RDTSC_BEGIN(BEEndTile, pDC->drawId);
  
-            for(uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
+            for (uint32_t sample = 0; sample < T::MultisampleT::numCoverageSamples; sample++)
              {
                  work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
              }
  
-            if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+            if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
              {
                  work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
              }
@@ -1048,48 +1323,55 @@ Endtile:
  #if USE_8x2_TILE_BACKEND
              if (useAlternateOffset)
              {
-                DWORD rt;
+                DWORD    rt;
                  uint32_t rtMask = state.colorHottileEnable;
                  while (_BitScanForward(&rt, rtMask))
                  {
                      rtMask &= ~(1 << rt);
-                    psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                    psContext.pColorBuffer[rt] +=
+                        (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
                  }
              }
  #else
-            DWORD rt;
+            DWORD    rt;
              uint32_t rtMask = state.colorHottileEnable;
              while (_BitScanForward(&rt, rtMask))
              {
                  rtMask &= ~(1 << rt);
-                psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                psContext.pColorBuffer[rt] +=
+                    (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
              }
  #endif
              pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
-            pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+            pStencilBuffer +=
+                (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
  
              RDTSC_END(BEEndTile, 0);
  
-            psContext.vX.UL     = _simd_add_ps(psContext.vX.UL,     dx);
+            psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
              psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
          }
  
-        psContext.vY.UL     = _simd_add_ps(psContext.vY.UL,     dy);
+        psContext.vY.UL     = _simd_add_ps(psContext.vY.UL, dy);
          psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
      }
  
      RDTSC_END(BEPixelRateBackend, 0);
  }
  
-template<uint32_t sampleCountT = SWR_MULTISAMPLE_1X, uint32_t isCenter = 0,
-         uint32_t coverage = 0, uint32_t centroid = 0, uint32_t forced = 0, uint32_t canEarlyZ = 0
-    >
+template <uint32_t sampleCountT = SWR_MULTISAMPLE_1X,
+          uint32_t isCenter     = 0,
+          uint32_t coverage     = 0,
+          uint32_t centroid     = 0,
+          uint32_t forced       = 0,
+          uint32_t canEarlyZ    = 0
+          >
  struct SwrBackendTraits
  {
-    static const bool bIsCenterPattern = (isCenter == 1);
-    static const uint32_t InputCoverage = coverage;
-    static const bool bCentroidPos = (centroid == 1);
-    static const bool bForcedSampleCount = (forced == 1);
-    static const bool bCanEarlyZ = (canEarlyZ == 1);
+    static const bool     bIsCenterPattern   = (isCenter == 1);
+    static const uint32_t InputCoverage      = coverage;
+    static const bool     bCentroidPos       = (centroid == 1);
+    static const bool     bForcedSampleCount = (forced == 1);
+    static const bool     bCanEarlyZ         = (canEarlyZ == 1);
      typedef MultisampleTraits<(SWR_MULTISAMPLE_COUNT)sampleCountT, bIsCenterPattern> MultisampleT;
  };
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp

index 5940aa7ba45495e1818bcf8f1c80823dfe1deb63..a1a1185bcfb506df1c6e46a1f3473b34af0bdf3e 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  
  #include <smmintrin.h>
  
@@ -37,35 +37,44 @@
  
  #include <algorithm>
  
-template<typename T>
-void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendSampleRate(DRAW_CONTEXT*        pDC,
+                       uint32_t             workerId,
+                       uint32_t             x,
+                       uint32_t             y,
+                       SWR_TRIANGLE_DESC&   work,
+                       RenderOutputBuffers& renderBuffers)
  {
      RDTSC_BEGIN(BESampleRateBackend, pDC->drawId);
      RDTSC_BEGIN(BESetup, pDC->drawId);
  
-    void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
-    const API_STATE &state = GetApiState(pDC);
+    void* pWorkerData      = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
+    const API_STATE& state = GetApiState(pDC);
  
      BarycentricCoeffs coeffs;
      SetupBarycentricCoeffs(&coeffs, work);
  
-    SWR_PS_CONTEXT psContext;
+    SWR_PS_CONTEXT             psContext;
      const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
      SetupPixelShaderContext<T>(&psContext, samplePos, work);
  
      uint8_t *pDepthBuffer, *pStencilBuffer;
-    SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+    SetupRenderBuffers(psContext.pColorBuffer,
+                       &pDepthBuffer,
+                       &pStencilBuffer,
+                       state.colorHottileEnable,
+                       renderBuffers);
  
      RDTSC_END(BESetup, 0);
  
-    psContext.vY.UL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
+    psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
      psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
  
      const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
  
      for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
      {
-        psContext.vX.UL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
+        psContext.vX.UL     = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
          psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
  
          const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
@@ -77,16 +86,21 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
  #endif
              if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
              {
-                const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+                const uint64_t* pCoverageMask =
+                    (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+                        ? &work.innerCoverageMask
+                        : &work.coverageMask[0];
  
-                generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+                generateInputCoverage<T, T::InputCoverage>(
+                    pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
              }
  
              RDTSC_BEGIN(BEBarycentric, pDC->drawId);
  
              CalcPixelBarycentrics(coeffs, psContext);
  
-            CalcCentroid<T, false>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+            CalcCentroid<T, false>(
+                &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
  
              RDTSC_END(BEBarycentric, 0);
  
@@ -97,14 +111,16 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                  if (coverageMask)
                  {
                      // offset depth/stencil buffers current sample
-                    uint8_t *pDepthSample = pDepthBuffer + RasterTileDepthOffset(sample);
-                    uint8_t *pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
+                    uint8_t* pDepthSample   = pDepthBuffer + RasterTileDepthOffset(sample);
+                    uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);
  
                      if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
                      {
-                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+                                      "Unsupported depth hot tile format");
  
-                        const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthSample));
+                        const simdscalar z =
+                            _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));
  
                          const float minz = state.depthBoundsState.depthBoundsTestMinValue;
                          const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
@@ -121,7 +137,11 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                      CalcSampleBarycentrics(coeffs, psContext);
  
                      // interpolate and quantize z
-                    psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.sample, psContext.vJ.sample);
+                    psContext.vZ = vplaneps(coeffs.vZa,
+                                            coeffs.vZb,
+                                            coeffs.vZc,
+                                            psContext.vI.sample,
+                                            psContext.vJ.sample);
                      psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
  
                      RDTSC_END(BEBarycentric, 0);
@@ -129,27 +149,45 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                      // interpolate user clip distance if available
                      if (state.backendState.clipDistanceMask)
                      {
-                        coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.sample, psContext.vJ.sample);
+                        coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+                                                             work.pUserClipBuffer,
+                                                             psContext.vI.sample,
+                                                             psContext.vJ.sample);
                      }
  
-                    simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
-                    simdscalar depthPassMask = vCoverageMask;
+                    simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
+                    simdscalar depthPassMask   = vCoverageMask;
                      simdscalar stencilPassMask = vCoverageMask;
  
                      // Early-Z?
                      if (T::bCanEarlyZ)
                      {
                          RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
-                        depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                            psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
-                        AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+                        depthPassMask = DepthStencilTest(&state,
+                                                         work.triFlags.frontFacing,
+                                                         work.triFlags.viewportIndex,
+                                                         psContext.vZ,
+                                                         pDepthSample,
+                                                         vCoverageMask,
+                                                         pStencilSample,
+                                                         &stencilPassMask);
+                        AR_EVENT(EarlyDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
+                                                                 _simd_movemask_ps(stencilPassMask),
+                                                                 _simd_movemask_ps(vCoverageMask)));
                          RDTSC_END(BEEarlyDepthTest, 0);
  
                          // early-exit if no samples passed depth or earlyZ is forced on.
                          if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
                          {
-                            DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                                pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+                            DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                              &state.depthStencilState,
+                                              work.triFlags.frontFacing,
+                                              psContext.vZ,
+                                              pDepthSample,
+                                              depthPassMask,
+                                              vCoverageMask,
+                                              pStencilSample,
+                                              stencilPassMask);
  
                              if (!_simd_movemask_ps(depthPassMask))
                              {
@@ -160,7 +198,7 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                      }
  
                      psContext.sampleIndex = sample;
-                    psContext.activeMask = _simd_castps_si(vCoverageMask);
+                    psContext.activeMask  = _simd_castps_si(vCoverageMask);
  
                      // execute pixel shader
                      RDTSC_BEGIN(BEPixelShader, pDC->drawId);
@@ -177,39 +215,80 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
                      if (!T::bCanEarlyZ)
                      {
                          RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
-                        depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                            psContext.vZ, pDepthSample, vCoverageMask, pStencilSample, &stencilPassMask);
-                        AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+                        depthPassMask = DepthStencilTest(&state,
+                                                         work.triFlags.frontFacing,
+                                                         work.triFlags.viewportIndex,
+                                                         psContext.vZ,
+                                                         pDepthSample,
+                                                         vCoverageMask,
+                                                         pStencilSample,
+                                                         &stencilPassMask);
+                        AR_EVENT(LateDepthStencilInfoSampleRate(_simd_movemask_ps(depthPassMask),
+                                                                _simd_movemask_ps(stencilPassMask),
+                                                                _simd_movemask_ps(vCoverageMask)));
                          RDTSC_END(BELateDepthTest, 0);
  
                          if (!_simd_movemask_ps(depthPassMask))
                          {
                              // need to call depth/stencil write for stencil write
-                            DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                                pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+                            DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                              &state.depthStencilState,
+                                              work.triFlags.frontFacing,
+                                              psContext.vZ,
+                                              pDepthSample,
+                                              depthPassMask,
+                                              vCoverageMask,
+                                              pStencilSample,
+                                              stencilPassMask);
  
                              work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
                              continue;
                          }
                      }
  
-                    uint32_t statMask = _simd_movemask_ps(depthPassMask);
+                    uint32_t statMask  = _simd_movemask_ps(depthPassMask);
                      uint32_t statCount = _mm_popcnt_u32(statMask);
                      UPDATE_STAT_BE(DepthPassCount, statCount);
  
                      // output merger
                      RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
  #if USE_8x2_TILE_BACKEND
-                    OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
+                    OutputMerger8x2(pDC,
+                                    psContext,
+                                    psContext.pColorBuffer,
+                                    sample,
+                                    &state.blendState,
+                                    state.pfnBlendFunc,
+                                    vCoverageMask,
+                                    depthPassMask,
+                                    state.psState.renderTargetMask,
+                                    useAlternateOffset,
+                                    workerId);
  #else
-                    OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, sample, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId);
+                    OutputMerger4x2(pDC,
+                                    psContext,
+                                    psContext.pColorBuffer,
+                                    sample,
+                                    &state.blendState,
+                                    state.pfnBlendFunc,
+                                    vCoverageMask,
+                                    depthPassMask,
+                                    state.psState.renderTargetMask,
+                                    workerId);
  #endif
  
                      // do final depth write after all pixel kills
                      if (!state.psState.forceEarlyZ)
                      {
-                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                            pDepthSample, depthPassMask, vCoverageMask, pStencilSample, stencilPassMask);
+                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                          &state.depthStencilState,
+                                          work.triFlags.frontFacing,
+                                          psContext.vZ,
+                                          pDepthSample,
+                                          depthPassMask,
+                                          vCoverageMask,
+                                          pStencilSample,
+                                          stencilPassMask);
                      }
                      RDTSC_END(BEOutputMerger, 0);
                  }
@@ -229,12 +308,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
  #if USE_8x2_TILE_BACKEND
              if (useAlternateOffset)
              {
-                DWORD rt;
+                DWORD    rt;
                  uint32_t rtMask = state.colorHottileEnable;
                  while (_BitScanForward(&rt, rtMask))
                  {
                      rtMask &= ~(1 << rt);
-                    psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                    psContext.pColorBuffer[rt] +=
+                        (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
                  }
              }
  #else
@@ -243,19 +323,21 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_
              while (_BitScanForward(&rt, rtMask))
              {
                  rtMask &= ~(1 << rt);
-                psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                psContext.pColorBuffer[rt] +=
+                    (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
              }
  #endif
              pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
-            pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+            pStencilBuffer +=
+                (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
  
              RDTSC_END(BEEndTile, 0);
  
-            psContext.vX.UL = _simd_add_ps(psContext.vX.UL, dx);
+            psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
              psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
          }
  
-        psContext.vY.UL = _simd_add_ps(psContext.vY.UL, dy);
+        psContext.vY.UL     = _simd_add_ps(psContext.vY.UL, dy);
          psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
      }
  
@@ -272,7 +354,9 @@ struct BEChooserSampleRate
      {
          switch (tArg)
          {
-        case SWR_BACKEND_MSAA_SAMPLE_RATE: return BackendSampleRate<SwrBackendTraits<ArgsT...>>; break;
+        case SWR_BACKEND_MSAA_SAMPLE_RATE:
+            return BackendSampleRate<SwrBackendTraits<ArgsT...>>;
+            break;
          case SWR_BACKEND_SINGLE_SAMPLE:
          case SWR_BACKEND_MSAA_PIXEL_RATE:
              SWR_ASSERT(0 && "Invalid backend func\n");
@@ -291,12 +375,22 @@ struct BEChooserSampleRate
      {
          switch (tArg)
          {
-        case SWR_INPUT_COVERAGE_NONE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
-        case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
-        case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
+        case SWR_INPUT_COVERAGE_NONE:
+            return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+                remainingArgs...);
+            break;
+        case SWR_INPUT_COVERAGE_NORMAL:
+            return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
+                remainingArgs...);
+            break;
+        case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
+            return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
+                remainingArgs...);
+            break;
          default:
              SWR_ASSERT(0 && "Invalid sample pattern\n");
-            return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
+            return BEChooserSampleRate<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+                remainingArgs...);
              break;
          }
      }
@@ -307,11 +401,21 @@ struct BEChooserSampleRate
      {
          switch (tArg)
          {
-        case SWR_MULTISAMPLE_1X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_2X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_4X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_8X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_16X: return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+        case SWR_MULTISAMPLE_1X:
+            return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_2X:
+            return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_4X:
+            return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_8X:
+            return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_16X:
+            return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
+            break;
          default:
              SWR_ASSERT(0 && "Invalid sample count\n");
              return BEChooserSampleRate<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
@@ -332,9 +436,11 @@ struct BEChooserSampleRate
      }
  };
  
-void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
+void InitBackendSampleFuncTable(
+    PFN_BACKEND_FUNC (&table)[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT][2][2])
  {
-    for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT; sampleCount++)
+    for (uint32_t sampleCount = SWR_MULTISAMPLE_1X; sampleCount < SWR_MULTISAMPLE_TYPE_COUNT;
+         sampleCount++)
      {
          for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
          {
@@ -343,8 +449,14 @@ void InitBackendSampleFuncTable(PFN_BACKEND_FUNC(&table)[SWR_MULTISAMPLE_TYPE_CO
                  for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
                  {
                      table[sampleCount][inputCoverage][centroid][canEarlyZ] =
-                        BEChooserSampleRate<>::GetFunc((SWR_MULTISAMPLE_COUNT)sampleCount, false, (SWR_INPUT_COVERAGE)inputCoverage,
-                        (centroid > 0), false, (canEarlyZ > 0), (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
+                        BEChooserSampleRate<>::GetFunc(
+                            (SWR_MULTISAMPLE_COUNT)sampleCount,
+                            false,
+                            (SWR_INPUT_COVERAGE)inputCoverage,
+                            (centroid > 0),
+                            false,
+                            (canEarlyZ > 0),
+                            (SWR_BACKEND_FUNCS)SWR_BACKEND_MSAA_SAMPLE_RATE);
                  }
              }
          }
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp

index aaaba636ed375cd70c695b83bdd33a492fe5eeea..2efb01f95db2b3650b37a51d3f03a10c5b6f92c3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file backend.cpp
-*
-* @brief Backend handles rasterization, pixel shading and output merger
-*        operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file backend.cpp
+ *
+ * @brief Backend handles rasterization, pixel shading and output merger
+ *        operations.
+ *
+ ******************************************************************************/
  
  #include <smmintrin.h>
  
@@ -37,36 +37,45 @@
  
  #include <algorithm>
  
-template<typename T>
-void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint32_t y, SWR_TRIANGLE_DESC &work, RenderOutputBuffers &renderBuffers)
+template <typename T>
+void BackendSingleSample(DRAW_CONTEXT*        pDC,
+                         uint32_t             workerId,
+                         uint32_t             x,
+                         uint32_t             y,
+                         SWR_TRIANGLE_DESC&   work,
+                         RenderOutputBuffers& renderBuffers)
  {
      RDTSC_BEGIN(BESingleSampleBackend, pDC->drawId);
      RDTSC_BEGIN(BESetup, pDC->drawId);
  
      void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
-    const API_STATE &state = GetApiState(pDC);
+    const API_STATE& state = GetApiState(pDC);
  
      BarycentricCoeffs coeffs;
      SetupBarycentricCoeffs(&coeffs, work);
  
-    SWR_PS_CONTEXT psContext;
+    SWR_PS_CONTEXT             psContext;
      const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
      SetupPixelShaderContext<T>(&psContext, samplePos, work);
  
      uint8_t *pDepthBuffer, *pStencilBuffer;
-    SetupRenderBuffers(psContext.pColorBuffer, &pDepthBuffer, &pStencilBuffer, state.colorHottileEnable, renderBuffers);
+    SetupRenderBuffers(psContext.pColorBuffer,
+                       &pDepthBuffer,
+                       &pStencilBuffer,
+                       state.colorHottileEnable,
+                       renderBuffers);
  
      RDTSC_END(BESetup, 1);
  
-    psContext.vY.UL     = _simd_add_ps(vULOffsetsY,     _simd_set1_ps(static_cast<float>(y)));
+    psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
      psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
  
      const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
  
      for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
      {
-        psContext.vX.UL     = _simd_add_ps(vULOffsetsX,     _simd_set1_ps(static_cast<float>(x)));
+        psContext.vX.UL     = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
          psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
  
          const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
@@ -82,9 +91,11 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
              {
                  if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
                  {
-                    static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
+                    static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
+                                  "Unsupported depth hot tile format");
  
-                    const simdscalar z = _simd_load_ps(reinterpret_cast<const float *>(pDepthBuffer));
+                    const simdscalar z =
+                        _simd_load_ps(reinterpret_cast<const float*>(pDepthBuffer));
  
                      const float minz = state.depthBoundsState.depthBoundsTestMinValue;
                      const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
@@ -94,19 +105,25 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
  
                  if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
                  {
-                    const uint64_t* pCoverageMask = (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE) ? &work.innerCoverageMask : &work.coverageMask[0];
+                    const uint64_t* pCoverageMask =
+                        (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+                            ? &work.innerCoverageMask
+                            : &work.coverageMask[0];
  
-                    generateInputCoverage<T, T::InputCoverage>(pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
+                    generateInputCoverage<T, T::InputCoverage>(
+                        pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
                  }
  
                  RDTSC_BEGIN(BEBarycentric, pDC->drawId);
  
                  CalcPixelBarycentrics(coeffs, psContext);
  
-                CalcCentroid<T, true>(&psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
+                CalcCentroid<T, true>(
+                    &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
  
                  // interpolate and quantize z
-                psContext.vZ = vplaneps(coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
+                psContext.vZ = vplaneps(
+                    coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
                  psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
  
                  RDTSC_END(BEBarycentric, 1);
@@ -114,27 +131,45 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                  // interpolate user clip distance if available
                  if (state.backendState.clipDistanceMask)
                  {
-                    coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask, work.pUserClipBuffer, psContext.vI.center, psContext.vJ.center);
+                    coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
+                                                         work.pUserClipBuffer,
+                                                         psContext.vI.center,
+                                                         psContext.vJ.center);
                  }
  
-                simdscalar vCoverageMask = _simd_vmask_ps(coverageMask);
-                simdscalar depthPassMask = vCoverageMask;
+                simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
+                simdscalar depthPassMask   = vCoverageMask;
                  simdscalar stencilPassMask = vCoverageMask;
  
                  // Early-Z?
                  if (T::bCanEarlyZ)
                  {
                      RDTSC_BEGIN(BEEarlyDepthTest, pDC->drawId);
-                    depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                                                     psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
-                    AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+                    depthPassMask = DepthStencilTest(&state,
+                                                     work.triFlags.frontFacing,
+                                                     work.triFlags.viewportIndex,
+                                                     psContext.vZ,
+                                                     pDepthBuffer,
+                                                     vCoverageMask,
+                                                     pStencilBuffer,
+                                                     &stencilPassMask);
+                    AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
+                                                               _simd_movemask_ps(stencilPassMask),
+                                                               _simd_movemask_ps(vCoverageMask)));
                      RDTSC_END(BEEarlyDepthTest, 0);
  
                      // early-exit if no pixels passed depth or earlyZ is forced on
                      if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
                      {
-                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                            pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                          &state.depthStencilState,
+                                          work.triFlags.frontFacing,
+                                          psContext.vZ,
+                                          pDepthBuffer,
+                                          depthPassMask,
+                                          vCoverageMask,
+                                          pStencilBuffer,
+                                          stencilPassMask);
  
                          if (!_simd_movemask_ps(depthPassMask))
                          {
@@ -144,7 +179,7 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                  }
  
                  psContext.sampleIndex = 0;
-                psContext.activeMask = _simd_castps_si(vCoverageMask);
+                psContext.activeMask  = _simd_castps_si(vCoverageMask);
  
                  // execute pixel shader
                  RDTSC_BEGIN(BEPixelShader, pDC->drawId);
@@ -161,50 +196,94 @@ void BackendSingleSample(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t x, uint3
                  if (!T::bCanEarlyZ)
                  {
                      RDTSC_BEGIN(BELateDepthTest, pDC->drawId);
-                    depthPassMask = DepthStencilTest(&state, work.triFlags.frontFacing, work.triFlags.viewportIndex,
-                                                        psContext.vZ, pDepthBuffer, vCoverageMask, pStencilBuffer, &stencilPassMask);
-                    AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask), _simd_movemask_ps(stencilPassMask), _simd_movemask_ps(vCoverageMask)));
+                    depthPassMask = DepthStencilTest(&state,
+                                                     work.triFlags.frontFacing,
+                                                     work.triFlags.viewportIndex,
+                                                     psContext.vZ,
+                                                     pDepthBuffer,
+                                                     vCoverageMask,
+                                                     pStencilBuffer,
+                                                     &stencilPassMask);
+                    AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
+                                                              _simd_movemask_ps(stencilPassMask),
+                                                              _simd_movemask_ps(vCoverageMask)));
                      RDTSC_END(BELateDepthTest, 0);
  
                      if (!_simd_movemask_ps(depthPassMask))
                      {
                          // need to call depth/stencil write for stencil write
-                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                            pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+                        DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                          &state.depthStencilState,
+                                          work.triFlags.frontFacing,
+                                          psContext.vZ,
+                                          pDepthBuffer,
+                                          depthPassMask,
+                                          vCoverageMask,
+                                          pStencilBuffer,
+                                          stencilPassMask);
                          goto Endtile;
                      }
-                } else {
+                }
+                else
+                {
                      // for early z, consolidate discards from shader
                      // into depthPassMask
                      depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
                  }
  
-                uint32_t statMask = _simd_movemask_ps(depthPassMask);
+                uint32_t statMask  = _simd_movemask_ps(depthPassMask);
                  uint32_t statCount = _mm_popcnt_u32(statMask);
                  UPDATE_STAT_BE(DepthPassCount, statCount);
  
                  // output merger
                  RDTSC_BEGIN(BEOutputMerger, pDC->drawId);
  #if USE_8x2_TILE_BACKEND
-                OutputMerger8x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, useAlternateOffset, workerId);
+                OutputMerger8x2(pDC,
+                                psContext,
+                                psContext.pColorBuffer,
+                                0,
+                                &state.blendState,
+                                state.pfnBlendFunc,
+                                vCoverageMask,
+                                depthPassMask,
+                                state.psState.renderTargetMask,
+                                useAlternateOffset,
+                                workerId);
  #else
-                OutputMerger4x2(pDC, psContext, psContext.pColorBuffer, 0, &state.blendState, state.pfnBlendFunc, vCoverageMask, depthPassMask, state.psState.renderTargetMask, workerId, workerId);
+                OutputMerger4x2(pDC,
+                                psContext,
+                                psContext.pColorBuffer,
+                                0,
+                                &state.blendState,
+                                state.pfnBlendFunc,
+                                vCoverageMask,
+                                depthPassMask,
+                                state.psState.renderTargetMask,
+                                workerId,
+                                workerId);
  #endif
  
                  // do final depth write after all pixel kills
                  if (!state.psState.forceEarlyZ)
                  {
-                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex], &state.depthStencilState, work.triFlags.frontFacing, psContext.vZ,
-                        pDepthBuffer, depthPassMask, vCoverageMask, pStencilBuffer, stencilPassMask);
+                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
+                                      &state.depthStencilState,
+                                      work.triFlags.frontFacing,
+                                      psContext.vZ,
+                                      pDepthBuffer,
+                                      depthPassMask,
+                                      vCoverageMask,
+                                      pStencilBuffer,
+                                      stencilPassMask);
                  }
                  RDTSC_END(BEOutputMerger, 0);
              }
  
-Endtile:
+        Endtile:
              RDTSC_BEGIN(BEEndTile, pDC->drawId);
  
              work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
-            if(T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
+            if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
              {
                  work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
              }
@@ -212,12 +291,13 @@ Endtile:
  #if USE_8x2_TILE_BACKEND
              if (useAlternateOffset)
              {
-                DWORD rt;
+                DWORD    rt;
                  uint32_t rtMask = state.colorHottileEnable;
-                while(_BitScanForward(&rt, rtMask))
+                while (_BitScanForward(&rt, rtMask))
                  {
                      rtMask &= ~(1 << rt);
-                    psContext.pColorBuffer[rt] += (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                    psContext.pColorBuffer[rt] +=
+                        (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
                  }
              }
  #else
@@ -226,19 +306,21 @@ Endtile:
              while (_BitScanForward(&rt, rtMask))
              {
                  rtMask &= ~(1 << rt);
-                psContext.pColorBuffer[rt] += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
+                psContext.pColorBuffer[rt] +=
+                    (KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
              }
  #endif
              pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
-            pStencilBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
+            pStencilBuffer +=
+                (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
  
              RDTSC_END(BEEndTile, 0);
  
-            psContext.vX.UL     = _simd_add_ps(psContext.vX.UL,     dx);
+            psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
              psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
          }
  
-        psContext.vY.UL     = _simd_add_ps(psContext.vY.UL,     dy);
+        psContext.vY.UL     = _simd_add_ps(psContext.vY.UL, dy);
          psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
      }
  
@@ -253,9 +335,11 @@ struct BEChooserSingleSample
      // Last Arg Terminator
      static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
      {
-        switch(tArg)
+        switch (tArg)
          {
-        case SWR_BACKEND_SINGLE_SAMPLE: return BackendSingleSample<SwrBackendTraits<ArgsT...>>; break;
+        case SWR_BACKEND_SINGLE_SAMPLE:
+            return BackendSingleSample<SwrBackendTraits<ArgsT...>>;
+            break;
          case SWR_BACKEND_MSAA_PIXEL_RATE:
          case SWR_BACKEND_MSAA_SAMPLE_RATE:
          default:
@@ -269,15 +353,25 @@ struct BEChooserSingleSample
      template <typename... TArgsT>
      static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
      {
-        switch(tArg)
+        switch (tArg)
          {
-        case SWR_INPUT_COVERAGE_NONE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...); break;
-        case SWR_INPUT_COVERAGE_NORMAL: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(remainingArgs...); break;
-        case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE: return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(remainingArgs...); break;
+        case SWR_INPUT_COVERAGE_NONE:
+            return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+                remainingArgs...);
+            break;
+        case SWR_INPUT_COVERAGE_NORMAL:
+            return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
+                remainingArgs...);
+            break;
+        case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
+            return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
+                remainingArgs...);
+            break;
          default:
-        SWR_ASSERT(0 && "Invalid sample pattern\n");
-        return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(remainingArgs...);
-        break;
+            SWR_ASSERT(0 && "Invalid sample pattern\n");
+            return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
+                remainingArgs...);
+            break;
          }
      }
  
@@ -285,17 +379,27 @@ struct BEChooserSingleSample
      template <typename... TArgsT>
      static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
      {
-        switch(tArg)
+        switch (tArg)
          {
-        case SWR_MULTISAMPLE_1X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_2X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_4X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_8X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...); break;
-        case SWR_MULTISAMPLE_16X: return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...); break;
+        case SWR_MULTISAMPLE_1X:
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_2X:
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_4X:
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_8X:
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
+            break;
+        case SWR_MULTISAMPLE_16X:
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
+            break;
          default:
-        SWR_ASSERT(0 && "Invalid sample count\n");
-        return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
-        break;
+            SWR_ASSERT(0 && "Invalid sample count\n");
+            return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
+            break;
          }
      }
  
@@ -303,7 +407,7 @@ struct BEChooserSingleSample
      template <typename... TArgsT>
      static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
      {
-        if(tArg == true)
+        if (tArg == true)
          {
              return BEChooserSingleSample<ArgsT..., 1>::GetFunc(remainingArgs...);
          }
@@ -314,15 +418,20 @@ struct BEChooserSingleSample
  
  void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
  {
-    for(uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
+    for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
      {
-        for(uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
+        for (uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
          {
-            for(uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
+            for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
              {
                  table[inputCoverage][isCentroid][canEarlyZ] =
-                    BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X, false, (SWR_INPUT_COVERAGE)inputCoverage,
-                                         (isCentroid > 0), false, (canEarlyZ > 0), SWR_BACKEND_SINGLE_SAMPLE);
+                    BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X,
+                                                     false,
+                                                     (SWR_INPUT_COVERAGE)inputCoverage,
+                                                     (isCentroid > 0),
+                                                     false,
+                                                     (canEarlyZ > 0),
+                                                     SWR_BACKEND_SINGLE_SAMPLE);
              }
          }
      }
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp

index 7b9c20ef802b45bfd5249808c4cc94d875962442..6d9680b72c550ba4b3a2358fd3e1c58b41b5e43c 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file binner.cpp
-*
-* @brief Implementation for the macrotile binner
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file binner.cpp
+ *
+ * @brief Implementation for the macrotile binner
+ *
+ ******************************************************************************/
  
  #include "binner.h"
  #include "context.h"
@@ -37,27 +37,25 @@
  
  // Function Prototype
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[],
-    Float<SIMD_T> recipW[],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx);
+void BinPostSetupLinesImpl(DRAW_CONTEXT*          pDC,
+                           PA_STATE&              pa,
+                           uint32_t               workerId,
+                           Vec4<SIMD_T>           prim[],
+                           Float<SIMD_T>          recipW[],
+                           uint32_t               primMask,
+                           Integer<SIMD_T> const& primID,
+                           Integer<SIMD_T> const& viewportIdx,
+                           Integer<SIMD_T> const& rtIdx);
  
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx);
+void BinPostSetupPointsImpl(DRAW_CONTEXT*          pDC,
+                            PA_STATE&              pa,
+                            uint32_t               workerId,
+                            Vec4<SIMD_T>           prim[],
+                            uint32_t               primMask,
+                            Integer<SIMD_T> const& primID,
+                            Integer<SIMD_T> const& viewportIdx,
+                            Integer<SIMD_T> const& rtIdx);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Processes attributes for the backend based on linkage mask and
@@ -68,26 +66,23 @@ void BinPostSetupPointsImpl(
  /// @param pLinkageMap - maps VS attribute slot to PS slot
  /// @param triIndex - Triangle to process attributes for
  /// @param pBuffer - Output result
-template<typename NumVertsT, typename IsSwizzledT, typename HasConstantInterpT, typename IsDegenerate>
+template <typename NumVertsT,
+          typename IsSwizzledT,
+          typename HasConstantInterpT,
+          typename IsDegenerate>
  INLINE void ProcessAttributes(
-    DRAW_CONTEXT *pDC,
-    PA_STATE&pa,
-    uint32_t triIndex,
-    uint32_t primId,
-    float *pBuffer)
+    DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t triIndex, uint32_t primId, float* pBuffer)
  {
      static_assert(NumVertsT::value > 0 && NumVertsT::value <= 3, "Invalid value for NumVertsT");
      const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
      // Conservative Rasterization requires degenerate tris to have constant attribute interpolation
-    uint32_t constantInterpMask = IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
+    uint32_t constantInterpMask =
+        IsDegenerate::value ? 0xFFFFFFFF : backendState.constantInterpolationMask;
      const uint32_t provokingVertex = pDC->pState->state.frontendState.topologyProvokingVertex;
-    const PRIMITIVE_TOPOLOGY topo = pa.binTopology;
+    const PRIMITIVE_TOPOLOGY topo  = pa.binTopology;
  
      static const float constTable[3][4] = {
-        { 0.0f, 0.0f, 0.0f, 0.0f },
-        { 0.0f, 0.0f, 0.0f, 1.0f },
-        { 1.0f, 1.0f, 1.0f, 1.0f }
-    };
+        {0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 0.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f}};
  
      for (uint32_t i = 0; i < backendState.numAttributes; ++i)
      {
@@ -96,46 +91,45 @@ INLINE void ProcessAttributes(
          {
              SWR_ATTRIB_SWIZZLE attribSwizzle = backendState.swizzleMap[i];
              inputSlot = backendState.vertexAttribOffset + attribSwizzle.sourceAttrib;
-
          }
          else
          {
              inputSlot = backendState.vertexAttribOffset + i;
          }
  
-        simd4scalar attrib[3];    // triangle attribs (always 4 wide)
-        float* pAttribStart = pBuffer;
+        simd4scalar attrib[3]; // triangle attribs (always 4 wide)
+        float*      pAttribStart = pBuffer;
  
          if (HasConstantInterpT::value || IsDegenerate::value)
          {
              if (CheckBit(constantInterpMask, i))
              {
-                uint32_t vid;
-                uint32_t adjustedTriIndex;
-                static const uint32_t tristripProvokingVertex[] = { 0, 2, 1 };
-                static const int32_t quadProvokingTri[2][4] = { { 0, 0, 0, 1 },{ 0, -1, 0, 0 } };
-                static const uint32_t quadProvokingVertex[2][4] = { { 0, 1, 2, 2 },{ 0, 1, 1, 2 } };
-                static const int32_t qstripProvokingTri[2][4] = { { 0, 0, 0, 1 },{ -1, 0, 0, 0 } };
-                static const uint32_t qstripProvokingVertex[2][4] = { { 0, 1, 2, 1 },{ 0, 0, 2, 1 } };
-
-                switch (topo) {
+                uint32_t              vid;
+                uint32_t              adjustedTriIndex;
+                static const uint32_t tristripProvokingVertex[]   = {0, 2, 1};
+                static const int32_t  quadProvokingTri[2][4]      = {{0, 0, 0, 1}, {0, -1, 0, 0}};
+                static const uint32_t quadProvokingVertex[2][4]   = {{0, 1, 2, 2}, {0, 1, 1, 2}};
+                static const int32_t  qstripProvokingTri[2][4]    = {{0, 0, 0, 1}, {-1, 0, 0, 0}};
+                static const uint32_t qstripProvokingVertex[2][4] = {{0, 1, 2, 1}, {0, 0, 2, 1}};
+
+                switch (topo)
+                {
                  case TOP_QUAD_LIST:
                      adjustedTriIndex = triIndex + quadProvokingTri[triIndex & 1][provokingVertex];
-                    vid = quadProvokingVertex[triIndex & 1][provokingVertex];
+                    vid              = quadProvokingVertex[triIndex & 1][provokingVertex];
                      break;
                  case TOP_QUAD_STRIP:
                      adjustedTriIndex = triIndex + qstripProvokingTri[triIndex & 1][provokingVertex];
-                    vid = qstripProvokingVertex[triIndex & 1][provokingVertex];
+                    vid              = qstripProvokingVertex[triIndex & 1][provokingVertex];
                      break;
                  case TOP_TRIANGLE_STRIP:
                      adjustedTriIndex = triIndex;
-                    vid = (triIndex & 1)
-                        ? tristripProvokingVertex[provokingVertex]
-                        : provokingVertex;
+                    vid =
+                        (triIndex & 1) ? tristripProvokingVertex[provokingVertex] : provokingVertex;
                      break;
                  default:
                      adjustedTriIndex = triIndex;
-                    vid = provokingVertex;
+                    vid              = provokingVertex;
                      break;
                  }
  
@@ -214,7 +208,7 @@ INLINE void ProcessAttributes(
      }
  }
  
-typedef void(*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
+typedef void (*PFN_PROCESS_ATTRIBUTES)(DRAW_CONTEXT*, PA_STATE&, uint32_t, uint32_t, float*);
  
  struct ProcessAttributesChooser
  {
@@ -227,9 +221,13 @@ struct ProcessAttributesChooser
      }
  };
  
-PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzled, bool HasConstantInterp, bool IsDegenerate = false)
+PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts,
+                                                bool     IsSwizzled,
+                                                bool     HasConstantInterp,
+                                                bool     IsDegenerate = false)
  {
-    return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
+    return TemplateArgUnroller<ProcessAttributesChooser>::GetFunc(
+        IntArg<1, 3>{NumVerts}, IsSwizzled, HasConstantInterp, IsDegenerate);
  }
  
  //////////////////////////////////////////////////////////////////////////
@@ -240,18 +238,22 @@ PFN_PROCESS_ATTRIBUTES GetProcessAttributesFunc(uint32_t NumVerts, bool IsSwizzl
  /// @param primIndex - primitive index to process
  /// @param clipDistMask - mask of enabled clip distances
  /// @param pUserClipBuffer - buffer to store results
-template<uint32_t NumVerts>
-void ProcessUserClipDist(const SWR_BACKEND_STATE& state, PA_STATE& pa, uint32_t primIndex, float *pRecipW, float* pUserClipBuffer)
+template <uint32_t NumVerts>
+void ProcessUserClipDist(const SWR_BACKEND_STATE& state,
+                         PA_STATE&                pa,
+                         uint32_t                 primIndex,
+                         float*                   pRecipW,
+                         float*                   pUserClipBuffer)
  {
-    DWORD clipDist;
+    DWORD    clipDist;
      uint32_t clipDistMask = state.clipDistanceMask;
      while (_BitScanForward(&clipDist, clipDistMask))
      {
          clipDistMask &= ~(1 << clipDist);
          uint32_t clipSlot = clipDist >> 2;
          uint32_t clipComp = clipDist & 0x3;
-        uint32_t clipAttribSlot = clipSlot == 0 ?
-            state.vertexClipCullOffset : state.vertexClipCullOffset + 1;
+        uint32_t clipAttribSlot =
+            clipSlot == 0 ? state.vertexClipCullOffset : state.vertexClipCullOffset + 1;
  
          simd4scalar primClipDist[3];
          pa.AssembleSingle(clipAttribSlot, primIndex, primClipDist);
@@ -281,30 +283,35 @@ void ProcessUserClipDist(const SWR_BACKEND_STATE& state, PA_STATE& pa, uint32_t
  }
  
  INLINE
-void TransposeVertices(simd4scalar(&dst)[8], const simdscalar &src0, const simdscalar &src1, const simdscalar &src2)
+void TransposeVertices(simd4scalar (&dst)[8],
+                       const simdscalar& src0,
+                       const simdscalar& src1,
+                       const simdscalar& src2)
  {
      vTranspose3x8(dst, src0, src1, src2);
  }
  
  INLINE
-void TransposeVertices(simd4scalar(&dst)[16], const simd16scalar &src0, const simd16scalar &src1, const simd16scalar &src2)
+void TransposeVertices(simd4scalar (&dst)[16],
+                       const simd16scalar& src0,
+                       const simd16scalar& src1,
+                       const simd16scalar& src2)
  {
-    vTranspose4x16(reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
+    vTranspose4x16(
+        reinterpret_cast<simd16scalar(&)[4]>(dst), src0, src1, src2, _simd16_setzero_ps());
  }
  
-
  #if KNOB_ENABLE_EARLY_RAST
  
  #define ER_SIMD_TILE_X_DIM (1 << ER_SIMD_TILE_X_SHIFT)
  #define ER_SIMD_TILE_Y_DIM (1 << ER_SIMD_TILE_Y_SHIFT)
  
-
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct EarlyRastHelper
  {
  };
  
-template<>
+template <>
  struct EarlyRastHelper<SIMD256>
  {
      static SIMD256::Integer InitShiftCntrl()
@@ -314,7 +321,7 @@ struct EarlyRastHelper<SIMD256>
  };
  
  #if USE_SIMD16_FRONTEND
-template<>
+template <>
  struct EarlyRastHelper<SIMD512>
  {
      static SIMD512::Integer InitShiftCntrl()
@@ -340,21 +347,22 @@ struct EarlyRastHelper<SIMD512>
  /// @param oneTileMask - defines triangles for ER to work on
  ///                      (tris that fit into ER tile)
  template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-uint32_t SIMDCALL EarlyRasterizer(
-        SIMDBBOX_T<SIMD_T> &er_bbox,
-        Integer<SIMD_T> (&vAi)[3],
-        Integer<SIMD_T> (&vBi)[3],
-        Integer<SIMD_T> (&vXi)[3],
-        Integer<SIMD_T> (&vYi)[3],
-        uint32_t cwTrisMask,
-        uint32_t triMask,
-        uint32_t oneTileMask)
+uint32_t SIMDCALL EarlyRasterizer(SIMDBBOX_T<SIMD_T>& er_bbox,
+                                  Integer<SIMD_T> (&vAi)[3],
+                                  Integer<SIMD_T> (&vBi)[3],
+                                  Integer<SIMD_T> (&vXi)[3],
+                                  Integer<SIMD_T> (&vYi)[3],
+                                  uint32_t cwTrisMask,
+                                  uint32_t triMask,
+                                  uint32_t oneTileMask)
  {
      // step to pixel center of top-left pixel of the triangle bbox
-    Integer<SIMD_T> vTopLeftX = SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
+    Integer<SIMD_T> vTopLeftX =
+        SIMD_T::template slli_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(er_bbox.xmin);
      vTopLeftX = SIMD_T::add_epi32(vTopLeftX, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
  
-    Integer<SIMD_T> vTopLeftY = SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
+    Integer<SIMD_T> vTopLeftY =
+        SIMD_T::template slli_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(er_bbox.ymin);
      vTopLeftY = SIMD_T::add_epi32(vTopLeftY, SIMD_T::set1_epi32(FIXED_POINT_SCALE / 2));
  
      // negate A and B for CW tris
@@ -367,16 +375,22 @@ uint32_t SIMDCALL EarlyRasterizer(
  
      RDTSC_EVENT(FEEarlyRastEnter, _mm_popcnt_u32(oneTileMask & triMask), 0);
  
-    Integer<SIMD_T> vShiftCntrl = EarlyRastHelper <SIMD_T>::InitShiftCntrl();
-    Integer<SIMD_T> vCwTris = SIMD_T::set1_epi32(cwTrisMask);
-    Integer<SIMD_T> vMask = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl);
-
-    vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
-    vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
-    vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
-    vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
-    vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
-    vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
+    Integer<SIMD_T> vShiftCntrl = EarlyRastHelper<SIMD_T>::InitShiftCntrl();
+    Integer<SIMD_T> vCwTris     = SIMD_T::set1_epi32(cwTrisMask);
+    Integer<SIMD_T> vMask       = SIMD_T::sllv_epi32(vCwTris, vShiftCntrl);
+
+    vAi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vAi[0]), SIMD_T::castsi_ps(vNegA0), SIMD_T::castsi_ps(vMask)));
+    vAi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vAi[1]), SIMD_T::castsi_ps(vNegA1), SIMD_T::castsi_ps(vMask)));
+    vAi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vAi[2]), SIMD_T::castsi_ps(vNegA2), SIMD_T::castsi_ps(vMask)));
+    vBi[0] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vBi[0]), SIMD_T::castsi_ps(vNegB0), SIMD_T::castsi_ps(vMask)));
+    vBi[1] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vBi[1]), SIMD_T::castsi_ps(vNegB1), SIMD_T::castsi_ps(vMask)));
+    vBi[2] = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vBi[2]), SIMD_T::castsi_ps(vNegB2), SIMD_T::castsi_ps(vMask)));
  
      // evaluate edge equations at top-left pixel
      Integer<SIMD_T> vDeltaX0 = SIMD_T::sub_epi32(vTopLeftX, vXi[0]);
@@ -409,9 +423,12 @@ uint32_t SIMDCALL EarlyRasterizer(
      Integer<SIMD_T> vEdgeAdjust2 = SIMD_T::sub_epi32(vEdge2, SIMD_T::set1_epi32(1));
  
      // vA < 0
-    vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
-    vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
-    vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
+    vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vAi[0])));
+    vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vAi[1])));
+    vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vAi[2])));
  
      // vA == 0 && vB < 0
      Integer<SIMD_T> vCmp0 = SIMD_T::cmpeq_epi32(vAi[0], SIMD_T::setzero_si());
@@ -422,75 +439,77 @@ uint32_t SIMDCALL EarlyRasterizer(
      vCmp1 = SIMD_T::and_si(vCmp1, vBi[1]);
      vCmp2 = SIMD_T::and_si(vCmp2, vBi[2]);
  
-    vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
-    vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
-    vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
-
+    vEdge0 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge0), SIMD_T::castsi_ps(vEdgeAdjust0), SIMD_T::castsi_ps(vCmp0)));
+    vEdge1 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge1), SIMD_T::castsi_ps(vEdgeAdjust1), SIMD_T::castsi_ps(vCmp1)));
+    vEdge2 = SIMD_T::castps_si(SIMD_T::blendv_ps(
+        SIMD_T::castsi_ps(vEdge2), SIMD_T::castsi_ps(vEdgeAdjust2), SIMD_T::castsi_ps(vCmp2)));
  
  #if ER_SIMD_TILE_X_DIM == 4 && ER_SIMD_TILE_Y_DIM == 4
      // Go down
      // coverage pixel 0
      Integer<SIMD_T> vMask0 = SIMD_T::and_si(vEdge0, vEdge1);
-    vMask0 = SIMD_T::and_si(vMask0, vEdge2);
+    vMask0                 = SIMD_T::and_si(vMask0, vEdge2);
  
      // coverage pixel 1
      Integer<SIMD_T> vEdge0N = SIMD_T::add_epi32(vEdge0, vBi[0]);
      Integer<SIMD_T> vEdge1N = SIMD_T::add_epi32(vEdge1, vBi[1]);
      Integer<SIMD_T> vEdge2N = SIMD_T::add_epi32(vEdge2, vBi[2]);
-    Integer<SIMD_T> vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
+    Integer<SIMD_T> vMask1  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask1                  = SIMD_T::and_si(vMask1, vEdge2N);
  
      // coverage pixel 2
-    vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
-    vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
-    vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
+    vEdge0N                = SIMD_T::add_epi32(vEdge0N, vBi[0]);
+    vEdge1N                = SIMD_T::add_epi32(vEdge1N, vBi[1]);
+    vEdge2N                = SIMD_T::add_epi32(vEdge2N, vBi[2]);
      Integer<SIMD_T> vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
+    vMask2                 = SIMD_T::and_si(vMask2, vEdge2N);
  
      // coverage pixel 3
-    vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
-    vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
-    vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
+    vEdge0N                = SIMD_T::add_epi32(vEdge0N, vBi[0]);
+    vEdge1N                = SIMD_T::add_epi32(vEdge1N, vBi[1]);
+    vEdge2N                = SIMD_T::add_epi32(vEdge2N, vBi[2]);
      Integer<SIMD_T> vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
+    vMask3                 = SIMD_T::and_si(vMask3, vEdge2N);
  
      // One step to the right and then up
  
      // coverage pixel 4
-    vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
-    vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
-    vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
+    vEdge0N                = SIMD_T::add_epi32(vEdge0N, vAi[0]);
+    vEdge1N                = SIMD_T::add_epi32(vEdge1N, vAi[1]);
+    vEdge2N                = SIMD_T::add_epi32(vEdge2N, vAi[2]);
      Integer<SIMD_T> vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
+    vMask4                 = SIMD_T::and_si(vMask4, vEdge2N);
  
      // coverage pixel 5
-    vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
-    vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
-    vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+    vEdge0N                = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+    vEdge1N                = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+    vEdge2N                = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
      Integer<SIMD_T> vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
+    vMask5                 = SIMD_T::and_si(vMask5, vEdge2N);
  
      // coverage pixel 6
-    vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
-    vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
-    vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+    vEdge0N                = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+    vEdge1N                = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+    vEdge2N                = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
      Integer<SIMD_T> vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
+    vMask6                 = SIMD_T::and_si(vMask6, vEdge2N);
  
      // coverage pixel 7
-    vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
-    vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
-    vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
+    vEdge0N                = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
+    vEdge1N                = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
+    vEdge2N                = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
      Integer<SIMD_T> vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
+    vMask7                 = SIMD_T::and_si(vMask7, vEdge2N);
  
      Integer<SIMD_T> vLit1 = SIMD_T::or_si(vMask0, vMask1);
-    vLit1 = SIMD_T::or_si(vLit1, vMask2);
-    vLit1 = SIMD_T::or_si(vLit1, vMask3);
-    vLit1 = SIMD_T::or_si(vLit1, vMask4);
-    vLit1 = SIMD_T::or_si(vLit1, vMask5);
-    vLit1 = SIMD_T::or_si(vLit1, vMask6);
-    vLit1 = SIMD_T::or_si(vLit1, vMask7);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask2);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask3);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask4);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask5);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask6);
+    vLit1                 = SIMD_T::or_si(vLit1, vMask7);
  
      // Step to the right and go down again
  
@@ -498,29 +517,29 @@ uint32_t SIMDCALL EarlyRasterizer(
      vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
      vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
      vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
-    vMask0 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask0 = SIMD_T::and_si(vMask0, vEdge2N);
+    vMask0  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask0  = SIMD_T::and_si(vMask0, vEdge2N);
  
      // coverage pixel 1
      vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
-    vMask1 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask1 = SIMD_T::and_si(vMask1, vEdge2N);
+    vMask1  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask1  = SIMD_T::and_si(vMask1, vEdge2N);
  
      // coverage pixel 2
      vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
-    vMask2 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask2 = SIMD_T::and_si(vMask2, vEdge2N);
+    vMask2  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask2  = SIMD_T::and_si(vMask2, vEdge2N);
  
      // coverage pixel 3
      vEdge0N = SIMD_T::add_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::add_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::add_epi32(vEdge2N, vBi[2]);
-    vMask3 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask3 = SIMD_T::and_si(vMask3, vEdge2N);
+    vMask3  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask3  = SIMD_T::and_si(vMask3, vEdge2N);
  
      // And for the last time - to the right and up
  
@@ -528,37 +547,37 @@ uint32_t SIMDCALL EarlyRasterizer(
      vEdge0N = SIMD_T::add_epi32(vEdge0N, vAi[0]);
      vEdge1N = SIMD_T::add_epi32(vEdge1N, vAi[1]);
      vEdge2N = SIMD_T::add_epi32(vEdge2N, vAi[2]);
-    vMask4 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask4 = SIMD_T::and_si(vMask4, vEdge2N);
+    vMask4  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask4  = SIMD_T::and_si(vMask4, vEdge2N);
  
      // coverage pixel 5
      vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
-    vMask5 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask5 = SIMD_T::and_si(vMask5, vEdge2N);
+    vMask5  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask5  = SIMD_T::and_si(vMask5, vEdge2N);
  
      // coverage pixel 6
      vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
-    vMask6 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask6 = SIMD_T::and_si(vMask6, vEdge2N);
+    vMask6  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask6  = SIMD_T::and_si(vMask6, vEdge2N);
  
      // coverage pixel 7
      vEdge0N = SIMD_T::sub_epi32(vEdge0N, vBi[0]);
      vEdge1N = SIMD_T::sub_epi32(vEdge1N, vBi[1]);
      vEdge2N = SIMD_T::sub_epi32(vEdge2N, vBi[2]);
-    vMask7 = SIMD_T::and_si(vEdge0N, vEdge1N);
-    vMask7 = SIMD_T::and_si(vMask7, vEdge2N);
+    vMask7  = SIMD_T::and_si(vEdge0N, vEdge1N);
+    vMask7  = SIMD_T::and_si(vMask7, vEdge2N);
  
      Integer<SIMD_T> vLit2 = SIMD_T::or_si(vMask0, vMask1);
-    vLit2 = SIMD_T::or_si(vLit2, vMask2);
-    vLit2 = SIMD_T::or_si(vLit2, vMask3);
-    vLit2 = SIMD_T::or_si(vLit2, vMask4);
-    vLit2 = SIMD_T::or_si(vLit2, vMask5);
-    vLit2 = SIMD_T::or_si(vLit2, vMask6);
-    vLit2 = SIMD_T::or_si(vLit2, vMask7);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask2);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask3);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask4);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask5);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask6);
+    vLit2                 = SIMD_T::or_si(vLit2, vMask7);
  
      Integer<SIMD_T> vLit = SIMD_T::or_si(vLit1, vLit2);
  
@@ -612,7 +631,7 @@ uint32_t SIMDCALL EarlyRasterizer(
  
  #endif
      // Check which triangles has any pixel lit
-    uint32_t maskLit = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
+    uint32_t maskLit   = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vLit));
      uint32_t maskUnlit = ~maskLit & oneTileMask;
  
      uint32_t oldTriMask = triMask;
@@ -638,25 +657,24 @@ uint32_t SIMDCALL EarlyRasterizer(
  /// @param viewportIdx - viewport array index for each triangle.
  /// @tparam CT - ConservativeRastFETraits
  template <typename SIMD_T, uint32_t SIMD_WIDTH, typename CT>
-void SIMDCALL BinTrianglesImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> tri[3],
-    uint32_t triMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx)
+void SIMDCALL BinTrianglesImpl(DRAW_CONTEXT*          pDC,
+                               PA_STATE&              pa,
+                               uint32_t               workerId,
+                               Vec4<SIMD_T>           tri[3],
+                               uint32_t               triMask,
+                               Integer<SIMD_T> const& primID,
+                               Integer<SIMD_T> const& viewportIdx,
+                               Integer<SIMD_T> const& rtIdx)
  {
-    const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+    const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
  
      RDTSC_BEGIN(FEBinTriangles, pDC->drawId);
  
-    const API_STATE& state = GetApiState(pDC);
-    const SWR_RASTSTATE& rastState = state.rastState;
-    const SWR_FRONTEND_STATE& feState = state.frontendState;
+    const API_STATE&          state     = GetApiState(pDC);
+    const SWR_RASTSTATE&      rastState = state.rastState;
+    const SWR_FRONTEND_STATE& feState   = state.frontendState;
  
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
+    MacroTileMgr* pTileMgr = pDC->pTileMgr;
  
      Float<SIMD_T> vRecipW0 = SIMD_T::set1_ps(1.0f);
      Float<SIMD_T> vRecipW1 = SIMD_T::set1_ps(1.0f);
@@ -724,8 +742,10 @@ void SIMDCALL BinTrianglesImpl(
      calcDeterminantIntVertical(vAi, vBi, vDet);
  
      // cull zero area
-    uint32_t maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[0], SIMD_T::setzero_si())));
-    uint32_t maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[1], SIMD_T::setzero_si())));
+    uint32_t maskLo =
+        SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[0], SIMD_T::setzero_si())));
+    uint32_t maskHi =
+        SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpeq_epi64(vDet[1], SIMD_T::setzero_si())));
  
      uint32_t cullZeroAreaMask = maskLo | (maskHi << (SIMD_WIDTH / 2));
  
@@ -744,13 +764,17 @@ void SIMDCALL BinTrianglesImpl(
      uint32_t frontWindingTris;
      if (rastState.frontWinding == SWR_FRONTWINDING_CW)
      {
-        maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
-        maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
+        maskLo = SIMD_T::movemask_pd(
+            SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
+        maskHi = SIMD_T::movemask_pd(
+            SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
      }
      else
      {
-        maskLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[0])));
-        maskHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[1])));
+        maskLo = SIMD_T::movemask_pd(
+            SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[0])));
+        maskHi = SIMD_T::movemask_pd(
+            SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(SIMD_T::setzero_si(), vDet[1])));
      }
      frontWindingTris = maskLo | (maskHi << (SIMD_WIDTH / 2));
  
@@ -758,12 +782,24 @@ void SIMDCALL BinTrianglesImpl(
      uint32_t cullTris;
      switch ((SWR_CULLMODE)rastState.cullMode)
      {
-    case SWR_CULLMODE_BOTH:  cullTris = 0xffffffff; break;
-    case SWR_CULLMODE_NONE:  cullTris = 0x0; break;
-    case SWR_CULLMODE_FRONT: cullTris = frontWindingTris; break;
-        // 0 area triangles are marked as backfacing, which is required behavior for conservative rast
-    case SWR_CULLMODE_BACK:  cullTris = ~frontWindingTris; break;
-    default: SWR_INVALID("Invalid cull mode: %d", rastState.cullMode); cullTris = 0x0; break;
+    case SWR_CULLMODE_BOTH:
+        cullTris = 0xffffffff;
+        break;
+    case SWR_CULLMODE_NONE:
+        cullTris = 0x0;
+        break;
+    case SWR_CULLMODE_FRONT:
+        cullTris = frontWindingTris;
+        break;
+        // 0 area triangles are marked as backfacing, which is required behavior for conservative
+        // rast
+    case SWR_CULLMODE_BACK:
+        cullTris = ~frontWindingTris;
+        break;
+    default:
+        SWR_INVALID("Invalid cull mode: %d", rastState.cullMode);
+        cullTris = 0x0;
+        break;
      }
  
      triMask &= ~cullTris;
@@ -777,12 +813,12 @@ void SIMDCALL BinTrianglesImpl(
  
      /// Note: these variable initializations must stay above any 'goto endBenTriangles'
      // compute per tri backface
-    uint32_t frontFaceMask = frontWindingTris;
-    uint32_t *pPrimID = (uint32_t *)&primID;
-    const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
-    DWORD triIndex = 0;
+    uint32_t        frontFaceMask  = frontWindingTris;
+    uint32_t*       pPrimID        = (uint32_t*)&primID;
+    const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
+    DWORD           triIndex       = 0;
  
-    uint32_t edgeEnable;
+    uint32_t      edgeEnable;
      PFN_WORK_FUNC pfnWork;
      if (CT::IsConservativeT::value)
      {
@@ -794,13 +830,15 @@ void SIMDCALL BinTrianglesImpl(
              const Integer<SIMD_T> x0x1Mask = SIMD_T::cmpeq_epi32(vXi[0], vXi[1]);
              const Integer<SIMD_T> y0y1Mask = SIMD_T::cmpeq_epi32(vYi[0], vYi[1]);
  
-            uint32_t e0Mask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x0x1Mask, y0y1Mask)));
+            uint32_t e0Mask =
+                SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x0x1Mask, y0y1Mask)));
  
              // e1 = v2-v1
              const Integer<SIMD_T> x1x2Mask = SIMD_T::cmpeq_epi32(vXi[1], vXi[2]);
              const Integer<SIMD_T> y1y2Mask = SIMD_T::cmpeq_epi32(vYi[1], vYi[2]);
  
-            uint32_t e1Mask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x1x2Mask, y1y2Mask)));
+            uint32_t e1Mask =
+                SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(x1x2Mask, y1y2Mask)));
  
              // e2 = v0-v2
              // if v0 == v1 & v1 == v2, v0 == v2
@@ -827,8 +865,12 @@ void SIMDCALL BinTrianglesImpl(
      else
      {
          // degenerate triangles won't be sent to rasterizer; just enable all edges
-        pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
-            (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(ALL_EDGES_VALID), (state.scissorsTileAligned == false));
+        pfnWork = GetRasterizerFunc(rastState.sampleCount,
+                                    rastState.bIsCenterPattern,
+                                    (rastState.conservativeRast > 0),
+                                    (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
+                                    EdgeValToEdgeState(ALL_EDGES_VALID),
+                                    (state.scissorsTileAligned == false));
      }
  
      SIMDBBOX_T<SIMD_T> bbox;
@@ -854,20 +896,20 @@ void SIMDCALL BinTrianglesImpl(
  
          {
              Integer<SIMD_T> xmin = SIMD_T::add_epi32(bbox.xmin, SIMD_T::set1_epi32(127));
-            xmin = SIMD_T::and_si(xmin, SIMD_T::set1_epi32(~255));
+            xmin                 = SIMD_T::and_si(xmin, SIMD_T::set1_epi32(~255));
              Integer<SIMD_T> xmax = SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(128));
-            xmax = SIMD_T::and_si(xmax, SIMD_T::set1_epi32(~255));
+            xmax                 = SIMD_T::and_si(xmax, SIMD_T::set1_epi32(~255));
  
              Integer<SIMD_T> vMaskH = SIMD_T::cmpeq_epi32(xmin, xmax);
  
              Integer<SIMD_T> ymin = SIMD_T::add_epi32(bbox.ymin, SIMD_T::set1_epi32(127));
-            ymin = SIMD_T::and_si(ymin, SIMD_T::set1_epi32(~255));
+            ymin                 = SIMD_T::and_si(ymin, SIMD_T::set1_epi32(~255));
              Integer<SIMD_T> ymax = SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(128));
-            ymax = SIMD_T::and_si(ymax, SIMD_T::set1_epi32(~255));
+            ymax                 = SIMD_T::and_si(ymax, SIMD_T::set1_epi32(~255));
  
              Integer<SIMD_T> vMaskV = SIMD_T::cmpeq_epi32(ymin, ymax);
  
-            vMaskV = SIMD_T::or_si(vMaskH, vMaskV);
+            vMaskV         = SIMD_T::or_si(vMaskH, vMaskV);
              cullCenterMask = SIMD_T::movemask_ps(SIMD_T::castsi_ps(vMaskV));
          }
  
@@ -879,15 +921,20 @@ void SIMDCALL BinTrianglesImpl(
          }
      }
  
-    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
-    // Gather the AOS effective scissor rects based on the per-prim VP index.
+    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
+    // exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
      /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
      {
          Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
          if (pa.viewportArrayActive)
  
          {
-            GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+            GatherScissors(&state.scissorsInFixedPoint[0],
+                           pViewportIndex,
+                           scisXmin,
+                           scisYmin,
+                           scisXmax,
+                           scisYmax);
          }
          else // broadcast fast path for non-VPAI case.
          {
@@ -909,23 +956,26 @@ void SIMDCALL BinTrianglesImpl(
  
      if (CT::IsConservativeT::value)
      {
-        // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
-        // some area. Bump the xmax/ymax edges out 
+        // in the case where a degenerate triangle is on a scissor edge, we need to make sure the
+        // primitive bbox has some area. Bump the xmax/ymax edges out
  
          Integer<SIMD_T> topEqualsBottom = SIMD_T::cmpeq_epi32(bbox.ymin, bbox.ymax);
-        bbox.ymax = SIMD_T::blendv_epi32(bbox.ymax, SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), topEqualsBottom);
+        bbox.ymax                       = SIMD_T::blendv_epi32(
+            bbox.ymax, SIMD_T::add_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), topEqualsBottom);
  
          Integer<SIMD_T> leftEqualsRight = SIMD_T::cmpeq_epi32(bbox.xmin, bbox.xmax);
-        bbox.xmax = SIMD_T::blendv_epi32(bbox.xmax, SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), leftEqualsRight);
+        bbox.xmax                       = SIMD_T::blendv_epi32(
+            bbox.xmax, SIMD_T::add_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), leftEqualsRight);
      }
  
      // Cull tris completely outside scissor
      {
          Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
          Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
-        Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+        Integer<SIMD_T> maskOutsideScissorXY =
+            SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
          uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
-        triMask = triMask & ~maskOutsideScissor;
+        triMask                     = triMask & ~maskOutsideScissor;
      }
  
  #if KNOB_ENABLE_EARLY_RAST
@@ -936,26 +986,34 @@ void SIMDCALL BinTrianglesImpl(
          // convert to ER tiles
          SIMDBBOX_T<SIMD_T> er_bbox;
  
-        er_bbox.xmin = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
-        er_bbox.xmax = SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
-        er_bbox.ymin = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
-        er_bbox.ymax = SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
+        er_bbox.xmin =
+            SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmin);
+        er_bbox.xmax =
+            SIMD_T::template srai_epi32<ER_SIMD_TILE_X_SHIFT + FIXED_POINT_SHIFT>(bbox.xmax);
+        er_bbox.ymin =
+            SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymin);
+        er_bbox.ymax =
+            SIMD_T::template srai_epi32<ER_SIMD_TILE_Y_SHIFT + FIXED_POINT_SHIFT>(bbox.ymax);
  
          Integer<SIMD_T> vTileX = SIMD_T::cmpeq_epi32(er_bbox.xmin, er_bbox.xmax);
          Integer<SIMD_T> vTileY = SIMD_T::cmpeq_epi32(er_bbox.ymin, er_bbox.ymax);
  
          // Take only triangles that fit into ER tile
-        uint32_t oneTileMask = triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
+        uint32_t oneTileMask =
+            triMask & SIMD_T::movemask_ps(SIMD_T::castsi_ps(SIMD_T::and_si(vTileX, vTileY)));
  
          if (oneTileMask)
          {
              // determine CW tris (det > 0)
-            uint32_t maskCwLo = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
-            uint32_t maskCwHi = SIMD_T::movemask_pd(SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
+            uint32_t maskCwLo = SIMD_T::movemask_pd(
+                SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[0], SIMD_T::setzero_si())));
+            uint32_t maskCwHi = SIMD_T::movemask_pd(
+                SIMD_T::castsi_pd(SIMD_T::cmpgt_epi64(vDet[1], SIMD_T::setzero_si())));
              uint32_t cwTrisMask = maskCwLo | (maskCwHi << (SIMD_WIDTH / 2));
  
              // Try early rasterization
-            triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
+            triMask = EarlyRasterizer<SIMD_T, SIMD_WIDTH, CT>(
+                er_bbox, vAi, vBi, vXi, vYi, cwTrisMask, triMask, oneTileMask);
  
              if (!triMask)
              {
@@ -963,7 +1021,6 @@ void SIMDCALL BinTrianglesImpl(
                  return;
              }
          }
-
      }
  #endif
  
@@ -975,29 +1032,32 @@ endBinTriangles:
      {
          // Simple non-conformant wireframe mode, useful for debugging
          // construct 3 SIMD lines out of the triangle and call the line binner for each SIMD
-        Vec4<SIMD_T> line[2];
+        Vec4<SIMD_T>  line[2];
          Float<SIMD_T> recipW[2];
  
-        line[0] = tri[0];
-        line[1] = tri[1];
+        line[0]   = tri[0];
+        line[1]   = tri[1];
          recipW[0] = vRecipW0;
          recipW[1] = vRecipW1;
  
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
  
-        line[0] = tri[1];
-        line[1] = tri[2];
+        line[0]   = tri[1];
+        line[1]   = tri[2];
          recipW[0] = vRecipW1;
          recipW[1] = vRecipW2;
  
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
  
-        line[0] = tri[2];
-        line[1] = tri[0];
+        line[0]   = tri[2];
+        line[1]   = tri[0];
          recipW[0] = vRecipW2;
          recipW[1] = vRecipW0;
  
-        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, line, recipW, triMask, primID, viewportIdx, rtIdx);
  
          RDTSC_END(FEBinTriangles, 1);
          return;
@@ -1005,9 +1065,12 @@ endBinTriangles:
      else if (rastState.fillMode == SWR_FILLMODE_POINT)
      {
          // Bin 3 points
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
-        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, &tri[0], triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, &tri[1], triMask, primID, viewportIdx, rtIdx);
+        BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
+            pDC, pa, workerId, &tri[2], triMask, primID, viewportIdx, rtIdx);
  
          RDTSC_END(FEBinTriangles, 1);
          return;
@@ -1019,12 +1082,13 @@ endBinTriangles:
      bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
      bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
  
-    OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+    OSALIGNSIMD16(uint32_t)
+    aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
  
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft),   bbox.xmin);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight),  bbox.xmax);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop),    bbox.ymin);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
  
      // transpose verts needed for backend
      /// @todo modify BE to take non-transformed verts
@@ -1041,7 +1105,7 @@ endBinTriangles:
      // scan remaining valid triangles and bin each separately
      while (_BitScanForward(&triIndex, triMask))
      {
-        uint32_t linkageCount = state.backendState.numAttributes;
+        uint32_t linkageCount     = state.backendState.numAttributes;
          uint32_t numScalarAttribs = linkageCount * 4;
  
          BE_WORK work;
@@ -1052,8 +1116,13 @@ endBinTriangles:
          {
              // only rasterize valid edges if we have a degenerate primitive
              int32_t triEdgeEnable = (edgeEnable >> (triIndex * 3)) & ALL_EDGES_VALID;
-            work.pfnWork = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, (rastState.conservativeRast > 0),
-                (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(triEdgeEnable), (state.scissorsTileAligned == false));
+            work.pfnWork =
+                GetRasterizerFunc(rastState.sampleCount,
+                                  rastState.bIsCenterPattern,
+                                  (rastState.conservativeRast > 0),
+                                  (SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage,
+                                  EdgeValToEdgeState(triEdgeEnable),
+                                  (state.scissorsTileAligned == false));
  
              // Degenerate triangles are required to be constant interpolated
              isDegenerate = (triEdgeEnable != ALL_EDGES_VALID) ? true : false;
@@ -1065,30 +1134,33 @@ endBinTriangles:
          }
  
          // Select attribute processor
-        PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(3,
-            state.backendState.swizzleEnable, state.backendState.constantInterpolationMask, isDegenerate);
+        PFN_PROCESS_ATTRIBUTES pfnProcessAttribs =
+            GetProcessAttributesFunc(3,
+                                     state.backendState.swizzleEnable,
+                                     state.backendState.constantInterpolationMask,
+                                     isDegenerate);
  
-        TRIANGLE_WORK_DESC &desc = work.desc.tri;
+        TRIANGLE_WORK_DESC& desc = work.desc.tri;
  
          desc.triFlags.frontFacing = state.forceFront ? 1 : ((frontFaceMask >> triIndex) & 1);
          desc.triFlags.renderTargetArrayIndex = aRTAI[triIndex];
-        desc.triFlags.viewportIndex = pViewportIndex[triIndex];
+        desc.triFlags.viewportIndex          = pViewportIndex[triIndex];
  
          auto pArena = pDC->pArena;
          SWR_ASSERT(pArena != nullptr);
  
          // store active attribs
-        float *pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
-        desc.pAttribs = pAttribs;
+        float* pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
+        desc.pAttribs   = pAttribs;
          desc.numAttribs = linkageCount;
          pfnProcessAttribs(pDC, pa, triIndex, pPrimID[triIndex], desc.pAttribs);
  
          // store triangle vertex data
          desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
  
-        SIMD128::store_ps(&desc.pTriBuffer[0],  vHorizX[triIndex]);
-        SIMD128::store_ps(&desc.pTriBuffer[4],  vHorizY[triIndex]);
-        SIMD128::store_ps(&desc.pTriBuffer[8],  vHorizZ[triIndex]);
+        SIMD128::store_ps(&desc.pTriBuffer[0], vHorizX[triIndex]);
+        SIMD128::store_ps(&desc.pTriBuffer[4], vHorizY[triIndex]);
+        SIMD128::store_ps(&desc.pTriBuffer[8], vHorizZ[triIndex]);
          SIMD128::store_ps(&desc.pTriBuffer[12], vHorizW[triIndex]);
  
          // store user clip distances
@@ -1096,7 +1168,8 @@ endBinTriangles:
          {
              uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
              desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 3 * sizeof(float));
-            ProcessUserClipDist<3>(state.backendState, pa, triIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
+            ProcessUserClipDist<3>(
+                state.backendState, pa, triIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
          }
  
          for (uint32_t y = aMTTop[triIndex]; y <= aMTBottom[triIndex]; ++y)
@@ -1112,39 +1185,39 @@ endBinTriangles:
              }
          }
  
-                     triMask &= ~(1 << triIndex);
+        triMask &= ~(1 << triIndex);
      }
  
      RDTSC_END(FEBinTriangles, 1);
  }
  
  template <typename CT>
-void BinTriangles(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simdvector tri[3],
-    uint32_t triMask,
-    simdscalari const &primID,
-    simdscalari const &viewportIdx,
-    simdscalari const &rtIdx)
+void BinTriangles(DRAW_CONTEXT*      pDC,
+                  PA_STATE&          pa,
+                  uint32_t           workerId,
+                  simdvector         tri[3],
+                  uint32_t           triMask,
+                  simdscalari const& primID,
+                  simdscalari const& viewportIdx,
+                  simdscalari const& rtIdx)
  {
-    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
+    BinTrianglesImpl<SIMD256, KNOB_SIMD_WIDTH, CT>(
+        pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
  }
  
  #if USE_SIMD16_FRONTEND
  template <typename CT>
-void SIMDCALL BinTriangles_simd16(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simd16vector tri[3],
-    uint32_t triMask,
-    simd16scalari const &primID,
-    simd16scalari const &viewportIdx,
-    simd16scalari const &rtIdx)
+void SIMDCALL BinTriangles_simd16(DRAW_CONTEXT*        pDC,
+                                  PA_STATE&            pa,
+                                  uint32_t             workerId,
+                                  simd16vector         tri[3],
+                                  uint32_t             triMask,
+                                  simd16scalari const& primID,
+                                  simd16scalari const& viewportIdx,
+                                  simd16scalari const& rtIdx)
  {
-    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
+    BinTrianglesImpl<SIMD512, KNOB_SIMD16_WIDTH, CT>(
+        pDC, pa, workerId, tri, triMask, primID, viewportIdx, rtIdx);
  }
  
  #endif
@@ -1186,27 +1259,26 @@ PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative)
  #endif
  
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupPointsImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx)
+void BinPostSetupPointsImpl(DRAW_CONTEXT*          pDC,
+                            PA_STATE&              pa,
+                            uint32_t               workerId,
+                            Vec4<SIMD_T>           prim[],
+                            uint32_t               primMask,
+                            Integer<SIMD_T> const& primID,
+                            Integer<SIMD_T> const& viewportIdx,
+                            Integer<SIMD_T> const& rtIdx)
  {
      RDTSC_BEGIN(FEBinPoints, pDC->drawId);
  
-    Vec4<SIMD_T> &primVerts = prim[0];
+    Vec4<SIMD_T>& primVerts = prim[0];
  
-    const API_STATE& state = GetApiState(pDC);
-    const SWR_RASTSTATE& rastState = state.rastState;
-    const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
+    const API_STATE&     state          = GetApiState(pDC);
+    const SWR_RASTSTATE& rastState      = state.rastState;
+    const uint32_t*      pViewportIndex = (uint32_t*)&viewportIdx;
  
      // Select attribute processor
-    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(1,
-        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
+        1, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
  
      // convert to fixed point
      Integer<SIMD_T> vXi, vYi;
@@ -1224,64 +1296,68 @@ void BinPostSetupPointsImpl(
          primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vXi));
          primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vYi));
  
-        // compute macro tile coordinates 
+        // compute macro tile coordinates
          Integer<SIMD_T> macroX = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(vXi);
          Integer<SIMD_T> macroY = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(vYi);
  
          OSALIGNSIMD16(uint32_t) aMacroX[SIMD_WIDTH], aMacroY[SIMD_WIDTH];
  
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMacroX), macroX);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMacroY), macroY);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroX), macroX);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMacroY), macroY);
  
          // compute raster tile coordinates
-        Integer<SIMD_T> rasterX = SIMD_T::template srai_epi32<KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT>(vXi);
-        Integer<SIMD_T> rasterY = SIMD_T::template srai_epi32<KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT>(vYi);
+        Integer<SIMD_T> rasterX =
+            SIMD_T::template srai_epi32<KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT>(vXi);
+        Integer<SIMD_T> rasterY =
+            SIMD_T::template srai_epi32<KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT>(vYi);
  
          // compute raster tile relative x,y for coverage mask
          Integer<SIMD_T> tileAlignedX = SIMD_T::template slli_epi32<KNOB_TILE_X_DIM_SHIFT>(rasterX);
          Integer<SIMD_T> tileAlignedY = SIMD_T::template slli_epi32<KNOB_TILE_Y_DIM_SHIFT>(rasterY);
  
-        Integer<SIMD_T> tileRelativeX = SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vXi), tileAlignedX);
-        Integer<SIMD_T> tileRelativeY = SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vYi), tileAlignedY);
+        Integer<SIMD_T> tileRelativeX =
+            SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vXi), tileAlignedX);
+        Integer<SIMD_T> tileRelativeY =
+            SIMD_T::sub_epi32(SIMD_T::template srai_epi32<FIXED_POINT_SHIFT>(vYi), tileAlignedY);
  
          OSALIGNSIMD16(uint32_t) aTileRelativeX[SIMD_WIDTH];
          OSALIGNSIMD16(uint32_t) aTileRelativeY[SIMD_WIDTH];
  
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileRelativeX), tileRelativeX);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileRelativeY), tileRelativeY);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeX), tileRelativeX);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileRelativeY), tileRelativeY);
  
          OSALIGNSIMD16(uint32_t) aTileAlignedX[SIMD_WIDTH];
          OSALIGNSIMD16(uint32_t) aTileAlignedY[SIMD_WIDTH];
  
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileAlignedX), tileAlignedX);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aTileAlignedY), tileAlignedY);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedX), tileAlignedX);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aTileAlignedY), tileAlignedY);
  
          OSALIGNSIMD16(float) aZ[SIMD_WIDTH];
-        SIMD_T::store_ps(reinterpret_cast<float *>(aZ), primVerts.z);
+        SIMD_T::store_ps(reinterpret_cast<float*>(aZ), primVerts.z);
  
          // store render target array index
-        const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
-        
-        uint32_t *pPrimID = (uint32_t *)&primID;
-        DWORD primIndex = 0;
+        const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
+
+        uint32_t* pPrimID   = (uint32_t*)&primID;
+        DWORD     primIndex = 0;
  
          const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
  
          // scan remaining valid triangles and bin each separately
          while (_BitScanForward(&primIndex, primMask))
          {
-            uint32_t linkageCount = backendState.numAttributes;
+            uint32_t linkageCount     = backendState.numAttributes;
              uint32_t numScalarAttribs = linkageCount * 4;
  
              BE_WORK work;
              work.type = DRAW;
  
-            TRIANGLE_WORK_DESC &desc = work.desc.tri;
+            TRIANGLE_WORK_DESC& desc = work.desc.tri;
  
              // points are always front facing
-            desc.triFlags.frontFacing = 1;
+            desc.triFlags.frontFacing            = 1;
              desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
-            desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+            desc.triFlags.viewportIndex          = pViewportIndex[primIndex];
  
              work.pfnWork = RasterizeSimplePoint;
  
@@ -1289,18 +1365,19 @@ void BinPostSetupPointsImpl(
              SWR_ASSERT(pArena != nullptr);
  
              // store attributes
-            float *pAttribs = (float*)pArena->AllocAligned(3 * numScalarAttribs * sizeof(float), 16);
-            desc.pAttribs = pAttribs;
+            float* pAttribs =
+                (float*)pArena->AllocAligned(3 * numScalarAttribs * sizeof(float), 16);
+            desc.pAttribs   = pAttribs;
              desc.numAttribs = linkageCount;
  
              pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], pAttribs);
  
              // store raster tile aligned x, y, perspective correct z
-            float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
-            desc.pTriBuffer = pTriBuffer;
+            float* pTriBuffer        = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
+            desc.pTriBuffer          = pTriBuffer;
              *(uint32_t*)pTriBuffer++ = aTileAlignedX[primIndex];
              *(uint32_t*)pTriBuffer++ = aTileAlignedY[primIndex];
-            *pTriBuffer = aZ[primIndex];
+            *pTriBuffer              = aZ[primIndex];
  
              uint32_t tX = aTileRelativeX[primIndex];
              uint32_t tY = aTileRelativeY[primIndex];
@@ -1310,7 +1387,7 @@ void BinPostSetupPointsImpl(
              work.desc.tri.triFlags.coverageMask = tX | (tY << 4);
  
              // bin it
-            MacroTileMgr *pTileMgr = pDC->pTileMgr;
+            MacroTileMgr* pTileMgr = pDC->pTileMgr;
  #if KNOB_ENABLE_TOSS_POINTS
              if (!KNOB_TOSS_SETUP_TRIS)
  #endif
@@ -1343,7 +1420,7 @@ void BinPostSetupPointsImpl(
          bbox.xmin = bbox.xmax = vXi;
          bbox.ymin = bbox.ymax = vYi;
  
-        Float<SIMD_T> vHalfWidth = SIMD_T::mul_ps(vPointSize, SIMD_T::set1_ps(0.5f));
+        Float<SIMD_T>   vHalfWidth  = SIMD_T::mul_ps(vPointSize, SIMD_T::set1_ps(0.5f));
          Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
  
          bbox.xmin = SIMD_T::sub_epi32(bbox.xmin, vHalfWidthi);
@@ -1351,15 +1428,20 @@ void BinPostSetupPointsImpl(
          bbox.ymin = SIMD_T::sub_epi32(bbox.ymin, vHalfWidthi);
          bbox.ymax = SIMD_T::add_epi32(bbox.ymax, vHalfWidthi);
  
-        // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
-        // Gather the AOS effective scissor rects based on the per-prim VP index.
+        // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge
+        // is exclusive. Gather the AOS effective scissor rects based on the per-prim VP index.
          /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
          {
              Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
  
              if (pa.viewportArrayActive)
              {
-                GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+                GatherScissors(&state.scissorsInFixedPoint[0],
+                               pViewportIndex,
+                               scisXmin,
+                               scisYmin,
+                               scisXmax,
+                               scisYmax);
              }
              else // broadcast fast path for non-VPAI case.
              {
@@ -1371,16 +1453,19 @@ void BinPostSetupPointsImpl(
  
              bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
              bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
-            bbox.xmax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
-            bbox.ymax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
+            bbox.xmax =
+                SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
+            bbox.ymax =
+                SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
          }
  
          // Cull bloated points completely outside scissor
          Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
          Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
-        Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+        Integer<SIMD_T> maskOutsideScissorXY =
+            SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
          uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
-        primMask = primMask & ~maskOutsideScissor;
+        primMask                    = primMask & ~maskOutsideScissor;
  
          // Convert bbox to macrotile units.
          bbox.xmin = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmin);
@@ -1388,46 +1473,47 @@ void BinPostSetupPointsImpl(
          bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
          bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
  
-        OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+        OSALIGNSIMD16(uint32_t)
+        aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
  
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft),   bbox.xmin);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight),  bbox.xmax);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop),    bbox.ymin);
-        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+        SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
  
          // store render target array index
-        const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+        const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
  
          OSALIGNSIMD16(float) aPointSize[SIMD_WIDTH];
-        SIMD_T::store_ps(reinterpret_cast<float *>(aPointSize), vPointSize);
+        SIMD_T::store_ps(reinterpret_cast<float*>(aPointSize), vPointSize);
  
-        uint32_t *pPrimID = (uint32_t *)&primID;
+        uint32_t* pPrimID = (uint32_t*)&primID;
  
          OSALIGNSIMD16(float) aPrimVertsX[SIMD_WIDTH];
          OSALIGNSIMD16(float) aPrimVertsY[SIMD_WIDTH];
          OSALIGNSIMD16(float) aPrimVertsZ[SIMD_WIDTH];
  
-        SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsX), primVerts.x);
-        SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsY), primVerts.y);
-        SIMD_T::store_ps(reinterpret_cast<float *>(aPrimVertsZ), primVerts.z);
+        SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsX), primVerts.x);
+        SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsY), primVerts.y);
+        SIMD_T::store_ps(reinterpret_cast<float*>(aPrimVertsZ), primVerts.z);
  
          // scan remaining valid prims and bin each separately
          const SWR_BACKEND_STATE& backendState = state.backendState;
-        DWORD primIndex;
+        DWORD                    primIndex;
          while (_BitScanForward(&primIndex, primMask))
          {
-            uint32_t linkageCount = backendState.numAttributes;
+            uint32_t linkageCount     = backendState.numAttributes;
              uint32_t numScalarAttribs = linkageCount * 4;
  
              BE_WORK work;
              work.type = DRAW;
  
-            TRIANGLE_WORK_DESC &desc = work.desc.tri;
+            TRIANGLE_WORK_DESC& desc = work.desc.tri;
  
-            desc.triFlags.frontFacing = 1;
-            desc.triFlags.pointSize = aPointSize[primIndex];
+            desc.triFlags.frontFacing            = 1;
+            desc.triFlags.pointSize              = aPointSize[primIndex];
              desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
-            desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+            desc.triFlags.viewportIndex          = pViewportIndex[primIndex];
  
              work.pfnWork = RasterizeTriPoint;
  
@@ -1440,11 +1526,11 @@ void BinPostSetupPointsImpl(
              pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
  
              // store point vertex data
-            float *pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
-            desc.pTriBuffer = pTriBuffer;
-            *pTriBuffer++ = aPrimVertsX[primIndex];
-            *pTriBuffer++ = aPrimVertsY[primIndex];
-            *pTriBuffer = aPrimVertsZ[primIndex];
+            float* pTriBuffer = (float*)pArena->AllocAligned(4 * sizeof(float), 16);
+            desc.pTriBuffer   = pTriBuffer;
+            *pTriBuffer++     = aPrimVertsX[primIndex];
+            *pTriBuffer++     = aPrimVertsY[primIndex];
+            *pTriBuffer       = aPrimVertsZ[primIndex];
  
              // store user clip distances
              if (backendState.clipDistanceMask)
@@ -1454,14 +1540,15 @@ void BinPostSetupPointsImpl(
                  float dists[8];
                  float one = 1.0f;
                  ProcessUserClipDist<1>(backendState, pa, primIndex, &one, dists);
-                for (uint32_t i = 0; i < numClipDist; i++) {
+                for (uint32_t i = 0; i < numClipDist; i++)
+                {
                      desc.pUserClipBuffer[3 * i + 0] = 0.0f;
                      desc.pUserClipBuffer[3 * i + 1] = 0.0f;
                      desc.pUserClipBuffer[3 * i + 2] = dists[i];
                  }
              }
  
-            MacroTileMgr *pTileMgr = pDC->pTileMgr;
+            MacroTileMgr* pTileMgr = pDC->pTileMgr;
              for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
              {
                  for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
@@ -1490,19 +1577,18 @@ void BinPostSetupPointsImpl(
  /// @param tri - Contains point position data for SIMDs worth of points.
  /// @param primID - Primitive ID for each point.
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPointsImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[3],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx)
+void BinPointsImpl(DRAW_CONTEXT*          pDC,
+                   PA_STATE&              pa,
+                   uint32_t               workerId,
+                   Vec4<SIMD_T>           prim[3],
+                   uint32_t               primMask,
+                   Integer<SIMD_T> const& primID,
+                   Integer<SIMD_T> const& viewportIdx,
+                   Integer<SIMD_T> const& rtIdx)
  {
-    const API_STATE& state = GetApiState(pDC);
-    const SWR_FRONTEND_STATE& feState = state.frontendState;
-    const SWR_RASTSTATE& rastState = state.rastState;
+    const API_STATE&          state     = GetApiState(pDC);
+    const SWR_FRONTEND_STATE& feState   = state.frontendState;
+    const SWR_RASTSTATE&      rastState = state.rastState;
  
      if (!feState.vpTransformDisable)
      {
@@ -1530,57 +1616,34 @@ void BinPointsImpl(
      prim[0].y = SIMD_T::add_ps(prim[0].y, offset);
  
      BinPostSetupPointsImpl<SIMD_T, SIMD_WIDTH>(
-        pDC,
-        pa,
-        workerId,
-        prim,
-        primMask,
-        primID,
-        viewportIdx,
-        rtIdx);
+        pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
  }
  
-void BinPoints(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simdvector prim[3],
-    uint32_t primMask,
-    simdscalari const &primID,
-    simdscalari const &viewportIdx,
-    simdscalari const &rtIdx)
+void BinPoints(DRAW_CONTEXT*      pDC,
+               PA_STATE&          pa,
+               uint32_t           workerId,
+               simdvector         prim[3],
+               uint32_t           primMask,
+               simdscalari const& primID,
+               simdscalari const& viewportIdx,
+               simdscalari const& rtIdx)
  {
      BinPointsImpl<SIMD256, KNOB_SIMD_WIDTH>(
-        pDC,
-        pa,
-        workerId,
-        prim,
-        primMask,
-        primID,
-        viewportIdx,
-        rtIdx);
+        pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
  }
  
  #if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simd16vector prim[3],
-    uint32_t primMask,
-    simd16scalari const &primID,
-    simd16scalari const &viewportIdx,
-    simd16scalari const & rtIdx)
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT*        pDC,
+                               PA_STATE&            pa,
+                               uint32_t             workerId,
+                               simd16vector         prim[3],
+                               uint32_t             primMask,
+                               simd16scalari const& primID,
+                               simd16scalari const& viewportIdx,
+                               simd16scalari const& rtIdx)
  {
      BinPointsImpl<SIMD512, KNOB_SIMD16_WIDTH>(
-        pDC,
-        pa,
-        workerId,
-        prim,
-        primMask,
-        primID,
-        viewportIdx,
-        rtIdx);
+        pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
  }
  
  #endif
@@ -1593,30 +1656,29 @@ void SIMDCALL BinPoints_simd16(
  /// @param primID - Primitive ID for each line.
  /// @param viewportIdx - Viewport Array Index for each line.
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void BinPostSetupLinesImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[],
-    Float<SIMD_T> recipW[],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const &rtIdx)
+void BinPostSetupLinesImpl(DRAW_CONTEXT*          pDC,
+                           PA_STATE&              pa,
+                           uint32_t               workerId,
+                           Vec4<SIMD_T>           prim[],
+                           Float<SIMD_T>          recipW[],
+                           uint32_t               primMask,
+                           Integer<SIMD_T> const& primID,
+                           Integer<SIMD_T> const& viewportIdx,
+                           Integer<SIMD_T> const& rtIdx)
  {
-    const uint32_t *aRTAI = reinterpret_cast<const uint32_t *>(&rtIdx);
+    const uint32_t* aRTAI = reinterpret_cast<const uint32_t*>(&rtIdx);
  
      RDTSC_BEGIN(FEBinLines, pDC->drawId);
  
-    const API_STATE &state = GetApiState(pDC);
-    const SWR_RASTSTATE &rastState = state.rastState;
+    const API_STATE&     state     = GetApiState(pDC);
+    const SWR_RASTSTATE& rastState = state.rastState;
  
      // Select attribute processor
-    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(2,
-        state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
+    PFN_PROCESS_ATTRIBUTES pfnProcessAttribs = GetProcessAttributesFunc(
+        2, state.backendState.swizzleEnable, state.backendState.constantInterpolationMask);
  
-    Float<SIMD_T> &vRecipW0 = recipW[0];
-    Float<SIMD_T> &vRecipW1 = recipW[1];
+    Float<SIMD_T>& vRecipW0 = recipW[0];
+    Float<SIMD_T>& vRecipW1 = recipW[1];
  
      // convert to fixed point
      Integer<SIMD_T> vXi[2], vYi[2];
@@ -1627,19 +1689,20 @@ void BinPostSetupLinesImpl(
      vYi[1] = fpToFixedPointVertical<SIMD_T>(prim[1].y);
  
      // compute x-major vs y-major mask
-    Integer<SIMD_T> xLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vXi[0], vXi[1]));
-    Integer<SIMD_T> yLength = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vYi[0], vYi[1]));
-    Float<SIMD_T> vYmajorMask = SIMD_T::castsi_ps(SIMD_T::cmpgt_epi32(yLength, xLength));
-    uint32_t yMajorMask = SIMD_T::movemask_ps(vYmajorMask);
+    Integer<SIMD_T> xLength     = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vXi[0], vXi[1]));
+    Integer<SIMD_T> yLength     = SIMD_T::abs_epi32(SIMD_T::sub_epi32(vYi[0], vYi[1]));
+    Float<SIMD_T>   vYmajorMask = SIMD_T::castsi_ps(SIMD_T::cmpgt_epi32(yLength, xLength));
+    uint32_t        yMajorMask  = SIMD_T::movemask_ps(vYmajorMask);
  
      // cull zero-length lines
      Integer<SIMD_T> vZeroLengthMask = SIMD_T::cmpeq_epi32(xLength, SIMD_T::setzero_si());
-    vZeroLengthMask = SIMD_T::and_si(vZeroLengthMask, SIMD_T::cmpeq_epi32(yLength, SIMD_T::setzero_si()));
+    vZeroLengthMask =
+        SIMD_T::and_si(vZeroLengthMask, SIMD_T::cmpeq_epi32(yLength, SIMD_T::setzero_si()));
  
      primMask &= ~SIMD_T::movemask_ps(SIMD_T::castsi_ps(vZeroLengthMask));
  
-    uint32_t *pPrimID = (uint32_t *)&primID;
-    const uint32_t *pViewportIndex = (uint32_t *)&viewportIdx;
+    uint32_t*       pPrimID        = (uint32_t*)&primID;
+    const uint32_t* pViewportIndex = (uint32_t*)&viewportIdx;
  
      // Calc bounding box of lines
      SIMDBBOX_T<SIMD_T> bbox;
@@ -1649,7 +1712,7 @@ void BinPostSetupLinesImpl(
      bbox.ymax = SIMD_T::max_epi32(vYi[0], vYi[1]);
  
      // bloat bbox by line width along minor axis
-    Float<SIMD_T> vHalfWidth = SIMD_T::set1_ps(rastState.lineWidth / 2.0f);
+    Float<SIMD_T>   vHalfWidth  = SIMD_T::set1_ps(rastState.lineWidth / 2.0f);
      Integer<SIMD_T> vHalfWidthi = fpToFixedPointVertical<SIMD_T>(vHalfWidth);
  
      SIMDBBOX_T<SIMD_T> bloatBox;
@@ -1664,13 +1727,19 @@ void BinPostSetupLinesImpl(
      bbox.ymin = SIMD_T::blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
      bbox.ymax = SIMD_T::blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
  
-    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is
+    // exclusive.
      {
          Integer<SIMD_T> scisXmin, scisYmin, scisXmax, scisYmax;
  
          if (pa.viewportArrayActive)
          {
-            GatherScissors(&state.scissorsInFixedPoint[0], pViewportIndex, scisXmin, scisYmin, scisXmax, scisYmax);
+            GatherScissors(&state.scissorsInFixedPoint[0],
+                           pViewportIndex,
+                           scisXmin,
+                           scisYmin,
+                           scisXmax,
+                           scisYmax);
          }
          else // broadcast fast path for non-VPAI case.
          {
@@ -1682,17 +1751,20 @@ void BinPostSetupLinesImpl(
  
          bbox.xmin = SIMD_T::max_epi32(bbox.xmin, scisXmin);
          bbox.ymin = SIMD_T::max_epi32(bbox.ymin, scisYmin);
-        bbox.xmax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
-        bbox.ymax = SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
+        bbox.xmax =
+            SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.xmax, SIMD_T::set1_epi32(1)), scisXmax);
+        bbox.ymax =
+            SIMD_T::min_epi32(SIMD_T::sub_epi32(bbox.ymax, SIMD_T::set1_epi32(1)), scisYmax);
      }
  
      // Cull prims completely outside scissor
      {
          Integer<SIMD_T> maskOutsideScissorX = SIMD_T::cmpgt_epi32(bbox.xmin, bbox.xmax);
          Integer<SIMD_T> maskOutsideScissorY = SIMD_T::cmpgt_epi32(bbox.ymin, bbox.ymax);
-        Integer<SIMD_T> maskOutsideScissorXY = SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
+        Integer<SIMD_T> maskOutsideScissorXY =
+            SIMD_T::or_si(maskOutsideScissorX, maskOutsideScissorY);
          uint32_t maskOutsideScissor = SIMD_T::movemask_ps(SIMD_T::castsi_ps(maskOutsideScissorXY));
-        primMask = primMask & ~maskOutsideScissor;
+        primMask                    = primMask & ~maskOutsideScissor;
      }
  
      // transpose verts needed for backend
@@ -1713,34 +1785,35 @@ void BinPostSetupLinesImpl(
      bbox.xmax = SIMD_T::template srai_epi32<KNOB_MACROTILE_X_DIM_FIXED_SHIFT>(bbox.xmax);
      bbox.ymax = SIMD_T::template srai_epi32<KNOB_MACROTILE_Y_DIM_FIXED_SHIFT>(bbox.ymax);
  
-    OSALIGNSIMD16(uint32_t) aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
+    OSALIGNSIMD16(uint32_t)
+    aMTLeft[SIMD_WIDTH], aMTRight[SIMD_WIDTH], aMTTop[SIMD_WIDTH], aMTBottom[SIMD_WIDTH];
  
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTLeft),   bbox.xmin);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTRight),  bbox.xmax);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTTop),    bbox.ymin);
-    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T> *>(aMTBottom), bbox.ymax);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTLeft), bbox.xmin);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTRight), bbox.xmax);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTTop), bbox.ymin);
+    SIMD_T::store_si(reinterpret_cast<Integer<SIMD_T>*>(aMTBottom), bbox.ymax);
  
      TransposeVertices(vHorizX, prim[0].x, prim[1].x, SIMD_T::setzero_ps());
      TransposeVertices(vHorizY, prim[0].y, prim[1].y, SIMD_T::setzero_ps());
      TransposeVertices(vHorizZ, prim[0].z, prim[1].z, SIMD_T::setzero_ps());
-    TransposeVertices(vHorizW, vRecipW0,  vRecipW1,  SIMD_T::setzero_ps());
+    TransposeVertices(vHorizW, vRecipW0, vRecipW1, SIMD_T::setzero_ps());
  
      // scan remaining valid prims and bin each separately
      DWORD primIndex;
      while (_BitScanForward(&primIndex, primMask))
      {
-        uint32_t linkageCount = state.backendState.numAttributes;
+        uint32_t linkageCount     = state.backendState.numAttributes;
          uint32_t numScalarAttribs = linkageCount * 4;
  
          BE_WORK work;
          work.type = DRAW;
  
-        TRIANGLE_WORK_DESC &desc = work.desc.tri;
+        TRIANGLE_WORK_DESC& desc = work.desc.tri;
  
-        desc.triFlags.frontFacing = 1;
-        desc.triFlags.yMajor = (yMajorMask >> primIndex) & 1;
+        desc.triFlags.frontFacing            = 1;
+        desc.triFlags.yMajor                 = (yMajorMask >> primIndex) & 1;
          desc.triFlags.renderTargetArrayIndex = aRTAI[primIndex];
-        desc.triFlags.viewportIndex = pViewportIndex[primIndex];
+        desc.triFlags.viewportIndex          = pViewportIndex[primIndex];
  
          work.pfnWork = RasterizeLine;
  
@@ -1748,16 +1821,16 @@ void BinPostSetupLinesImpl(
          SWR_ASSERT(pArena != nullptr);
  
          // store active attribs
-        desc.pAttribs = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
+        desc.pAttribs   = (float*)pArena->AllocAligned(numScalarAttribs * 3 * sizeof(float), 16);
          desc.numAttribs = linkageCount;
          pfnProcessAttribs(pDC, pa, primIndex, pPrimID[primIndex], desc.pAttribs);
  
          // store line vertex data
          desc.pTriBuffer = (float*)pArena->AllocAligned(4 * 4 * sizeof(float), 16);
  
-        _mm_store_ps(&desc.pTriBuffer[0],  vHorizX[primIndex]);
-        _mm_store_ps(&desc.pTriBuffer[4],  vHorizY[primIndex]);
-        _mm_store_ps(&desc.pTriBuffer[8],  vHorizZ[primIndex]);
+        _mm_store_ps(&desc.pTriBuffer[0], vHorizX[primIndex]);
+        _mm_store_ps(&desc.pTriBuffer[4], vHorizY[primIndex]);
+        _mm_store_ps(&desc.pTriBuffer[8], vHorizZ[primIndex]);
          _mm_store_ps(&desc.pTriBuffer[12], vHorizW[primIndex]);
  
          // store user clip distances
@@ -1765,10 +1838,11 @@ void BinPostSetupLinesImpl(
          {
              uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
              desc.pUserClipBuffer = (float*)pArena->Alloc(numClipDist * 2 * sizeof(float));
-            ProcessUserClipDist<2>(state.backendState, pa, primIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
+            ProcessUserClipDist<2>(
+                state.backendState, pa, primIndex, &desc.pTriBuffer[12], desc.pUserClipBuffer);
          }
  
-        MacroTileMgr *pTileMgr = pDC->pTileMgr;
+        MacroTileMgr* pTileMgr = pDC->pTileMgr;
          for (uint32_t y = aMTTop[primIndex]; y <= aMTBottom[primIndex]; ++y)
          {
              for (uint32_t x = aMTLeft[primIndex]; x <= aMTRight[primIndex]; ++x)
@@ -1799,21 +1873,20 @@ endBinLines:
  /// @param primID - Primitive ID for each line.
  /// @param viewportIdx - Viewport Array Index for each line.
  template <typename SIMD_T, uint32_t SIMD_WIDTH>
-void SIMDCALL BinLinesImpl(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    Vec4<SIMD_T> prim[3],
-    uint32_t primMask,
-    Integer<SIMD_T> const &primID,
-    Integer<SIMD_T> const &viewportIdx,
-    Integer<SIMD_T> const & rtIdx)
+void SIMDCALL BinLinesImpl(DRAW_CONTEXT*          pDC,
+                           PA_STATE&              pa,
+                           uint32_t               workerId,
+                           Vec4<SIMD_T>           prim[3],
+                           uint32_t               primMask,
+                           Integer<SIMD_T> const& primID,
+                           Integer<SIMD_T> const& viewportIdx,
+                           Integer<SIMD_T> const& rtIdx)
  {
-    const API_STATE& state = GetApiState(pDC);
-    const SWR_RASTSTATE& rastState = state.rastState;
-    const SWR_FRONTEND_STATE& feState = state.frontendState;
+    const API_STATE&          state     = GetApiState(pDC);
+    const SWR_RASTSTATE&      rastState = state.rastState;
+    const SWR_FRONTEND_STATE& feState   = state.frontendState;
  
-    Float<SIMD_T> vRecipW[2] = { SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f) };
+    Float<SIMD_T> vRecipW[2] = {SIMD_T::set1_ps(1.0f), SIMD_T::set1_ps(1.0f)};
  
      if (!feState.vpTransformDisable)
      {
@@ -1851,42 +1924,34 @@ void SIMDCALL BinLinesImpl(
      prim[1].y = SIMD_T::add_ps(prim[1].y, offset);
  
      BinPostSetupLinesImpl<SIMD_T, SIMD_WIDTH>(
-        pDC,
-        pa,
-        workerId,
-        prim,
-        vRecipW,
-        primMask,
-        primID,
-        viewportIdx,
-        rtIdx);
+        pDC, pa, workerId, prim, vRecipW, primMask, primID, viewportIdx, rtIdx);
  }
  
-void BinLines(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simdvector prim[],
-    uint32_t primMask,
-    simdscalari const &primID,
-    simdscalari const &viewportIdx,
-    simdscalari const &rtIdx)
+void BinLines(DRAW_CONTEXT*      pDC,
+              PA_STATE&          pa,
+              uint32_t           workerId,
+              simdvector         prim[],
+              uint32_t           primMask,
+              simdscalari const& primID,
+              simdscalari const& viewportIdx,
+              simdscalari const& rtIdx)
  {
-    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
+    BinLinesImpl<SIMD256, KNOB_SIMD_WIDTH>(
+        pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
  }
  
  #if USE_SIMD16_FRONTEND
-void SIMDCALL BinLines_simd16(
-    DRAW_CONTEXT *pDC,
-    PA_STATE &pa,
-    uint32_t workerId,
-    simd16vector prim[3],
-    uint32_t primMask,
-    simd16scalari const &primID,
-    simd16scalari const &viewportIdx,
-    simd16scalari const &rtIdx)
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT*        pDC,
+                              PA_STATE&            pa,
+                              uint32_t             workerId,
+                              simd16vector         prim[3],
+                              uint32_t             primMask,
+                              simd16scalari const& primID,
+                              simd16scalari const& viewportIdx,
+                              simd16scalari const& rtIdx)
  {
-    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
+    BinLinesImpl<SIMD512, KNOB_SIMD16_WIDTH>(
+        pDC, pa, workerId, prim, primMask, primID, viewportIdx, rtIdx);
  }
  
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.h b/src/gallium/drivers/swr/rasterizer/core/binner.h

index 443dac57fef6d73ad856a43cc357861e16076fba..f5f6d8074cb0dd71bf2b6bdd26c26a8ebf4a18f2 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/binner.h
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file binner.h
-*
-* @brief Declaration for the macrotile binner
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file binner.h
+ *
+ * @brief Declaration for the macrotile binner
+ *
+ ******************************************************************************/
  #include "state.h"
  #include "conservativeRast.h"
  #include "utils.h"
@@ -47,22 +47,23 @@ public:
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Convert the X,Y coords of a triangle to the requested Fixed 
+/// @brief Convert the X,Y coords of a triangle to the requested Fixed
  /// Point precision from FP32.
  template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
-INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T> &vIn)
+INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T>& vIn)
  {
      return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Helper function to set the X,Y coords of a triangle to the 
+/// @brief Helper function to set the X,Y coords of a triangle to the
  /// requested Fixed Point precision from FP32.
  /// @param tri: simdvector[3] of FP triangle verts
  /// @param vXi: fixed point X coords of tri verts
  /// @param vYi: fixed point Y coords of tri verts
  template <typename SIMD_T>
-INLINE static void FPToFixedPoint(const Vec4<SIMD_T> *const tri, Integer<SIMD_T>(&vXi)[3], Integer<SIMD_T>(&vYi)[3])
+INLINE static void
+FPToFixedPoint(const Vec4<SIMD_T>* const tri, Integer<SIMD_T> (&vXi)[3], Integer<SIMD_T> (&vYi)[3])
  {
      vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
      vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
@@ -78,10 +79,12 @@ INLINE static void FPToFixedPoint(const Vec4<SIMD_T> *const tri, Integer<SIMD_T>
  /// @param vX: fixed point X position for triangle verts
  /// @param vY: fixed point Y position for triangle verts
  /// @param bbox: fixed point bbox
-/// *Note*: expects vX, vY to be in the correct precision for the type 
+/// *Note*: expects vX, vY to be in the correct precision for the type
  /// of rasterization. This avoids unnecessary FP->fixed conversions.
  template <typename SIMD_T, typename CT>
-INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T>(&vX)[3], const Integer<SIMD_T>(&vY)[3], SIMDBBOX_T<SIMD_T> &bbox)
+INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T> (&vX)[3],
+                                       const Integer<SIMD_T> (&vY)[3],
+                                       SIMDBBOX_T<SIMD_T>& bbox)
  {
      Integer<SIMD_T> vMinX = vX[0];
  
@@ -105,8 +108,9 @@ INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T>(&vX)[3], const Inte
  
      if (CT::BoundingBoxOffsetT::value != 0)
      {
-        /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
-        /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
+        /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative
+        /// rasterization. Expand bbox by 1/256; coverage will be correctly handled in the
+        /// rasterizer.
  
          const Integer<SIMD_T> value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
  
@@ -132,119 +136,119 @@ INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T>(&vX)[3], const Inte
  /// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
  //
  /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
-static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
-    simdscalari &scisXmin, simdscalari &scisYmin, simdscalari &scisXmax, simdscalari &scisYmax)
+static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
+                           const uint32_t* pViewportIndex,
+                           simdscalari&    scisXmin,
+                           simdscalari&    scisYmin,
+                           simdscalari&    scisXmax,
+                           simdscalari&    scisYmax)
  {
-    scisXmin = _simd_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[7]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[6]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[5]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[4]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[3]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[2]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[1]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[0]].xmin);
-    scisYmin = _simd_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[7]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[6]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[5]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[4]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[3]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[2]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[1]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[0]].ymin);
-    scisXmax = _simd_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[7]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[6]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[5]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[4]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[3]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[2]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[1]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[0]].xmax);
-    scisYmax = _simd_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[7]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[6]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[5]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[4]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[3]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[2]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[1]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[0]].ymax);
+    scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[6]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[5]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[4]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[3]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[2]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[1]].xmin,
+                               pScissorsInFixedPoint[pViewportIndex[0]].xmin);
+    scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[6]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[5]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[4]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[3]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[2]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[1]].ymin,
+                               pScissorsInFixedPoint[pViewportIndex[0]].ymin);
+    scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[6]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[5]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[4]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[3]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[2]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[1]].xmax,
+                               pScissorsInFixedPoint[pViewportIndex[0]].xmax);
+    scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[6]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[5]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[4]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[3]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[2]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[1]].ymax,
+                               pScissorsInFixedPoint[pViewportIndex[0]].ymax);
  }
  
-static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
-    simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax)
+static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
+                           const uint32_t* pViewportIndex,
+                           simd16scalari&  scisXmin,
+                           simd16scalari&  scisYmin,
+                           simd16scalari&  scisXmax,
+                           simd16scalari&  scisYmax)
  {
-    scisXmin = _simd16_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[15]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[14]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[13]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[12]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[11]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[10]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[9]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[8]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[7]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[6]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[5]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[4]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[3]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[2]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[1]].xmin,
-        pScissorsInFixedPoint[pViewportIndex[0]].xmin);
-
-    scisYmin = _simd16_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[15]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[14]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[13]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[12]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[11]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[10]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[9]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[8]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[7]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[6]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[5]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[4]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[3]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[2]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[1]].ymin,
-        pScissorsInFixedPoint[pViewportIndex[0]].ymin);
-
-    scisXmax = _simd16_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[15]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[14]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[13]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[12]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[11]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[10]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[9]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[8]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[7]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[6]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[5]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[4]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[3]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[2]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[1]].xmax,
-        pScissorsInFixedPoint[pViewportIndex[0]].xmax);
-
-    scisYmax = _simd16_set_epi32(
-        pScissorsInFixedPoint[pViewportIndex[15]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[14]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[13]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[12]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[11]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[10]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[9]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[8]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[7]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[6]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[5]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[4]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[3]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[2]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[1]].ymax,
-        pScissorsInFixedPoint[pViewportIndex[0]].ymax);
+    scisXmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[14]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[13]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[12]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[11]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[10]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[9]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[8]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[7]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[6]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[5]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[4]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[3]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[2]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[1]].xmin,
+                                 pScissorsInFixedPoint[pViewportIndex[0]].xmin);
+
+    scisYmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[14]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[13]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[12]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[11]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[10]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[9]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[8]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[7]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[6]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[5]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[4]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[3]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[2]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[1]].ymin,
+                                 pScissorsInFixedPoint[pViewportIndex[0]].ymin);
+
+    scisXmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[14]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[13]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[12]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[11]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[10]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[9]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[8]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[7]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[6]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[5]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[4]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[3]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[2]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[1]].xmax,
+                                 pScissorsInFixedPoint[pViewportIndex[0]].xmax);
+
+    scisYmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[14]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[13]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[12]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[11]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[10]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[9]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[8]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[7]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[6]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[5]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[4]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[3]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[2]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[1]].ymax,
+                                 pScissorsInFixedPoint[pViewportIndex[0]].ymax);
  }
 \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/blend.h b/src/gallium/drivers/swr/rasterizer/core/blend.h

index c89c47646a3cd9e72dc973852f7a9830c357606e..7b2f77985f8a5df5be07c91a4cd557b73d530be1 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/blend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/blend.h
@@ -1,77 +1,82 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend.cpp
-*
-* @brief Implementation for blending operations.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend.h
+ *
+ * @brief Implementation for blending operations.
+ *
+ ******************************************************************************/
  #include "state.h"
  
-template<bool Color, bool Alpha>
-INLINE
-void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
+template <bool Color, bool Alpha>
+INLINE void GenerateBlendFactor(SWR_BLEND_FACTOR func,
+                                simdvector&      constantColor,
+                                simdvector&      src,
+                                simdvector&      src1,
+                                simdvector&      dst,
+                                simdvector&      out)
  {
      simdvector result;
  
      switch (func)
      {
-    case BLENDFACTOR_ZERO: 
+    case BLENDFACTOR_ZERO:
          result.x = _simd_setzero_ps();
          result.y = _simd_setzero_ps();
          result.z = _simd_setzero_ps();
          result.w = _simd_setzero_ps();
          break;
  
-    case BLENDFACTOR_ONE: 
+    case BLENDFACTOR_ONE:
          result.x = _simd_set1_ps(1.0);
          result.y = _simd_set1_ps(1.0);
          result.z = _simd_set1_ps(1.0);
          result.w = _simd_set1_ps(1.0);
          break;
  
-    case BLENDFACTOR_SRC_COLOR: 
+    case BLENDFACTOR_SRC_COLOR:
          result = src;
          break;
  
-    case BLENDFACTOR_DST_COLOR: 
+    case BLENDFACTOR_DST_COLOR:
          result = dst;
          break;
  
-    case BLENDFACTOR_INV_SRC_COLOR: 
+    case BLENDFACTOR_INV_SRC_COLOR:
          result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
          result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
          result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
          result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
          break;
  
-    case BLENDFACTOR_INV_DST_COLOR: 
+    case BLENDFACTOR_INV_DST_COLOR:
          result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
          result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
          result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
          result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
          break;
  
-    case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
+    case BLENDFACTOR_SRC_ALPHA:
+        result.x = src.w;
          result.y = src.w;
          result.z = src.w;
          result.w = src.w;
@@ -80,14 +85,15 @@ void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdv
      case BLENDFACTOR_INV_SRC_ALPHA:
      {
          simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
-        result.x = oneMinusSrcA;
-        result.y = oneMinusSrcA;
-        result.z = oneMinusSrcA;
-        result.w = oneMinusSrcA;
+        result.x                = oneMinusSrcA;
+        result.y                = oneMinusSrcA;
+        result.z                = oneMinusSrcA;
+        result.w                = oneMinusSrcA;
          break;
      }
  
-    case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
+    case BLENDFACTOR_DST_ALPHA:
+        result.x = dst.w;
          result.y = dst.w;
          result.z = dst.w;
          result.w = dst.w;
@@ -96,20 +102,20 @@ void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdv
      case BLENDFACTOR_INV_DST_ALPHA:
      {
          simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
-        result.x = oneMinusDstA;
-        result.y = oneMinusDstA;
-        result.z = oneMinusDstA;
-        result.w = oneMinusDstA;
+        result.x                = oneMinusDstA;
+        result.y                = oneMinusDstA;
+        result.z                = oneMinusDstA;
+        result.w                = oneMinusDstA;
          break;
      }
  
      case BLENDFACTOR_SRC_ALPHA_SATURATE:
      {
          simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
-        result.x = sat;
-        result.y = sat;
-        result.z = sat;
-        result.w = _simd_set1_ps(1.0);
+        result.x       = sat;
+        result.y       = sat;
+        result.z       = sat;
+        result.w       = _simd_set1_ps(1.0);
          break;
      }
  
@@ -135,7 +141,8 @@ void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdv
  
      case BLENDFACTOR_INV_CONST_ALPHA:
      {
-        result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
+        result.x = result.y = result.z = result.w =
+            _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
          break;
      }
  
@@ -161,7 +168,8 @@ void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdv
          result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
          break;
  
-    default: SWR_INVALID("Unimplemented blend factor: %d", func);
+    default:
+        SWR_INVALID("Unimplemented blend factor: %d", func);
      }
  
      if (Color)
@@ -174,11 +182,15 @@ void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdv
      {
          out.w = result.w;
      }
-
  }
  
-template<bool Color, bool Alpha>
-INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
+template <bool Color, bool Alpha>
+INLINE void BlendFunc(SWR_BLEND_OP blendOp,
+                      simdvector&  src,
+                      simdvector&  srcFactor,
+                      simdvector&  dst,
+                      simdvector&  dstFactor,
+                      simdvector&  out)
  {
      simdvector result;
  
@@ -204,21 +216,21 @@ INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFact
          result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
          result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
          break;
-        
+
      case BLENDOP_MIN:
          result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
          result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
          result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
          result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
          break;
-        
+
      case BLENDOP_MAX:
          result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
          result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
          result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
          result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
          break;
-        
+
      default:
          SWR_INVALID("Unimplemented blend function: %d", blendOp);
      }
@@ -235,8 +247,8 @@ INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFact
      }
  }
  
-template<SWR_TYPE type>
-INLINE void Clamp(simdvector &src)
+template <SWR_TYPE type>
+INLINE void Clamp(simdvector& src)
  {
      switch (type)
      {
@@ -277,8 +289,13 @@ INLINE void Clamp(simdvector &src)
      }
  }
  
-template<SWR_TYPE type>
-void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result)
+template <SWR_TYPE type>
+void Blend(const SWR_BLEND_STATE*               pBlendState,
+           const SWR_RENDER_TARGET_BLEND_STATE* pState,
+           simdvector&                          src,
+           simdvector&                          src1,
+           uint8_t*                             pDst,
+           simdvector&                          result)
  {
      // load render target
      simdvector dst;
@@ -299,20 +316,33 @@ void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STA
      simdvector srcFactor, dstFactor;
      if (pBlendState->independentAlphaBlendEnable)
      {
-        GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
-        GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
-
-        GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
-        GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
-
-        BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
-        BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
+        GenerateBlendFactor<true, false>(
+            (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
+        GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor,
+                                         constColor,
+                                         src,
+                                         src1,
+                                         dst,
+                                         srcFactor);
+
+        GenerateBlendFactor<true, false>(
+            (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
+        GenerateBlendFactor<false, true>(
+            (SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
+
+        BlendFunc<true, false>(
+            (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+        BlendFunc<false, true>(
+            (SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
      }
      else
      {
-        GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
-        GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
+        GenerateBlendFactor<true, true>(
+            (SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
+        GenerateBlendFactor<true, true>(
+            (SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
  
-        BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+        BlendFunc<true, true>(
+            (SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
      }
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.cpp b/src/gallium/drivers/swr/rasterizer/core/clip.cpp

index e6c22180683dda70b610233359d852f4c98efc4a..8c53fca6432d3fc399822839dd8360e40ff5fedc 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/clip.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.cpp
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file clip.cpp
-*
-* @brief Implementation for clipping
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file clip.cpp
+ *
+ * @brief Implementation for clipping
+ *
+ ******************************************************************************/
  
  #include <assert.h>
  
@@ -42,115 +42,137 @@ float ComputeInterpFactor(float boundaryCoord0, float boundaryCoord1)
      return (boundaryCoord0 / (boundaryCoord0 - boundaryCoord1));
  }
  
-template<SWR_CLIPCODES ClippingPlane>
+template <SWR_CLIPCODES ClippingPlane>
  inline void intersect(
-    int s,                       // index to first edge vertex v0 in pInPts.
-    int p,                       // index to second edge vertex v1 in pInPts.
-    const float *pInPts,         // array of all the input positions.
-    const float *pInAttribs,     // array of all attributes for all vertex. All the attributes for each vertex is contiguous.
-    int numInAttribs,            // number of attributes per vertex.
-    int i,                       // output index.
-    float *pOutPts,              // array of output positions. We'll write our new intersection point at i*4.
-    float *pOutAttribs)          // array of output attributes. We'll write our new attributes at i*numInAttribs.
+    int          s,          // index to first edge vertex v0 in pInPts.
+    int          p,          // index to second edge vertex v1 in pInPts.
+    const float* pInPts,     // array of all the input positions.
+    const float* pInAttribs, // array of all attributes for all vertex. All the attributes for each
+                             // vertex is contiguous.
+    int    numInAttribs,     // number of attributes per vertex.
+    int    i,                // output index.
+    float* pOutPts,     // array of output positions. We'll write our new intersection point at i*4.
+    float* pOutAttribs) // array of output attributes. We'll write our new attributes at
+                        // i*numInAttribs.
  {
      float t;
  
      // Find the parameter of the intersection.
      //        t = (v1.w - v1.x) / ((v2.x - v1.x) - (v2.w - v1.w)) for x = w (RIGHT) plane, etc.
-    const float *v1 = &pInPts[s*4];
-    const float *v2 = &pInPts[p*4];
+    const float* v1 = &pInPts[s * 4];
+    const float* v2 = &pInPts[p * 4];
  
      switch (ClippingPlane)
      {
-    case FRUSTUM_LEFT:      t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]); break;
-    case FRUSTUM_RIGHT:     t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]); break;
-    case FRUSTUM_TOP:       t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]); break;
-    case FRUSTUM_BOTTOM:    t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]); break;
-    case FRUSTUM_NEAR:      t = ComputeInterpFactor(v1[2], v2[2]); break;
-    case FRUSTUM_FAR:       t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]); break;
-    default: SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
+    case FRUSTUM_LEFT:
+        t = ComputeInterpFactor(v1[3] + v1[0], v2[3] + v2[0]);
+        break;
+    case FRUSTUM_RIGHT:
+        t = ComputeInterpFactor(v1[3] - v1[0], v2[3] - v2[0]);
+        break;
+    case FRUSTUM_TOP:
+        t = ComputeInterpFactor(v1[3] + v1[1], v2[3] + v2[1]);
+        break;
+    case FRUSTUM_BOTTOM:
+        t = ComputeInterpFactor(v1[3] - v1[1], v2[3] - v2[1]);
+        break;
+    case FRUSTUM_NEAR:
+        t = ComputeInterpFactor(v1[2], v2[2]);
+        break;
+    case FRUSTUM_FAR:
+        t = ComputeInterpFactor(v1[3] - v1[2], v2[3] - v2[2]);
+        break;
+    default:
+        SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
      };
  
+    const float* a1 = &pInAttribs[s * numInAttribs];
+    const float* a2 = &pInAttribs[p * numInAttribs];
  
-    const float *a1 = &pInAttribs[s*numInAttribs];
-    const float *a2 = &pInAttribs[p*numInAttribs];
-
-    float *pOutP    = &pOutPts[i*4];
-    float *pOutA    = &pOutAttribs[i*numInAttribs];
+    float* pOutP = &pOutPts[i * 4];
+    float* pOutA = &pOutAttribs[i * numInAttribs];
  
      // Interpolate new position.
-    for(int j = 0; j < 4; ++j)
+    for (int j = 0; j < 4; ++j)
      {
-        pOutP[j] = v1[j] + (v2[j]-v1[j])*t;
+        pOutP[j] = v1[j] + (v2[j] - v1[j]) * t;
      }
  
      // Interpolate Attributes
-    for(int attr = 0; attr < numInAttribs; ++attr)
+    for (int attr = 0; attr < numInAttribs; ++attr)
      {
-        pOutA[attr] = a1[attr] + (a2[attr]-a1[attr])*t;
+        pOutA[attr] = a1[attr] + (a2[attr] - a1[attr]) * t;
      }
  }
  
-
  // Checks whether vertex v lies inside clipping plane
  // in homogenous coords check -w < {x,y,z} < w;
  //
-template<SWR_CLIPCODES ClippingPlane>
+template <SWR_CLIPCODES ClippingPlane>
  inline int inside(const float v[4])
  {
      switch (ClippingPlane)
      {
-    case FRUSTUM_LEFT   : return (v[0]>=-v[3]);
-    case FRUSTUM_RIGHT  : return (v[0]<= v[3]);
-    case FRUSTUM_TOP    : return (v[1]>=-v[3]);
-    case FRUSTUM_BOTTOM : return (v[1]<= v[3]);
-    case FRUSTUM_NEAR   : return (v[2]>=0.0f);
-    case FRUSTUM_FAR    : return (v[2]<= v[3]);
+    case FRUSTUM_LEFT:
+        return (v[0] >= -v[3]);
+    case FRUSTUM_RIGHT:
+        return (v[0] <= v[3]);
+    case FRUSTUM_TOP:
+        return (v[1] >= -v[3]);
+    case FRUSTUM_BOTTOM:
+        return (v[1] <= v[3]);
+    case FRUSTUM_NEAR:
+        return (v[2] >= 0.0f);
+    case FRUSTUM_FAR:
+        return (v[2] <= v[3]);
      default:
          SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
          return 0;
      }
  }
  
-
  // Clips a polygon in homogenous coordinates to a particular clipping plane.
  // Takes in vertices of the polygon (InPts) and the clipping plane
  // Puts the vertices of the clipped polygon in OutPts
  // Returns number of points in clipped polygon
  //
-template<SWR_CLIPCODES ClippingPlane>
-int ClipTriToPlane( const float *pInPts, int numInPts,
-                    const float *pInAttribs, int numInAttribs,
-                    float *pOutPts, float *pOutAttribs)
+template <SWR_CLIPCODES ClippingPlane>
+int ClipTriToPlane(const float* pInPts,
+                   int          numInPts,
+                   const float* pInAttribs,
+                   int          numInAttribs,
+                   float*       pOutPts,
+                   float*       pOutAttribs)
  {
-    int i=0; // index number of OutPts, # of vertices in OutPts = i div 4;
+    int i = 0; // index number of OutPts, # of vertices in OutPts = i div 4;
  
      for (int j = 0; j < numInPts; ++j)
      {
          int s = j;
          int p = (j + 1) % numInPts;
  
-        int s_in = inside<ClippingPlane>(&pInPts[s*4]);
-        int p_in = inside<ClippingPlane>(&pInPts[p*4]);
+        int s_in = inside<ClippingPlane>(&pInPts[s * 4]);
+        int p_in = inside<ClippingPlane>(&pInPts[p * 4]);
  
          // test if vertex is to be added to output vertices
-        if (s_in != p_in)  // edge crosses clipping plane
+        if (s_in != p_in) // edge crosses clipping plane
          {
              // find point of intersection
-            intersect<ClippingPlane>(s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
+            intersect<ClippingPlane>(
+                s, p, pInPts, pInAttribs, numInAttribs, i, pOutPts, pOutAttribs);
              i++;
          }
          if (p_in) // 2nd vertex is inside clipping volume, add it to output
          {
              // Copy 2nd vertex position of edge over to output.
-            for(int k = 0; k < 4; ++k)
+            for (int k = 0; k < 4; ++k)
              {
-                pOutPts[i*4 + k] = pInPts[p*4 + k];
+                pOutPts[i * 4 + k] = pInPts[p * 4 + k];
              }
              // Copy 2nd vertex attributes of edge over to output.
-            for(int attr = 0; attr < numInAttribs; ++attr)
+            for (int attr = 0; attr < numInAttribs; ++attr)
              {
-                pOutAttribs[i*numInAttribs+attr] = pInAttribs[p*numInAttribs+attr];
+                pOutAttribs[i * numInAttribs + attr] = pInAttribs[p * numInAttribs + attr];
              }
              i++;
          }
@@ -160,8 +182,14 @@ int ClipTriToPlane( const float *pInPts, int numInPts,
      return i;
  }
  
-void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
-    simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipRectangles(DRAW_CONTEXT*      pDC,
+                    PA_STATE&          pa,
+                    uint32_t           workerId,
+                    simdvector         prims[],
+                    uint32_t           primMask,
+                    simdscalari const& primId,
+                    simdscalari const& viewportIdx,
+                    simdscalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
      Clipper<SIMD256, 3> clipper(workerId, pDC);
@@ -169,8 +197,14 @@ void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvect
      RDTSC_END(FEClipRectangles, 1);
  }
  
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
-                   simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipTriangles(DRAW_CONTEXT*      pDC,
+                   PA_STATE&          pa,
+                   uint32_t           workerId,
+                   simdvector         prims[],
+                   uint32_t           primMask,
+                   simdscalari const& primId,
+                   simdscalari const& viewportIdx,
+                   simdscalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
      Clipper<SIMD256, 3> clipper(workerId, pDC);
@@ -178,8 +212,14 @@ void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvecto
      RDTSC_END(FEClipTriangles, 1);
  }
  
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
-               simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipLines(DRAW_CONTEXT*      pDC,
+               PA_STATE&          pa,
+               uint32_t           workerId,
+               simdvector         prims[],
+               uint32_t           primMask,
+               simdscalari const& primId,
+               simdscalari const& viewportIdx,
+               simdscalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipLines, pDC->drawId);
      Clipper<SIMD256, 2> clipper(workerId, pDC);
@@ -187,8 +227,14 @@ void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector pr
      RDTSC_END(FEClipLines, 1);
  }
  
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask,
-                simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx)
+void ClipPoints(DRAW_CONTEXT*      pDC,
+                PA_STATE&          pa,
+                uint32_t           workerId,
+                simdvector         prims[],
+                uint32_t           primMask,
+                simdscalari const& primId,
+                simdscalari const& viewportIdx,
+                simdscalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipPoints, pDC->drawId);
      Clipper<SIMD256, 1> clipper(workerId, pDC);
@@ -197,12 +243,21 @@ void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector p
  }
  
  #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
-    simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT*        pDC,
+                                    PA_STATE&            pa,
+                                    uint32_t             workerId,
+                                    simd16vector         prims[],
+                                    uint32_t             primMask,
+                                    simd16scalari const& primId,
+                                    simd16scalari const& viewportIdx,
+                                    simd16scalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipRectangles, pDC->drawId);
  
-    enum { VERTS_PER_PRIM = 3 };
+    enum
+    {
+        VERTS_PER_PRIM = 3
+    };
  
      Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
  
@@ -212,12 +267,21 @@ void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wo
      RDTSC_END(FEClipRectangles, 1);
  }
  
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
-                                   simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT*        pDC,
+                                   PA_STATE&            pa,
+                                   uint32_t             workerId,
+                                   simd16vector         prims[],
+                                   uint32_t             primMask,
+                                   simd16scalari const& primId,
+                                   simd16scalari const& viewportIdx,
+                                   simd16scalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipTriangles, pDC->drawId);
  
-    enum { VERTS_PER_PRIM = 3 };
+    enum
+    {
+        VERTS_PER_PRIM = 3
+    };
  
      Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
  
@@ -227,12 +291,21 @@ void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t wor
      RDTSC_END(FEClipTriangles, 1);
  }
  
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
-                               simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT*        pDC,
+                               PA_STATE&            pa,
+                               uint32_t             workerId,
+                               simd16vector         prims[],
+                               uint32_t             primMask,
+                               simd16scalari const& primId,
+                               simd16scalari const& viewportIdx,
+                               simd16scalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipLines, pDC->drawId);
  
-    enum { VERTS_PER_PRIM = 2 };
+    enum
+    {
+        VERTS_PER_PRIM = 2
+    };
  
      Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
  
@@ -242,12 +315,21 @@ void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerI
      RDTSC_END(FEClipLines, 1);
  }
  
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask,
-                                simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx)
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT*        pDC,
+                                PA_STATE&            pa,
+                                uint32_t             workerId,
+                                simd16vector         prims[],
+                                uint32_t             primMask,
+                                simd16scalari const& primId,
+                                simd16scalari const& viewportIdx,
+                                simd16scalari const& rtIdx)
  {
      RDTSC_BEGIN(FEClipPoints, pDC->drawId);
  
-    enum { VERTS_PER_PRIM = 1 };
+    enum
+    {
+        VERTS_PER_PRIM = 1
+    };
  
      Clipper<SIMD512, VERTS_PER_PRIM> clipper(workerId, pDC);
  
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h

index 90ae426357554c9643a974b62c1c1f9ee0cfb223..7b4ed58c3fa28cca4c1021da71ca8af0da7f8cca 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file clip.h
-*
-* @brief Definitions for clipping
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file clip.h
+ *
+ * @brief Definitions for clipping
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/simdintrin.h"
@@ -40,18 +40,19 @@ extern THREAD SIMDVERTEX_T<SIMD512> tlsTempVertices_simd16[7];
  
  enum SWR_CLIPCODES
  {
-    // Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
-    // Guardband is able to use a single high-bit with 4 separate LSBs, because it computes a union, rather than intersection, of clipcodes.
+// Shift clip codes out of the mantissa to prevent denormalized values when used in float compare.
+// Guardband is able to use a single high-bit with 4 separate LSBs, because it computes a union,
+// rather than intersection, of clipcodes.
  #define CLIPCODE_SHIFT 23
-    FRUSTUM_LEFT    = (0x01 << CLIPCODE_SHIFT),
-    FRUSTUM_TOP     = (0x02 << CLIPCODE_SHIFT),
-    FRUSTUM_RIGHT   = (0x04 << CLIPCODE_SHIFT),
-    FRUSTUM_BOTTOM  = (0x08 << CLIPCODE_SHIFT),
+    FRUSTUM_LEFT   = (0x01 << CLIPCODE_SHIFT),
+    FRUSTUM_TOP    = (0x02 << CLIPCODE_SHIFT),
+    FRUSTUM_RIGHT  = (0x04 << CLIPCODE_SHIFT),
+    FRUSTUM_BOTTOM = (0x08 << CLIPCODE_SHIFT),
  
-    FRUSTUM_NEAR    = (0x10 << CLIPCODE_SHIFT),
-    FRUSTUM_FAR     = (0x20 << CLIPCODE_SHIFT),
+    FRUSTUM_NEAR = (0x10 << CLIPCODE_SHIFT),
+    FRUSTUM_FAR  = (0x20 << CLIPCODE_SHIFT),
  
-    NEGW            = (0x40 << CLIPCODE_SHIFT),
+    NEGW = (0x40 << CLIPCODE_SHIFT),
  
      GUARDBAND_LEFT   = (0x80 << CLIPCODE_SHIFT | 0x1),
      GUARDBAND_TOP    = (0x80 << CLIPCODE_SHIFT | 0x2),
@@ -59,32 +60,41 @@ enum SWR_CLIPCODES
      GUARDBAND_BOTTOM = (0x80 << CLIPCODE_SHIFT | 0x8)
  };
  
-#define GUARDBAND_CLIP_MASK (FRUSTUM_NEAR|FRUSTUM_FAR|GUARDBAND_LEFT|GUARDBAND_TOP|GUARDBAND_RIGHT|GUARDBAND_BOTTOM|NEGW)
-#define FRUSTUM_CLIP_MASK (FRUSTUM_NEAR|FRUSTUM_FAR|FRUSTUM_LEFT|FRUSTUM_RIGHT|FRUSTUM_TOP|FRUSTUM_BOTTOM)
-
-template<typename SIMD_T>
-void ComputeClipCodes(const API_STATE &state, const Vec4<SIMD_T> &vertex, Float<SIMD_T> &clipCodes, Integer<SIMD_T> const &viewportIndexes)
+#define GUARDBAND_CLIP_MASK                                                          \
+    (FRUSTUM_NEAR | FRUSTUM_FAR | GUARDBAND_LEFT | GUARDBAND_TOP | GUARDBAND_RIGHT | \
+     GUARDBAND_BOTTOM | NEGW)
+#define FRUSTUM_CLIP_MASK \
+    (FRUSTUM_NEAR | FRUSTUM_FAR | FRUSTUM_LEFT | FRUSTUM_RIGHT | FRUSTUM_TOP | FRUSTUM_BOTTOM)
+
+template <typename SIMD_T>
+void ComputeClipCodes(const API_STATE&       state,
+                      const Vec4<SIMD_T>&    vertex,
+                      Float<SIMD_T>&         clipCodes,
+                      Integer<SIMD_T> const& viewportIndexes)
  {
      clipCodes = SIMD_T::setzero_ps();
  
      // -w
-    Float<SIMD_T> vNegW = SIMD_T::mul_ps(vertex.w,SIMD_T::set1_ps(-1.0f));
+    Float<SIMD_T> vNegW = SIMD_T::mul_ps(vertex.w, SIMD_T::set1_ps(-1.0f));
  
      // FRUSTUM_LEFT
      Float<SIMD_T> vRes = SIMD_T::cmplt_ps(vertex.x, vNegW);
-    clipCodes = SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_LEFT)));
+    clipCodes          = SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_LEFT)));
  
      // FRUSTUM_TOP
-    vRes = SIMD_T::cmplt_ps(vertex.y, vNegW);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_TOP))));
+    vRes      = SIMD_T::cmplt_ps(vertex.y, vNegW);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_TOP))));
  
      // FRUSTUM_RIGHT
-    vRes = SIMD_T::cmpgt_ps(vertex.x, vertex.w);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_RIGHT))));
+    vRes      = SIMD_T::cmpgt_ps(vertex.x, vertex.w);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_RIGHT))));
  
      // FRUSTUM_BOTTOM
-    vRes = SIMD_T::cmpgt_ps(vertex.y, vertex.w);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_BOTTOM))));
+    vRes      = SIMD_T::cmpgt_ps(vertex.y, vertex.w);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_BOTTOM))));
  
      if (state.rastState.depthClipEnable)
      {
@@ -98,50 +108,66 @@ void ComputeClipCodes(const API_STATE &state, const Vec4<SIMD_T> &vertex, Float<
          {
              vRes = SIMD_T::cmplt_ps(vertex.z, vNegW);
          }
-        clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_NEAR))));
+        clipCodes = SIMD_T::or_ps(
+            clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_NEAR))));
  
          // FRUSTUM_FAR
-        vRes = SIMD_T::cmpgt_ps(vertex.z, vertex.w);
-        clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_FAR))));
+        vRes      = SIMD_T::cmpgt_ps(vertex.z, vertex.w);
+        clipCodes = SIMD_T::or_ps(
+            clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_FAR))));
      }
  
      // NEGW
      vRes = SIMD_T::cmple_ps(vertex.w, SIMD_T::setzero_ps());
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(NEGW))));
+    clipCodes =
+        SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(NEGW))));
  
      // GUARDBAND_LEFT
-    Float<SIMD_T> gbMult = SIMD_T::mul_ps(vNegW, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.left[0], viewportIndexes));
-    vRes = SIMD_T::cmplt_ps(vertex.x, gbMult);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_LEFT))));
+    Float<SIMD_T> gbMult = SIMD_T::mul_ps(vNegW,
+                                          SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+                                              &state.gbState.left[0], viewportIndexes));
+    vRes                 = SIMD_T::cmplt_ps(vertex.x, gbMult);
+    clipCodes            = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_LEFT))));
  
      // GUARDBAND_TOP
-    gbMult = SIMD_T::mul_ps(vNegW, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.top[0], viewportIndexes));
-    vRes = SIMD_T::cmplt_ps(vertex.y, gbMult);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_TOP))));
+    gbMult    = SIMD_T::mul_ps(vNegW,
+                            SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+                                &state.gbState.top[0], viewportIndexes));
+    vRes      = SIMD_T::cmplt_ps(vertex.y, gbMult);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_TOP))));
  
      // GUARDBAND_RIGHT
-    gbMult = SIMD_T::mul_ps(vertex.w, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.right[0], viewportIndexes));
-    vRes = SIMD_T::cmpgt_ps(vertex.x, gbMult);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_RIGHT))));
+    gbMult    = SIMD_T::mul_ps(vertex.w,
+                            SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+                                &state.gbState.right[0], viewportIndexes));
+    vRes      = SIMD_T::cmpgt_ps(vertex.x, gbMult);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_RIGHT))));
  
      // GUARDBAND_BOTTOM
-    gbMult = SIMD_T::mul_ps(vertex.w, SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(&state.gbState.bottom[0], viewportIndexes));
-    vRes = SIMD_T::cmpgt_ps(vertex.y, gbMult);
-    clipCodes = SIMD_T::or_ps(clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_BOTTOM))));
+    gbMult    = SIMD_T::mul_ps(vertex.w,
+                            SIMD_T::template i32gather_ps<ScaleFactor<SIMD_T>(4)>(
+                                &state.gbState.bottom[0], viewportIndexes));
+    vRes      = SIMD_T::cmpgt_ps(vertex.y, gbMult);
+    clipCodes = SIMD_T::or_ps(
+        clipCodes, SIMD_T::and_ps(vRes, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_BOTTOM))));
  }
  
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct BinnerChooser
  {
  };
  
-template<>
+template <>
  struct BinnerChooser<SIMD256>
  {
      PFN_PROCESS_PRIMS pfnBinFunc;
  
      BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
-        :pfnBinFunc(nullptr)
+        :
+        pfnBinFunc(nullptr)
      {
          if (numVertsPerPrim == 3)
          {
@@ -159,7 +185,8 @@ struct BinnerChooser<SIMD256>
      }
  
      BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
-        :pfnBinFunc(nullptr)
+        :
+        pfnBinFunc(nullptr)
      {
          switch (topology)
          {
@@ -179,7 +206,14 @@ struct BinnerChooser<SIMD256>
          };
      }
  
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD256::Vec4 prims[], uint32_t primMask, SIMD256::Integer const &primID, SIMD256::Integer &viewportIdx, SIMD256::Integer &rtIdx)
+    void BinFunc(DRAW_CONTEXT*           pDC,
+                 PA_STATE&               pa,
+                 uint32_t                workerId,
+                 SIMD256::Vec4           prims[],
+                 uint32_t                primMask,
+                 SIMD256::Integer const& primID,
+                 SIMD256::Integer&       viewportIdx,
+                 SIMD256::Integer&       rtIdx)
      {
          SWR_ASSERT(pfnBinFunc != nullptr);
  
@@ -188,13 +222,14 @@ struct BinnerChooser<SIMD256>
  };
  
  #if USE_SIMD16_FRONTEND
-template<>
+template <>
  struct BinnerChooser<SIMD512>
  {
      PFN_PROCESS_PRIMS_SIMD16 pfnBinFunc;
  
      BinnerChooser(uint32_t numVertsPerPrim, uint32_t conservativeRast)
-        :pfnBinFunc(nullptr)
+        :
+        pfnBinFunc(nullptr)
      {
          if (numVertsPerPrim == 3)
          {
@@ -212,7 +247,8 @@ struct BinnerChooser<SIMD512>
      }
  
      BinnerChooser(PRIMITIVE_TOPOLOGY topology, uint32_t conservativeRast)
-        :pfnBinFunc(nullptr)
+        :
+        pfnBinFunc(nullptr)
      {
          switch (topology)
          {
@@ -232,7 +268,14 @@ struct BinnerChooser<SIMD512>
          };
      }
  
-    void BinFunc(DRAW_CONTEXT *pDC, PA_STATE &pa, uint32_t workerId, SIMD512::Vec4 prims[], uint32_t primMask, SIMD512::Integer const &primID, SIMD512::Integer &viewportIdx, SIMD512::Integer &rtIdx)
+    void BinFunc(DRAW_CONTEXT*           pDC,
+                 PA_STATE&               pa,
+                 uint32_t                workerId,
+                 SIMD512::Vec4           prims[],
+                 uint32_t                primMask,
+                 SIMD512::Integer const& primID,
+                 SIMD512::Integer&       viewportIdx,
+                 SIMD512::Integer&       rtIdx)
      {
          SWR_ASSERT(pfnBinFunc != nullptr);
  
@@ -241,18 +284,15 @@ struct BinnerChooser<SIMD512>
  };
  
  #endif
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct SimdHelper
  {
  };
  
-template<>
+template <>
  struct SimdHelper<SIMD256>
  {
-    static SIMD256::Float insert_lo_ps(SIMD256::Float a)
-    {
-        return a;
-    }
+    static SIMD256::Float insert_lo_ps(SIMD256::Float a) { return a; }
  
      static SIMD256::Mask cmpeq_ps_mask(SIMD256::Float a, SIMD256::Float b)
      {
@@ -261,7 +301,7 @@ struct SimdHelper<SIMD256>
  };
  
  #if USE_SIMD16_FRONTEND
-template<>
+template <>
  struct SimdHelper<SIMD512>
  {
      static SIMD512::Float insert_lo_ps(SIMD256::Float a)
@@ -277,32 +317,26 @@ struct SimdHelper<SIMD512>
  
  #endif
  // Temp storage used by the clipper
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct ClipHelper
  {
  };
  
-template<>
+template <>
  struct ClipHelper<SIMD256>
  {
-    static SIMDVERTEX_T<SIMD256> *GetTempVertices()
-    {
-        return tlsTempVertices;
-    }
+    static SIMDVERTEX_T<SIMD256>* GetTempVertices() { return tlsTempVertices; }
  };
  
  #if USE_SIMD16_FRONTEND
-template<>
+template <>
  struct ClipHelper<SIMD512>
  {
-    static SIMDVERTEX_T<SIMD512> *GetTempVertices()
-    {
-        return tlsTempVertices_simd16;
-    }
+    static SIMDVERTEX_T<SIMD512>* GetTempVertices() { return tlsTempVertices_simd16; }
  };
  
  #endif
-template<typename SIMD_T, uint32_t NumVertsPerPrim>
+template <typename SIMD_T, uint32_t NumVertsPerPrim>
  class Clipper
  {
  public:
@@ -312,7 +346,7 @@ public:
          static_assert(NumVertsPerPrim >= 1 && NumVertsPerPrim <= 3, "Invalid NumVertsPerPrim");
      }
  
-    void ComputeClipCodes(Vec4<SIMD_T> vertex[], const Integer<SIMD_T> &viewportIndexes)
+    void ComputeClipCodes(Vec4<SIMD_T> vertex[], const Integer<SIMD_T>& viewportIndexes)
      {
          for (uint32_t i = 0; i < NumVertsPerPrim; ++i)
          {
@@ -348,7 +382,8 @@ public:
      {
          Float<SIMD_T> clipUnion = ComputeClipCodeUnion();
  
-        clipUnion = SIMD_T::and_ps(clipUnion, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_CLIP_MASK)));
+        clipUnion =
+            SIMD_T::and_ps(clipUnion, SIMD_T::castsi_ps(SIMD_T::set1_epi32(GUARDBAND_CLIP_MASK)));
  
          return SIMD_T::movemask_ps(SIMD_T::cmpneq_ps(clipUnion, SIMD_T::setzero_ps()));
      }
@@ -360,19 +395,21 @@ public:
  
          for (uint32_t e = 0; e < NumVertsPerPrim; ++e)
          {
-            Float<SIMD_T> vNan01 = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[0], prim[e].v[1]);
+            Float<SIMD_T> vNan01 =
+                SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[0], prim[e].v[1]);
              vNanMask = SIMD_T::or_ps(vNanMask, vNan01);
  
-            Float<SIMD_T> vNan23 = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[2], prim[e].v[3]);
+            Float<SIMD_T> vNan23 =
+                SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(prim[e].v[2], prim[e].v[3]);
              vNanMask = SIMD_T::or_ps(vNanMask, vNan23);
          }
  
          return SIMD_T::movemask_ps(vNanMask);
      }
  
-    int ComputeUserClipCullMask(PA_STATE &pa, Vec4<SIMD_T> prim[])
+    int ComputeUserClipCullMask(PA_STATE& pa, Vec4<SIMD_T> prim[])
      {
-        uint8_t cullMask = state.backendState.cullDistanceMask;
+        uint8_t  cullMask             = state.backendState.cullDistanceMask;
          uint32_t vertexClipCullOffset = state.backendState.vertexClipCullOffset;
  
          Float<SIMD_T> vClipCullMask = SIMD_T::setzero_ps();
@@ -387,7 +424,7 @@ public:
          while (_BitScanForward(&index, cullMask))
          {
              cullMask &= ~(1 << index);
-            uint32_t slot = index >> 2;
+            uint32_t slot      = index >> 2;
              uint32_t component = index & 0x3;
  
              Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
@@ -404,7 +441,8 @@ public:
                  }
  
                  // cull if cull distance < 0 || NAN
-                Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(SIMD_T::setzero_ps(), vCullComp);
+                Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
+                    SIMD_T::setzero_ps(), vCullComp);
                  vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
              }
              vClipCullMask = SIMD_T::or_ps(vClipCullMask, vCullMaskElem);
@@ -415,7 +453,7 @@ public:
          while (_BitScanForward(&index, clipMask))
          {
              clipMask &= ~(1 << index);
-            uint32_t slot = index >> 2;
+            uint32_t slot      = index >> 2;
              uint32_t component = index & 0x3;
  
              Float<SIMD_T> vCullMaskElem = SIMD_T::set1_ps(-1.0f);
@@ -431,8 +469,10 @@ public:
                      vClipComp = vClipCullDistHi[e][component];
                  }
  
-                Float<SIMD_T> vClip = SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(vClipComp, vClipComp);
-                Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(SIMD_T::setzero_ps(), vClipComp);
+                Float<SIMD_T> vClip =
+                    SIMD_T::template cmp_ps<SIMD_T::CompareType::UNORD_Q>(vClipComp, vClipComp);
+                Float<SIMD_T> vCull = SIMD_T::template cmp_ps<SIMD_T::CompareType::NLE_UQ>(
+                    SIMD_T::setzero_ps(), vClipComp);
                  vCullMaskElem = SIMD_T::and_ps(vCullMaskElem, vCull);
                  vClipCullMask = SIMD_T::or_ps(vClipCullMask, vClip);
              }
@@ -442,14 +482,19 @@ public:
          return SIMD_T::movemask_ps(vClipCullMask);
      }
  
-    void ClipSimd(const Vec4<SIMD_T> prim[], const Float<SIMD_T> &vPrimMask, const Float<SIMD_T> &vClipMask, PA_STATE &pa,
-                  const Integer<SIMD_T> &vPrimId, const Integer<SIMD_T> &vViewportIdx, const Integer<SIMD_T> &vRtIdx)
+    void ClipSimd(const Vec4<SIMD_T>     prim[],
+                  const Float<SIMD_T>&   vPrimMask,
+                  const Float<SIMD_T>&   vClipMask,
+                  PA_STATE&              pa,
+                  const Integer<SIMD_T>& vPrimId,
+                  const Integer<SIMD_T>& vViewportIdx,
+                  const Integer<SIMD_T>& vRtIdx)
      {
          // input/output vertex store for clipper
          SIMDVERTEX_T<SIMD_T> vertices[7]; // maximum 7 verts generated per triangle
  
          uint32_t constantInterpMask = state.backendState.constantInterpolationMask;
-        uint32_t provokingVertex = 0;
+        uint32_t provokingVertex    = 0;
          if (pa.binTopology == TOP_TRIANGLE_FAN)
          {
              provokingVertex = state.frontendState.provokingVertex.triFan;
@@ -470,8 +515,9 @@ public:
          for (uint32_t slot = 0; slot < backendState.numAttributes; ++slot)
          {
              // Compute absolute attrib slot in vertex array
-            uint32_t mapSlot = backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
-            maxSlot = std::max<int32_t>(maxSlot, mapSlot);
+            uint32_t mapSlot =
+                backendState.swizzleEnable ? backendState.swizzleMap[slot].sourceAttrib : slot;
+            maxSlot            = std::max<int32_t>(maxSlot, mapSlot);
              uint32_t inputSlot = backendState.vertexAttribOffset + mapSlot;
  
              pa.Assemble(inputSlot, tmpVector);
@@ -516,9 +562,11 @@ public:
  
          uint32_t numAttribs = maxSlot + 1;
  
-        Integer<SIMD_T> vNumClippedVerts = ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
+        Integer<SIMD_T> vNumClippedVerts =
+            ClipPrims((float*)&vertices[0], vPrimMask, vClipMask, numAttribs);
  
-        BinnerChooser<SIMD_T> binner(NumVertsPerPrim, pa.pDC->pState->state.rastState.conservativeRast);
+        BinnerChooser<SIMD_T> binner(NumVertsPerPrim,
+                                     pa.pDC->pState->state.rastState.conservativeRast);
  
          // set up new PA for binning clipped primitives
          PRIMITIVE_TOPOLOGY clipTopology = TOP_UNKNOWN;
@@ -545,20 +593,20 @@ public:
              SWR_ASSERT(0 && "Unexpected points in clipper.");
          }
  
-        const uint32_t *pVertexCount = reinterpret_cast<const uint32_t *>(&vNumClippedVerts);
-        const uint32_t *pPrimitiveId = reinterpret_cast<const uint32_t *>(&vPrimId);
-        const uint32_t *pViewportIdx = reinterpret_cast<const uint32_t *>(&vViewportIdx);
-        const uint32_t *pRtIdx = reinterpret_cast<const uint32_t *>(&vRtIdx);
-
-        const SIMD256::Integer vOffsets = SIMD256::set_epi32(
-            0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
-            6 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            5 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            4 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            3 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            2 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            1 * sizeof(SIMDVERTEX_T<SIMD_T>),
-            0 * sizeof(SIMDVERTEX_T<SIMD_T>));
+        const uint32_t* pVertexCount = reinterpret_cast<const uint32_t*>(&vNumClippedVerts);
+        const uint32_t* pPrimitiveId = reinterpret_cast<const uint32_t*>(&vPrimId);
+        const uint32_t* pViewportIdx = reinterpret_cast<const uint32_t*>(&vViewportIdx);
+        const uint32_t* pRtIdx       = reinterpret_cast<const uint32_t*>(&vRtIdx);
+
+        const SIMD256::Integer vOffsets =
+            SIMD256::set_epi32(0 * sizeof(SIMDVERTEX_T<SIMD_T>), // unused lane
+                               6 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               5 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               4 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               3 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               2 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               1 * sizeof(SIMDVERTEX_T<SIMD_T>),
+                               0 * sizeof(SIMDVERTEX_T<SIMD_T>));
  
          // only need to gather 7 verts
          // @todo dynamic mask based on actual # of verts generated per lane
@@ -571,14 +619,16 @@ public:
          // for triangle fan
  
  #if defined(_DEBUG)
-        // TODO: need to increase stack size, allocating SIMD16-widened transposedPrims causes stack overflow in debug builds
-        SIMDVERTEX_T<SIMD_T> *transposedPrims = reinterpret_cast<SIMDVERTEX_T<SIMD_T> *>(AlignedMalloc(sizeof(SIMDVERTEX_T<SIMD_T>) * 2, 64));
+        // TODO: need to increase stack size, allocating SIMD16-widened transposedPrims causes stack
+        // overflow in debug builds
+        SIMDVERTEX_T<SIMD_T>* transposedPrims = reinterpret_cast<SIMDVERTEX_T<SIMD_T>*>(
+            AlignedMalloc(sizeof(SIMDVERTEX_T<SIMD_T>) * 2, 64));
  
  #else
-        SIMDVERTEX_T<SIMD_T> transposedPrims[2];
+        SIMDVERTEX_T<SIMD_T>  transposedPrims[2];
  
  #endif
-        uint32_t numInputPrims = pa.NumPrims();
+        uint32_t              numInputPrims = pa.NumPrims();
          for (uint32_t inputPrim = 0; inputPrim < numInputPrims; ++inputPrim)
          {
              uint32_t numEmittedVerts = pVertexCount[inputPrim];
@@ -598,7 +648,8 @@ public:
              // for triangle fan
  
              // transpose pos
-            uint8_t *pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[VERTEX_POSITION_SLOT]) + sizeof(float) * inputPrim;
+            uint8_t* pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[VERTEX_POSITION_SLOT]) +
+                             sizeof(float) * inputPrim;
  
  #if 0
              // TEMPORARY WORKAROUND for bizarre VS2015 code-gen bug
@@ -607,13 +658,17 @@ public:
  #endif
              for (uint32_t c = 0; c < 4; ++c)
              {
-                SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
-                transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+                SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                    SIMD256::setzero_ps(), reinterpret_cast<const float*>(pBase), vOffsets, vMask);
+                transposedPrims[0].attrib[VERTEX_POSITION_SLOT][c] =
+                    SimdHelper<SIMD_T>::insert_lo_ps(temp);
                  pBase += sizeof(Float<SIMD_T>);
              }
  
              // transpose attribs
-            pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[backendState.vertexAttribOffset]) + sizeof(float) * inputPrim;
+            pBase =
+                reinterpret_cast<uint8_t*>(&vertices[0].attrib[backendState.vertexAttribOffset]) +
+                sizeof(float) * inputPrim;
  
              for (uint32_t attrib = 0; attrib < numAttribs; ++attrib)
              {
@@ -621,8 +676,14 @@ public:
  
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
-                    transposedPrims[0].attrib[attribSlot][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+                    SIMD256::Float temp =
+                        SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                            SIMD256::setzero_ps(),
+                            reinterpret_cast<const float*>(pBase),
+                            vOffsets,
+                            vMask);
+                    transposedPrims[0].attrib[attribSlot][c] =
+                        SimdHelper<SIMD_T>::insert_lo_ps(temp);
                      pBase += sizeof(Float<SIMD_T>);
                  }
              }
@@ -631,40 +692,60 @@ public:
              uint32_t vertexClipCullSlot = backendState.vertexClipCullOffset;
              if (state.backendState.clipDistanceMask & 0x0f)
              {
-                pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[vertexClipCullSlot]) + sizeof(float) * inputPrim;
+                pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[vertexClipCullSlot]) +
+                        sizeof(float) * inputPrim;
  
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
-                    transposedPrims[0].attrib[vertexClipCullSlot][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+                    SIMD256::Float temp =
+                        SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                            SIMD256::setzero_ps(),
+                            reinterpret_cast<const float*>(pBase),
+                            vOffsets,
+                            vMask);
+                    transposedPrims[0].attrib[vertexClipCullSlot][c] =
+                        SimdHelper<SIMD_T>::insert_lo_ps(temp);
                      pBase += sizeof(Float<SIMD_T>);
                  }
              }
  
              if (state.backendState.clipDistanceMask & 0xf0)
              {
-                pBase = reinterpret_cast<uint8_t *>(&vertices[0].attrib[vertexClipCullSlot + 1]) + sizeof(float) * inputPrim;
+                pBase = reinterpret_cast<uint8_t*>(&vertices[0].attrib[vertexClipCullSlot + 1]) +
+                        sizeof(float) * inputPrim;
  
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    SIMD256::Float temp = SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD256::setzero_ps(), reinterpret_cast<const float *>(pBase), vOffsets, vMask);
-                    transposedPrims[0].attrib[vertexClipCullSlot + 1][c] = SimdHelper<SIMD_T>::insert_lo_ps(temp);
+                    SIMD256::Float temp =
+                        SIMD256::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                            SIMD256::setzero_ps(),
+                            reinterpret_cast<const float*>(pBase),
+                            vOffsets,
+                            vMask);
+                    transposedPrims[0].attrib[vertexClipCullSlot + 1][c] =
+                        SimdHelper<SIMD_T>::insert_lo_ps(temp);
                      pBase += sizeof(Float<SIMD_T>);
                  }
              }
  
-            PA_STATE_OPT clipPA(pDC, numEmittedPrims, reinterpret_cast<uint8_t *>(&transposedPrims[0]), numEmittedVerts, SWR_VTX_NUM_SLOTS, true, NumVertsPerPrim, clipTopology);
+            PA_STATE_OPT clipPA(pDC,
+                                numEmittedPrims,
+                                reinterpret_cast<uint8_t*>(&transposedPrims[0]),
+                                numEmittedVerts,
+                                SWR_VTX_NUM_SLOTS,
+                                true,
+                                NumVertsPerPrim,
+                                clipTopology);
              clipPA.viewportArrayActive = pa.viewportArrayActive;
-            clipPA.rtArrayActive = pa.rtArrayActive;
+            clipPA.rtArrayActive       = pa.rtArrayActive;
  
-            static const uint32_t primMaskMap[] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f };
+            static const uint32_t primMaskMap[] = {0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f};
  
              const uint32_t primMask = primMaskMap[numEmittedPrims];
  
-            const Integer<SIMD_T> primID = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
+            const Integer<SIMD_T> primID      = SIMD_T::set1_epi32(pPrimitiveId[inputPrim]);
              const Integer<SIMD_T> viewportIdx = SIMD_T::set1_epi32(pViewportIdx[inputPrim]);
-            const Integer<SIMD_T> rtIdx = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
-
+            const Integer<SIMD_T> rtIdx       = SIMD_T::set1_epi32(pRtIdx[inputPrim]);
  
              while (clipPA.GetNextStreamOutput())
              {
@@ -676,7 +757,8 @@ public:
  
                      if (assemble)
                      {
-                        binner.pfnBinFunc(pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
+                        binner.pfnBinFunc(
+                            pDC, clipPA, workerId, attrib, primMask, primID, viewportIdx, rtIdx);
                      }
  
                  } while (clipPA.NextPrim());
@@ -691,12 +773,17 @@ public:
          UPDATE_STAT_FE(CPrimitives, numClippedPrims);
      }
  
-    void ExecuteStage(PA_STATE &pa, Vec4<SIMD_T> prim[], uint32_t primMask,
-                      Integer<SIMD_T> const &primId, Integer<SIMD_T> const &viewportIdx, Integer<SIMD_T> const &rtIdx)
+    void ExecuteStage(PA_STATE&              pa,
+                      Vec4<SIMD_T>           prim[],
+                      uint32_t               primMask,
+                      Integer<SIMD_T> const& primId,
+                      Integer<SIMD_T> const& viewportIdx,
+                      Integer<SIMD_T> const& rtIdx)
      {
          SWR_ASSERT(pa.pDC != nullptr);
  
-        BinnerChooser<SIMD_T> binner(pa.binTopology, pa.pDC->pState->state.rastState.conservativeRast);
+        BinnerChooser<SIMD_T> binner(pa.binTopology,
+                                     pa.pDC->pState->state.rastState.conservativeRast);
  
          // update clipper invocations pipeline stat
          uint32_t numInvoc = _mm_popcnt_u32(primMask);
@@ -707,7 +794,7 @@ public:
          // cull prims with NAN coords
          primMask &= ~ComputeNaNMask(prim);
  
-        // user cull distance cull 
+        // user cull distance cull
          if (state.backendState.cullDistanceMask | state.backendState.clipDistanceMask)
          {
              primMask &= ~ComputeUserClipCullMask(pa, prim);
@@ -715,10 +802,12 @@ public:
  
          Float<SIMD_T> clipIntersection = ComputeClipCodeIntersection();
          // Mask out non-frustum codes
-        clipIntersection = SIMD_T::and_ps(clipIntersection, SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_CLIP_MASK)));
+        clipIntersection = SIMD_T::and_ps(clipIntersection,
+                                          SIMD_T::castsi_ps(SIMD_T::set1_epi32(FRUSTUM_CLIP_MASK)));
  
          // cull prims outside view frustum
-        int validMask = primMask & SimdHelper<SIMD_T>::cmpeq_ps_mask(clipIntersection, SIMD_T::setzero_ps());
+        int validMask =
+            primMask & SimdHelper<SIMD_T>::cmpeq_ps_mask(clipIntersection, SIMD_T::setzero_ps());
  
          // skip clipping for points
          uint32_t clipMask = 0;
@@ -734,7 +823,13 @@ public:
              RDTSC_BEGIN(FEGuardbandClip, pa.pDC->drawId);
              // we have to clip tris, execute the clipper, which will also
              // call the binner
-            ClipSimd(prim, SIMD_T::vmask_ps(validMask), SIMD_T::vmask_ps(clipMask), pa, primId, viewportIdx, rtIdx);
+            ClipSimd(prim,
+                     SIMD_T::vmask_ps(validMask),
+                     SIMD_T::vmask_ps(clipMask),
+                     pa,
+                     primId,
+                     viewportIdx,
+                     rtIdx);
              RDTSC_END(FEGuardbandClip, 1);
          }
          else if (validMask)
@@ -743,24 +838,26 @@ public:
              UPDATE_STAT_FE(CPrimitives, _mm_popcnt_u32(validMask));
  
              // forward valid prims directly to binner
-            binner.pfnBinFunc(this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
+            binner.pfnBinFunc(
+                this->pDC, pa, this->workerId, prim, validMask, primId, viewportIdx, rtIdx);
          }
      }
  
  private:
-    Float<SIMD_T> ComputeInterpFactor(Float<SIMD_T> const &boundaryCoord0, Float<SIMD_T> const &boundaryCoord1)
+    Float<SIMD_T> ComputeInterpFactor(Float<SIMD_T> const& boundaryCoord0,
+                                      Float<SIMD_T> const& boundaryCoord1)
      {
          return SIMD_T::div_ps(boundaryCoord0, SIMD_T::sub_ps(boundaryCoord0, boundaryCoord1));
      }
  
-    Integer<SIMD_T> ComputeOffsets(uint32_t attrib, Integer<SIMD_T> const &vIndices, uint32_t component)
+    Integer<SIMD_T>
+    ComputeOffsets(uint32_t attrib, Integer<SIMD_T> const& vIndices, uint32_t component)
      {
          const uint32_t simdVertexStride = sizeof(SIMDVERTEX_T<SIMD_T>);
          const uint32_t componentStride  = sizeof(Float<SIMD_T>);
          const uint32_t attribStride     = sizeof(Vec4<SIMD_T>);
  
-        static const OSALIGNSIMD16(uint32_t) elemOffset[16] =
-        {
+        static const OSALIGNSIMD16(uint32_t) elemOffset[16] = {
              0 * sizeof(float),
              1 * sizeof(float),
              2 * sizeof(float),
@@ -779,15 +876,19 @@ private:
              15 * sizeof(float),
          };
  
-        static_assert(sizeof(Integer<SIMD_T>) <= sizeof(elemOffset), "Clipper::ComputeOffsets, Increase number of element offsets.");
+        static_assert(sizeof(Integer<SIMD_T>) <= sizeof(elemOffset),
+                      "Clipper::ComputeOffsets, Increase number of element offsets.");
  
-        Integer<SIMD_T> vElemOffset = SIMD_T::loadu_si(reinterpret_cast<const Integer<SIMD_T> *>(elemOffset));
+        Integer<SIMD_T> vElemOffset =
+            SIMD_T::loadu_si(reinterpret_cast<const Integer<SIMD_T>*>(elemOffset));
  
          // step to the simdvertex
-        Integer<SIMD_T> vOffsets = SIMD_T::mullo_epi32(vIndices, SIMD_T::set1_epi32(simdVertexStride));
+        Integer<SIMD_T> vOffsets =
+            SIMD_T::mullo_epi32(vIndices, SIMD_T::set1_epi32(simdVertexStride));
  
          // step to the attribute and component
-        vOffsets = SIMD_T::add_epi32(vOffsets, SIMD_T::set1_epi32(attribStride * attrib + componentStride * component));
+        vOffsets = SIMD_T::add_epi32(
+            vOffsets, SIMD_T::set1_epi32(attribStride * attrib + componentStride * component));
  
          // step to the lane
          vOffsets = SIMD_T::add_epi32(vOffsets, vElemOffset);
@@ -795,53 +896,71 @@ private:
          return vOffsets;
      }
  
-    Float<SIMD_T> GatherComponent(const float* pBuffer, uint32_t attrib, Float<SIMD_T> const &vMask, Integer<SIMD_T> const &vIndices, uint32_t component)
+    Float<SIMD_T> GatherComponent(const float*           pBuffer,
+                                  uint32_t               attrib,
+                                  Float<SIMD_T> const&   vMask,
+                                  Integer<SIMD_T> const& vIndices,
+                                  uint32_t               component)
      {
          Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
-        Float<SIMD_T> vSrc = SIMD_T::setzero_ps();
+        Float<SIMD_T>   vSrc     = SIMD_T::setzero_ps();
  
-        return SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(vSrc, pBuffer, vOffsets, vMask);
+        return SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+            vSrc, pBuffer, vOffsets, vMask);
      }
  
-    void ScatterComponent(const float* pBuffer, uint32_t attrib, Float<SIMD_T> const &vMask, Integer<SIMD_T> const &vIndices, uint32_t component, Float<SIMD_T> const &vSrc)
+    void ScatterComponent(const float*           pBuffer,
+                          uint32_t               attrib,
+                          Float<SIMD_T> const&   vMask,
+                          Integer<SIMD_T> const& vIndices,
+                          uint32_t               component,
+                          Float<SIMD_T> const&   vSrc)
      {
          Integer<SIMD_T> vOffsets = ComputeOffsets(attrib, vIndices, component);
  
-        const uint32_t *pOffsets = reinterpret_cast<const uint32_t *>(&vOffsets);
-        const float *pSrc = reinterpret_cast<const float *>(&vSrc);
-        uint32_t mask = SIMD_T::movemask_ps(vMask);
-        DWORD lane;
+        const uint32_t* pOffsets = reinterpret_cast<const uint32_t*>(&vOffsets);
+        const float*    pSrc     = reinterpret_cast<const float*>(&vSrc);
+        uint32_t        mask     = SIMD_T::movemask_ps(vMask);
+        DWORD           lane;
          while (_BitScanForward(&lane, mask))
          {
              mask &= ~(1 << lane);
-            const uint8_t *pBuf = reinterpret_cast<const uint8_t *>(pBuffer) + pOffsets[lane];
-            *(float *)pBuf = pSrc[lane];
+            const uint8_t* pBuf = reinterpret_cast<const uint8_t*>(pBuffer) + pOffsets[lane];
+            *(float*)pBuf       = pSrc[lane];
          }
      }
  
-    template<SWR_CLIPCODES ClippingPlane>
-    void intersect(
-        const Float<SIMD_T> &vActiveMask,  // active lanes to operate on
-        const Integer<SIMD_T> &s,          // index to first edge vertex v0 in pInPts.
-        const Integer<SIMD_T> &p,          // index to second edge vertex v1 in pInPts.
-        const Vec4<SIMD_T> &v1,            // vertex 0 position
-        const Vec4<SIMD_T> &v2,            // vertex 1 position
-        Integer<SIMD_T> &outIndex,         // output index.
-        const float *pInVerts,                      // array of all the input positions.
-        uint32_t numInAttribs,                      // number of attributes per vertex.
-        float *pOutVerts)                           // array of output positions. We'll write our new intersection point at i*4.
+    template <SWR_CLIPCODES ClippingPlane>
+    void intersect(const Float<SIMD_T>&   vActiveMask,  // active lanes to operate on
+                   const Integer<SIMD_T>& s,            // index to first edge vertex v0 in pInPts.
+                   const Integer<SIMD_T>& p,            // index to second edge vertex v1 in pInPts.
+                   const Vec4<SIMD_T>&    v1,           // vertex 0 position
+                   const Vec4<SIMD_T>&    v2,           // vertex 1 position
+                   Integer<SIMD_T>&       outIndex,     // output index.
+                   const float*           pInVerts,     // array of all the input positions.
+                   uint32_t               numInAttribs, // number of attributes per vertex.
+                   float* pOutVerts) // array of output positions. We'll write our new intersection
+                                     // point at i*4.
      {
-        uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
+        uint32_t vertexAttribOffset   = this->state.backendState.vertexAttribOffset;
          uint32_t vertexClipCullOffset = this->state.backendState.vertexClipCullOffset;
  
          // compute interpolation factor
          Float<SIMD_T> t;
          switch (ClippingPlane)
          {
-        case FRUSTUM_LEFT:      t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[0]), SIMD_T::add_ps(v2[3], v2[0])); break;
-        case FRUSTUM_RIGHT:     t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[0]), SIMD_T::sub_ps(v2[3], v2[0])); break;
-        case FRUSTUM_TOP:       t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[1]), SIMD_T::add_ps(v2[3], v2[1])); break;
-        case FRUSTUM_BOTTOM:    t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[1]), SIMD_T::sub_ps(v2[3], v2[1])); break;
+        case FRUSTUM_LEFT:
+            t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[0]), SIMD_T::add_ps(v2[3], v2[0]));
+            break;
+        case FRUSTUM_RIGHT:
+            t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[0]), SIMD_T::sub_ps(v2[3], v2[0]));
+            break;
+        case FRUSTUM_TOP:
+            t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[1]), SIMD_T::add_ps(v2[3], v2[1]));
+            break;
+        case FRUSTUM_BOTTOM:
+            t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[1]), SIMD_T::sub_ps(v2[3], v2[1]));
+            break;
          case FRUSTUM_NEAR:
              // DX Znear plane is 0, GL is -w
              if (this->state.rastState.clipHalfZ)
@@ -853,8 +972,11 @@ private:
                  t = ComputeInterpFactor(SIMD_T::add_ps(v1[3], v1[2]), SIMD_T::add_ps(v2[3], v2[2]));
              }
              break;
-        case FRUSTUM_FAR:       t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[2]), SIMD_T::sub_ps(v2[3], v2[2])); break;
-        default: SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
+        case FRUSTUM_FAR:
+            t = ComputeInterpFactor(SIMD_T::sub_ps(v1[3], v1[2]), SIMD_T::sub_ps(v2[3], v2[2]));
+            break;
+        default:
+            SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
          };
  
          // interpolate position and store
@@ -872,7 +994,8 @@ private:
              {
                  Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
                  Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
-                Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+                Float<SIMD_T> vOutAttrib =
+                    SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
                  ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
              }
          }
@@ -885,7 +1008,8 @@ private:
              {
                  Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
                  Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
-                Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+                Float<SIMD_T> vOutAttrib =
+                    SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
                  ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
              }
          }
@@ -897,44 +1021,58 @@ private:
              {
                  Float<SIMD_T> vAttrib0 = GatherComponent(pInVerts, attribSlot, vActiveMask, s, c);
                  Float<SIMD_T> vAttrib1 = GatherComponent(pInVerts, attribSlot, vActiveMask, p, c);
-                Float<SIMD_T> vOutAttrib = SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
+                Float<SIMD_T> vOutAttrib =
+                    SIMD_T::fmadd_ps(SIMD_T::sub_ps(vAttrib1, vAttrib0), t, vAttrib0);
                  ScatterComponent(pOutVerts, attribSlot, vActiveMask, outIndex, c, vOutAttrib);
              }
          }
      }
  
-    template<SWR_CLIPCODES ClippingPlane>
-    Float<SIMD_T> inside(const Vec4<SIMD_T> &v)
+    template <SWR_CLIPCODES ClippingPlane>
+    Float<SIMD_T> inside(const Vec4<SIMD_T>& v)
      {
          switch (ClippingPlane)
          {
-        case FRUSTUM_LEFT:      return SIMD_T::cmpge_ps(v[0], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
-        case FRUSTUM_RIGHT:     return SIMD_T::cmple_ps(v[0], v[3]);
-        case FRUSTUM_TOP:       return SIMD_T::cmpge_ps(v[1], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
-        case FRUSTUM_BOTTOM:    return SIMD_T::cmple_ps(v[1], v[3]);
-        case FRUSTUM_NEAR:      return SIMD_T::cmpge_ps(v[2], this->state.rastState.clipHalfZ ? SIMD_T::setzero_ps() : SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
-        case FRUSTUM_FAR:       return SIMD_T::cmple_ps(v[2], v[3]);
+        case FRUSTUM_LEFT:
+            return SIMD_T::cmpge_ps(v[0], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+        case FRUSTUM_RIGHT:
+            return SIMD_T::cmple_ps(v[0], v[3]);
+        case FRUSTUM_TOP:
+            return SIMD_T::cmpge_ps(v[1], SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+        case FRUSTUM_BOTTOM:
+            return SIMD_T::cmple_ps(v[1], v[3]);
+        case FRUSTUM_NEAR:
+            return SIMD_T::cmpge_ps(v[2],
+                                    this->state.rastState.clipHalfZ
+                                        ? SIMD_T::setzero_ps()
+                                        : SIMD_T::mul_ps(v[3], SIMD_T::set1_ps(-1.0f)));
+        case FRUSTUM_FAR:
+            return SIMD_T::cmple_ps(v[2], v[3]);
          default:
              SWR_INVALID("invalid clipping plane: %d", ClippingPlane);
              return SIMD_T::setzero_ps();
          }
      }
  
-    template<SWR_CLIPCODES ClippingPlane>
-    Integer<SIMD_T> ClipTriToPlane(const float *pInVerts, const Integer<SIMD_T> &vNumInPts, uint32_t numInAttribs, float *pOutVerts)
+    template <SWR_CLIPCODES ClippingPlane>
+    Integer<SIMD_T> ClipTriToPlane(const float*           pInVerts,
+                                   const Integer<SIMD_T>& vNumInPts,
+                                   uint32_t               numInAttribs,
+                                   float*                 pOutVerts)
      {
          uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
  
-        Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
-        Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
-        Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
+        Integer<SIMD_T> vCurIndex   = SIMD_T::setzero_si();
+        Integer<SIMD_T> vOutIndex   = SIMD_T::setzero_si();
+        Float<SIMD_T>   vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
  
          while (!SIMD_T::testz_ps(vActiveMask, vActiveMask)) // loop until activeMask is empty
          {
-            Integer<SIMD_T> s = vCurIndex;
-            Integer<SIMD_T> p = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
+            Integer<SIMD_T> s             = vCurIndex;
+            Integer<SIMD_T> p             = SIMD_T::add_epi32(s, SIMD_T::set1_epi32(1));
              Integer<SIMD_T> underFlowMask = SIMD_T::cmpgt_epi32(vNumInPts, p);
-            p = SIMD_T::castps_si(SIMD_T::blendv_ps(SIMD_T::setzero_ps(), SIMD_T::castsi_ps(p), SIMD_T::castsi_ps(underFlowMask)));
+            p                             = SIMD_T::castps_si(SIMD_T::blendv_ps(
+                SIMD_T::setzero_ps(), SIMD_T::castsi_ps(p), SIMD_T::castsi_ps(underFlowMask)));
  
              // gather position
              Vec4<SIMD_T> vInPos0, vInPos1;
@@ -950,7 +1088,7 @@ private:
  
              // compute intersection mask (s_in != p_in)
              Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
-            intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
+            intersectMask               = SIMD_T::and_ps(intersectMask, vActiveMask);
  
              // store s if inside
              s_in = SIMD_T::and_ps(s_in, vActiveMask);
@@ -959,7 +1097,8 @@ private:
                  // store position
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
+                    ScatterComponent(
+                        pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
                  }
  
                  // store attribs
@@ -996,34 +1135,47 @@ private:
                  }
  
                  // increment outIndex
-                vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
+                vOutIndex = SIMD_T::blendv_epi32(
+                    vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
              }
  
              // compute and store intersection
              if (!SIMD_T::testz_ps(intersectMask, intersectMask))
              {
-                intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
+                intersect<ClippingPlane>(intersectMask,
+                                         s,
+                                         p,
+                                         vInPos0,
+                                         vInPos1,
+                                         vOutIndex,
+                                         pInVerts,
+                                         numInAttribs,
+                                         pOutVerts);
  
                  // increment outIndex for active lanes
-                vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
+                vOutIndex = SIMD_T::blendv_epi32(
+                    vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
              }
  
              // increment loop index and update active mask
-            vCurIndex = SIMD_T::add_epi32(vCurIndex, SIMD_T::set1_epi32(1));
+            vCurIndex   = SIMD_T::add_epi32(vCurIndex, SIMD_T::set1_epi32(1));
              vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
          }
  
          return vOutIndex;
      }
  
-    template<SWR_CLIPCODES ClippingPlane>
-    Integer<SIMD_T> ClipLineToPlane(const float *pInVerts, const Integer<SIMD_T> &vNumInPts, uint32_t numInAttribs, float *pOutVerts)
+    template <SWR_CLIPCODES ClippingPlane>
+    Integer<SIMD_T> ClipLineToPlane(const float*           pInVerts,
+                                    const Integer<SIMD_T>& vNumInPts,
+                                    uint32_t               numInAttribs,
+                                    float*                 pOutVerts)
      {
          uint32_t vertexAttribOffset = this->state.backendState.vertexAttribOffset;
  
-        Integer<SIMD_T> vCurIndex = SIMD_T::setzero_si();
-        Integer<SIMD_T> vOutIndex = SIMD_T::setzero_si();
-        Float<SIMD_T> vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
+        Integer<SIMD_T> vCurIndex   = SIMD_T::setzero_si();
+        Integer<SIMD_T> vOutIndex   = SIMD_T::setzero_si();
+        Float<SIMD_T>   vActiveMask = SIMD_T::castsi_ps(SIMD_T::cmplt_epi32(vCurIndex, vNumInPts));
  
          if (!SIMD_T::testz_ps(vActiveMask, vActiveMask))
          {
@@ -1044,7 +1196,7 @@ private:
  
              // compute intersection mask (s_in != p_in)
              Float<SIMD_T> intersectMask = SIMD_T::xor_ps(s_in, p_in);
-            intersectMask = SIMD_T::and_ps(intersectMask, vActiveMask);
+            intersectMask               = SIMD_T::and_ps(intersectMask, vActiveMask);
  
              // store s if inside
              s_in = SIMD_T::and_ps(s_in, vActiveMask);
@@ -1052,7 +1204,8 @@ private:
              {
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
+                    ScatterComponent(
+                        pOutVerts, VERTEX_POSITION_SLOT, s_in, vOutIndex, c, vInPos0[c]);
                  }
  
                  // interpolate attributes and store
@@ -1067,16 +1220,26 @@ private:
                  }
  
                  // increment outIndex
-                vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
+                vOutIndex = SIMD_T::blendv_epi32(
+                    vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), s_in);
              }
  
              // compute and store intersection
              if (!SIMD_T::testz_ps(intersectMask, intersectMask))
              {
-                intersect<ClippingPlane>(intersectMask, s, p, vInPos0, vInPos1, vOutIndex, pInVerts, numInAttribs, pOutVerts);
+                intersect<ClippingPlane>(intersectMask,
+                                         s,
+                                         p,
+                                         vInPos0,
+                                         vInPos1,
+                                         vOutIndex,
+                                         pInVerts,
+                                         numInAttribs,
+                                         pOutVerts);
  
                  // increment outIndex for active lanes
-                vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
+                vOutIndex = SIMD_T::blendv_epi32(
+                    vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), intersectMask);
              }
  
              // store p if inside
@@ -1085,7 +1248,8 @@ private:
              {
                  for (uint32_t c = 0; c < 4; ++c)
                  {
-                    ScatterComponent(pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
+                    ScatterComponent(
+                        pOutVerts, VERTEX_POSITION_SLOT, p_in, vOutIndex, c, vInPos1[c]);
                  }
  
                  // interpolate attributes and store
@@ -1100,17 +1264,21 @@ private:
                  }
  
                  // increment outIndex
-                vOutIndex = SIMD_T::blendv_epi32(vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), p_in);
+                vOutIndex = SIMD_T::blendv_epi32(
+                    vOutIndex, SIMD_T::add_epi32(vOutIndex, SIMD_T::set1_epi32(1)), p_in);
              }
          }
  
          return vOutIndex;
      }
  
-    Integer<SIMD_T> ClipPrims(float *pVertices, const Float<SIMD_T> &vPrimMask, const Float<SIMD_T> &vClipMask, int numAttribs)
+    Integer<SIMD_T> ClipPrims(float*               pVertices,
+                              const Float<SIMD_T>& vPrimMask,
+                              const Float<SIMD_T>& vClipMask,
+                              int                  numAttribs)
      {
          // temp storage
-        float *pTempVerts = reinterpret_cast<float *>(ClipHelper<SIMD_T>::GetTempVertices());
+        float* pTempVerts = reinterpret_cast<float*>(ClipHelper<SIMD_T>::GetTempVertices());
  
          // zero out num input verts for non-active lanes
          Integer<SIMD_T> vNumInPts = SIMD_T::set1_epi32(NumVertsPerPrim);
@@ -1122,45 +1290,109 @@ private:
          {
              vNumOutPts = ClipTriToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
              vNumOutPts = ClipTriToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
-            vNumOutPts = ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
-            vNumOutPts = ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
-            vNumOutPts = ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+            vNumOutPts =
+                ClipTriToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+            vNumOutPts =
+                ClipTriToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+            vNumOutPts =
+                ClipTriToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
              vNumOutPts = ClipTriToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
          }
          else
          {
              SWR_ASSERT(NumVertsPerPrim == 2);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
-            vNumOutPts = ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_NEAR>(pVertices, vNumInPts, numAttribs, pTempVerts);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_FAR>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_LEFT>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_RIGHT>(pTempVerts, vNumOutPts, numAttribs, pVertices);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_BOTTOM>(pVertices, vNumOutPts, numAttribs, pTempVerts);
+            vNumOutPts =
+                ClipLineToPlane<FRUSTUM_TOP>(pTempVerts, vNumOutPts, numAttribs, pVertices);
          }
  
          // restore num verts for non-clipped, active lanes
          Float<SIMD_T> vNonClippedMask = SIMD_T::andnot_ps(vClipMask, vPrimMask);
-        vNumOutPts = SIMD_T::blendv_epi32(vNumOutPts, SIMD_T::set1_epi32(NumVertsPerPrim), vNonClippedMask);
+        vNumOutPts =
+            SIMD_T::blendv_epi32(vNumOutPts, SIMD_T::set1_epi32(NumVertsPerPrim), vNonClippedMask);
  
          return vNumOutPts;
      }
  
-    const uint32_t workerId{ 0 };
-    DRAW_CONTEXT *pDC{ nullptr };
-    const API_STATE &state;
-    Float<SIMD_T> clipCodes[NumVertsPerPrim];
+    const uint32_t   workerId{0};
+    DRAW_CONTEXT*    pDC{nullptr};
+    const API_STATE& state;
+    Float<SIMD_T>    clipCodes[NumVertsPerPrim];
  };
  
-
  // pipeline stage functions
-void ClipRectangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipTriangles(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void ClipPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], uint32_t primMask, simdscalari const &primId, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+void ClipRectangles(DRAW_CONTEXT*      pDC,
+                    PA_STATE&          pa,
+                    uint32_t           workerId,
+                    simdvector         prims[],
+                    uint32_t           primMask,
+                    simdscalari const& primId,
+                    simdscalari const& viewportIdx,
+                    simdscalari const& rtIdx);
+void ClipTriangles(DRAW_CONTEXT*      pDC,
+                   PA_STATE&          pa,
+                   uint32_t           workerId,
+                   simdvector         prims[],
+                   uint32_t           primMask,
+                   simdscalari const& primId,
+                   simdscalari const& viewportIdx,
+                   simdscalari const& rtIdx);
+void ClipLines(DRAW_CONTEXT*      pDC,
+               PA_STATE&          pa,
+               uint32_t           workerId,
+               simdvector         prims[],
+               uint32_t           primMask,
+               simdscalari const& primId,
+               simdscalari const& viewportIdx,
+               simdscalari const& rtIdx);
+void ClipPoints(DRAW_CONTEXT*      pDC,
+                PA_STATE&          pa,
+                uint32_t           workerId,
+                simdvector         prims[],
+                uint32_t           primMask,
+                simdscalari const& primId,
+                simdscalari const& viewportIdx,
+                simdscalari const& rtIdx);
  #if USE_SIMD16_FRONTEND
-void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[], uint32_t primMask, simd16scalari const &primId, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL ClipRectangles_simd16(DRAW_CONTEXT*        pDC,
+                                    PA_STATE&            pa,
+                                    uint32_t             workerId,
+                                    simd16vector         prims[],
+                                    uint32_t             primMask,
+                                    simd16scalari const& primId,
+                                    simd16scalari const& viewportIdx,
+                                    simd16scalari const& rtIdx);
+void SIMDCALL ClipTriangles_simd16(DRAW_CONTEXT*        pDC,
+                                   PA_STATE&            pa,
+                                   uint32_t             workerId,
+                                   simd16vector         prims[],
+                                   uint32_t             primMask,
+                                   simd16scalari const& primId,
+                                   simd16scalari const& viewportIdx,
+                                   simd16scalari const& rtIdx);
+void SIMDCALL ClipLines_simd16(DRAW_CONTEXT*        pDC,
+                               PA_STATE&            pa,
+                               uint32_t             workerId,
+                               simd16vector         prims[],
+                               uint32_t             primMask,
+                               simd16scalari const& primId,
+                               simd16scalari const& viewportIdx,
+                               simd16scalari const& rtIdx);
+void SIMDCALL ClipPoints_simd16(DRAW_CONTEXT*        pDC,
+                                PA_STATE&            pa,
+                                uint32_t             workerId,
+                                simd16vector         prims[],
+                                uint32_t             primMask,
+                                simd16scalari const& primId,
+                                simd16scalari const& viewportIdx,
+                                simd16scalari const& rtIdx);
  #endif
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h

index 00c3a87c1889b296df8bb3ad2b7935388cae007c..9e7f96cdeac818783e3292e3047e13b23cb25c55 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
+++ b/src/gallium/drivers/swr/rasterizer/core/conservativeRast.h
@@ -1,28 +1,28 @@
  /****************************************************************************
-* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file conservativerast.h
-*
-******************************************************************************/
+ * Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file conservativerast.h
+ *
+ ******************************************************************************/
  #pragma once
  #include <type_traits>
  #include "common/simdintrin.h"
@@ -38,77 +38,82 @@ enum FixedPointFmt
  //////////////////////////////////////////////////////////////////////////
  /// @brief convenience typedefs for supported Fixed Point precisions
  typedef std::integral_constant<uint32_t, FP_UNINIT> Fixed_Uninit;
-typedef std::integral_constant<uint32_t, _16_8> Fixed_16_8;
-typedef std::integral_constant<uint32_t, _16_9> Fixed_16_9;
-typedef std::integral_constant<uint32_t, _X_16> Fixed_X_16;
+typedef std::integral_constant<uint32_t, _16_8>     Fixed_16_8;
+typedef std::integral_constant<uint32_t, _16_9>     Fixed_16_9;
+typedef std::integral_constant<uint32_t, _X_16>     Fixed_X_16;
  
  //////////////////////////////////////////////////////////////////////////
  /// @struct FixedPointTraits
-/// @brief holds constants relating to converting between FP and Fixed point 
+/// @brief holds constants relating to converting between FP and Fixed point
  /// @tparam FT: fixed precision type
-template<typename FT>
-struct FixedPointTraits{};
+template <typename FT>
+struct FixedPointTraits
+{
+};
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Fixed_16_8 specialization of FixedPointTraits
-template<>
+template <>
  struct FixedPointTraits<Fixed_16_8>
  {
      /// multiplier to go from FP32 to Fixed Point 16.8
      typedef std::integral_constant<uint32_t, 256> ScaleT;
      /// number of bits to shift to go from 16.8 fixed => int32
      typedef std::integral_constant<uint32_t, 8> BitsT;
-    typedef Fixed_16_8 TypeT;
+    typedef Fixed_16_8                          TypeT;
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Fixed_16_9 specialization of FixedPointTraits
-template<>
+template <>
  struct FixedPointTraits<Fixed_16_9>
  {
      /// multiplier to go from FP32 to Fixed Point 16.9
      typedef std::integral_constant<uint32_t, 512> ScaleT;
      /// number of bits to shift to go from 16.9 fixed => int32
      typedef std::integral_constant<uint32_t, 9> BitsT;
-    typedef Fixed_16_9 TypeT;
+    typedef Fixed_16_9                          TypeT;
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Fixed_16_9 specialization of FixedPointTraits
-template<>
+template <>
  struct FixedPointTraits<Fixed_X_16>
  {
      /// multiplier to go from FP32 to Fixed Point X.16
      typedef std::integral_constant<uint32_t, 65536> ScaleT;
      /// number of bits to shift to go from X.16 fixed => int32
      typedef std::integral_constant<uint32_t, 16> BitsT;
-    typedef Fixed_X_16 TypeT;
+    typedef Fixed_X_16                           TypeT;
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for conservative rasterization modes  
+/// @brief convenience typedefs for conservative rasterization modes
  typedef std::false_type StandardRastT;
-typedef std::true_type ConservativeRastT;
+typedef std::true_type  ConservativeRastT;
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for Input Coverage rasterization modes  
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE> NoInputCoverageT;
+/// @brief convenience typedefs for Input Coverage rasterization modes
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NONE>   NoInputCoverageT;
  typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_NORMAL> OuterConservativeCoverageT;
-typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE> InnerConservativeCoverageT;
+typedef std::integral_constant<uint32_t, SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>
+    InnerConservativeCoverageT;
  
  //////////////////////////////////////////////////////////////////////////
  /// @struct ConservativeRastTraits
  /// @brief primary ConservativeRastTraits template. Shouldn't be instantiated
  /// @tparam ConservativeT: type of conservative rasterization
  template <typename ConservativeT>
-struct ConservativeRastFETraits {};
+struct ConservativeRastFETraits
+{
+};
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief StandardRast specialization of ConservativeRastTraits
  template <>
  struct ConservativeRastFETraits<StandardRastT>
  {
-    typedef std::false_type IsConservativeT;
+    typedef std::false_type                     IsConservativeT;
      typedef std::integral_constant<uint32_t, 0> BoundingBoxOffsetT;
  };
  
@@ -117,13 +122,13 @@ struct ConservativeRastFETraits<StandardRastT>
  template <>
  struct ConservativeRastFETraits<ConservativeRastT>
  {
-    typedef std::true_type IsConservativeT;
+    typedef std::true_type                      IsConservativeT;
      typedef std::integral_constant<uint32_t, 1> BoundingBoxOffsetT;
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief convenience typedefs for ConservativeRastFETraits 
-typedef ConservativeRastFETraits<StandardRastT> FEStandardRastT;
+/// @brief convenience typedefs for ConservativeRastFETraits
+typedef ConservativeRastFETraits<StandardRastT>     FEStandardRastT;
  typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
  
  //////////////////////////////////////////////////////////////////////////
@@ -133,10 +138,11 @@ typedef ConservativeRastFETraits<ConservativeRastT> FEConservativeRastT;
  /// @tparam ConservativeT: type of conservative rasterization
  /// @tparam InputCoverageT: type of input coverage requested, if any
  template <typename ConservativeT, typename _InputCoverageT>
-struct ConservativeRastBETraits {
-    typedef std::false_type IsConservativeT;
-    typedef _InputCoverageT InputCoverageT;
-    typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
+struct ConservativeRastBETraits
+{
+    typedef std::false_type                    IsConservativeT;
+    typedef _InputCoverageT                    InputCoverageT;
+    typedef FixedPointTraits<Fixed_16_8>       ConservativePrecisionT;
      typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
      typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
  };
@@ -146,9 +152,9 @@ struct ConservativeRastBETraits {
  template <typename _InputCoverageT>
  struct ConservativeRastBETraits<StandardRastT, _InputCoverageT>
  {
-    typedef std::false_type IsConservativeT;
-    typedef _InputCoverageT InputCoverageT;
-    typedef FixedPointTraits<Fixed_16_8> ConservativePrecisionT;
+    typedef std::false_type                    IsConservativeT;
+    typedef _InputCoverageT                    InputCoverageT;
+    typedef FixedPointTraits<Fixed_16_8>       ConservativePrecisionT;
      typedef std::integral_constant<int32_t, 0> ConservativeEdgeOffsetT;
      typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
  };
@@ -159,16 +165,17 @@ struct ConservativeRastBETraits<StandardRastT, _InputCoverageT>
  template <>
  struct ConservativeRastBETraits<ConservativeRastT, NoInputCoverageT>
  {
-    typedef std::true_type IsConservativeT;
+    typedef std::true_type   IsConservativeT;
      typedef NoInputCoverageT InputCoverageT;
  
      typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
  
      /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
-    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of 
-    /// of having to compare individual edges to pixel corners to check if any part of the triangle 
-    /// intersects a pixel
-    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
+    /// of having to compare individual edges to pixel corners to check if any part of the
+    /// triangle intersects a pixel
+    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+                                               ConservativeEdgeOffsetT;
      typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
  };
  
@@ -178,18 +185,18 @@ struct ConservativeRastBETraits<ConservativeRastT, NoInputCoverageT>
  template <>
  struct ConservativeRastBETraits<ConservativeRastT, OuterConservativeCoverageT>
  {
-    typedef std::true_type IsConservativeT;
+    typedef std::true_type             IsConservativeT;
      typedef OuterConservativeCoverageT InputCoverageT;
  
      typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
  
      /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
-    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of 
-    /// of having to compare individual edges to pixel corners to check if any part of the triangle 
-    /// intersects a pixel
-    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
+    /// of having to compare individual edges to pixel corners to check if any part of the
+    /// triangle intersects a pixel
+    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+                                               ConservativeEdgeOffsetT;
      typedef std::integral_constant<int32_t, 0> InnerConservativeEdgeOffsetT;
-
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -198,19 +205,25 @@ struct ConservativeRastBETraits<ConservativeRastT, OuterConservativeCoverageT>
  template <>
  struct ConservativeRastBETraits<ConservativeRastT, InnerConservativeCoverageT>
  {
-    typedef std::true_type IsConservativeT;
+    typedef std::true_type             IsConservativeT;
      typedef InnerConservativeCoverageT InputCoverageT;
  
      typedef FixedPointTraits<Fixed_16_9> ConservativePrecisionT;
  
      /// offset edge away from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
-    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of 
-    /// of having to compare individual edges to pixel corners to check if any part of the triangle 
-    /// intersects a pixel
-    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value/2) + 1> ConservativeEdgeOffsetT;
-
-    /// undo the outer conservative offset and offset edge towards from pixel center by 1/2 pixel + 1/512, in Fixed 16.9 precision
-    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead of 
-    /// of having to compare individual edges to pixel corners to check if a pixel is fully covered by a triangle
-    typedef std::integral_constant<int32_t, static_cast<int32_t>(-((ConservativePrecisionT::ScaleT::value/2) + 1) - ConservativeEdgeOffsetT::value)> InnerConservativeEdgeOffsetT;
+    /// this allows the rasterizer to do the 3 edge coverage tests against a single point, instead
+    /// of having to compare individual edges to pixel corners to check if any part of the
+    /// triangle intersects a pixel
+    typedef std::integral_constant<int32_t, (ConservativePrecisionT::ScaleT::value / 2) + 1>
+        ConservativeEdgeOffsetT;
+
+    /// undo the outer conservative offset and offset edge towards pixel center by 1/2 pixel +
+    /// 1/512, in Fixed 16.9 precision. This allows the rasterizer to do the 3 edge coverage tests
+    /// against a single point, instead of having to compare individual edges to pixel corners to
+    /// check if a pixel is fully covered by a triangle
+    typedef std::integral_constant<int32_t,
+                                   static_cast<int32_t>(
+                                       -((ConservativePrecisionT::ScaleT::value / 2) + 1) -
+                                       ConservativeEdgeOffsetT::value)>
+        InnerConservativeEdgeOffsetT;
  };
 \ No newline at end of file
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h b/src/gallium/drivers/swr/rasterizer/core/context.h

index 2cd61e4abbb283826e61194e6bb9327a5665a124..6d378ed36e40bfab276a14582a7b07243cc0d4cf 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -1,34 +1,34 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file context.h
-*
-* @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
-*        The SWR_CONTEXT is our global context and contains the DC ring,
-*        thread state, etc.
-*
-*        The DRAW_CONTEXT contains all state associated with a draw operation.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file context.h
+ *
+ * @brief Definitions for SWR_CONTEXT and DRAW_CONTEXT
+ *        The SWR_CONTEXT is our global context and contains the DC ring,
+ *        thread state, etc.
+ *
+ *        The DRAW_CONTEXT contains all state associated with a draw operation.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <condition_variable>
@@ -59,9 +59,9 @@ struct TRI_FLAGS
  {
      uint32_t frontFacing : 1;
      uint32_t yMajor : 1;
-    uint32_t coverageMask : (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
+    uint32_t coverageMask : (SIMD_TILE_X_DIM* SIMD_TILE_Y_DIM);
      uint32_t reserved : 32 - 1 - 1 - (SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM);
-    float pointSize;
+    float    pointSize;
      uint32_t renderTargetArrayIndex;
      uint32_t viewportIndex;
  };
@@ -77,14 +77,15 @@ struct SWR_TRIANGLE_DESC
      float OneOverW[3];
      float recipDet;
  
-    float *pRecipW;
-    float *pAttribs;
-    float *pPerspAttribs;
-    float *pSamplePos;
-    float *pUserClipBuffer;
+    float* pRecipW;
+    float* pAttribs;
+    float* pPerspAttribs;
+    float* pSamplePos;
+    float* pUserClipBuffer;
  
      uint64_t coverageMask[SWR_MAX_NUM_MULTISAMPLES];
-    uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if entire pixel is covered
+    uint64_t innerCoverageMask; // Conservative rasterization inner coverage: marked covered if
+                                // entire pixel is covered
      uint64_t anyCoveredSamples;
  
      TRI_FLAGS triFlags;
@@ -92,10 +93,10 @@ struct SWR_TRIANGLE_DESC
  
  struct TRIANGLE_WORK_DESC
  {
-    float *pTriBuffer;
-    float *pAttribs;
-    float *pUserClipBuffer;
-    uint32_t numAttribs;
+    float* pTriBuffer;
+    float* pAttribs;
+    float* pUserClipBuffer;
+    uint32_t  numAttribs;
      TRI_FLAGS triFlags;
  };
  
@@ -104,33 +105,33 @@ struct CLEAR_DESC
      SWR_RECT rect;
      uint32_t attachmentMask;
      uint32_t renderTargetArrayIndex;
-    float clearRTColor[4];  // RGBA_32F
-    float clearDepth;   // [0..1]
-    uint8_t clearStencil;
+    float    clearRTColor[4]; // RGBA_32F
+    float    clearDepth;      // [0..1]
+    uint8_t  clearStencil;
  };
  
  struct DISCARD_INVALIDATE_TILES_DESC
  {
-    uint32_t attachmentMask;
-    SWR_RECT rect;
+    uint32_t       attachmentMask;
+    SWR_RECT       rect;
      SWR_TILE_STATE newTileState;
-    bool createNewTiles;
-    bool fullTilesOnly;
+    bool           createNewTiles;
+    bool           fullTilesOnly;
  };
  
  struct SYNC_DESC
  {
      PFN_CALLBACK_FUNC pfnCallbackFunc;
-    uint64_t userData;
-    uint64_t userData2;
-    uint64_t userData3;
+    uint64_t          userData;
+    uint64_t          userData2;
+    uint64_t          userData3;
  };
  
  struct STORE_TILES_DESC
  {
-    uint32_t attachmentMask;
+    uint32_t       attachmentMask;
      SWR_TILE_STATE postStoreTileState;
-    SWR_RECT rect;
+    SWR_RECT       rect;
  };
  
  struct COMPUTE_DESC
@@ -140,7 +141,10 @@ struct COMPUTE_DESC
      uint32_t threadGroupCountZ;
  };
  
-typedef void(*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc);
+typedef void (*PFN_WORK_FUNC)(DRAW_CONTEXT* pDC,
+                              uint32_t      workerId,
+                              uint32_t      macroTile,
+                              void*         pDesc);
  
  enum WORK_TYPE
  {
@@ -154,51 +158,55 @@ enum WORK_TYPE
  
  OSALIGNSIMD(struct) BE_WORK
  {
-    WORK_TYPE type;
+    WORK_TYPE     type;
      PFN_WORK_FUNC pfnWork;
      union
      {
-        SYNC_DESC sync;
-        TRIANGLE_WORK_DESC tri;
-        CLEAR_DESC clear;
+        SYNC_DESC                     sync;
+        TRIANGLE_WORK_DESC            tri;
+        CLEAR_DESC                    clear;
          DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
-        STORE_TILES_DESC storeTiles;
+        STORE_TILES_DESC              storeTiles;
      } desc;
  };
  
  struct DRAW_WORK
  {
-    DRAW_CONTEXT*   pDC;
+    DRAW_CONTEXT* pDC;
      union
      {
-        uint32_t   numIndices;      // DrawIndexed: Number of indices for draw.
-        uint32_t   numVerts;        // Draw: Number of verts (triangles, lines, etc)
+        uint32_t numIndices; // DrawIndexed: Number of indices for draw.
+        uint32_t numVerts;   // Draw: Number of verts (triangles, lines, etc)
      };
      union
      {
-        gfxptr_t   xpIB;              // DrawIndexed: App supplied int32 indices 
-        uint32_t   startVertex;    // Draw: Starting vertex in VB to render from.
+        gfxptr_t xpIB;        // DrawIndexed: App supplied int32 indices
+        uint32_t startVertex; // Draw: Starting vertex in VB to render from.
      };
-    int32_t    baseVertex;
-    uint32_t   numInstances;        // Number of instances
-    uint32_t   startInstance;       // Instance offset
-    uint32_t   startPrimID;         // starting primitiveID for this draw batch
-    uint32_t   startVertexID;       // starting VertexID for this draw batch (only needed for non-indexed draws)
-    SWR_FORMAT type;                // index buffer type
+    int32_t  baseVertex;
+    uint32_t numInstances;  // Number of instances
+    uint32_t startInstance; // Instance offset
+    uint32_t startPrimID;   // starting primitiveID for this draw batch
+    uint32_t
+               startVertexID; // starting VertexID for this draw batch (only needed for non-indexed draws)
+    SWR_FORMAT type;          // index buffer type
  };
  
-typedef void(*PFN_FE_WORK_FUNC)(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pDesc);
+typedef void (*PFN_FE_WORK_FUNC)(SWR_CONTEXT*  pContext,
+                                 DRAW_CONTEXT* pDC,
+                                 uint32_t      workerId,
+                                 void*         pDesc);
  struct FE_WORK
  {
-    WORK_TYPE type;
+    WORK_TYPE        type;
      PFN_FE_WORK_FUNC pfnWork;
      union
      {
-        SYNC_DESC sync;
-        DRAW_WORK draw;
-        CLEAR_DESC clear;
+        SYNC_DESC                     sync;
+        DRAW_WORK                     draw;
+        CLEAR_DESC                    clear;
          DISCARD_INVALIDATE_TILES_DESC discardInvalidateTiles;
-        STORE_TILES_DESC storeTiles;
+        STORE_TILES_DESC              storeTiles;
      } desc;
  };
  
@@ -213,13 +221,25 @@ struct GUARDBANDS
  struct PA_STATE;
  
  // function signature for pipeline stages that execute after primitive assembly
-typedef void(*PFN_PROCESS_PRIMS)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[], 
-    uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+typedef void (*PFN_PROCESS_PRIMS)(DRAW_CONTEXT*      pDC,
+                                  PA_STATE&          pa,
+                                  uint32_t           workerId,
+                                  simdvector         prims[],
+                                  uint32_t           primMask,
+                                  simdscalari const& primID,
+                                  simdscalari const& viewportIdx,
+                                  simdscalari const& rtIdx);
  
  #if ENABLE_AVX512_SIMD16
  // function signature for pipeline stages that execute after primitive assembly
-typedef void(SIMDCALL *PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[],
-    uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+typedef void(SIMDCALL* PFN_PROCESS_PRIMS_SIMD16)(DRAW_CONTEXT*        pDC,
+                                                 PA_STATE&            pa,
+                                                 uint32_t             workerId,
+                                                 simd16vector         prims[],
+                                                 uint32_t             primMask,
+                                                 simd16scalari const& primID,
+                                                 simd16scalari const& viewportIdx,
+                                                 simd16scalari const& rtIdx);
  
  #endif
  OSALIGNLINE(struct) API_STATE
@@ -228,86 +248,85 @@ OSALIGNLINE(struct) API_STATE
      SWR_VERTEX_BUFFER_STATE vertexBuffers[KNOB_NUM_STREAMS];
  
      // GS - Geometry Shader State
-    SWR_GS_STATE            gsState;
-    PFN_GS_FUNC             pfnGsFunc;
+    SWR_GS_STATE gsState;
+    PFN_GS_FUNC  pfnGsFunc;
  
      // FS - Fetch Shader State
-    PFN_FETCH_FUNC          pfnFetchFunc;
+    PFN_FETCH_FUNC pfnFetchFunc;
  
      // VS - Vertex Shader State
-    PFN_VERTEX_FUNC         pfnVertexFunc;
+    PFN_VERTEX_FUNC pfnVertexFunc;
  
      // Index Buffer
-    SWR_INDEX_BUFFER_STATE  indexBuffer;
+    SWR_INDEX_BUFFER_STATE indexBuffer;
  
      // CS - Compute Shader
-    PFN_CS_FUNC             pfnCsFunc;
-    uint32_t                totalThreadsInGroup;
-    uint32_t                totalSpillFillSize;
-    uint32_t                scratchSpaceSize;
-    uint32_t                scratchSpaceNumInstances;
+    PFN_CS_FUNC pfnCsFunc;
+    uint32_t    totalThreadsInGroup;
+    uint32_t    totalSpillFillSize;
+    uint32_t    scratchSpaceSize;
+    uint32_t    scratchSpaceNumInstances;
  
      // FE - Frontend State
-    SWR_FRONTEND_STATE      frontendState;
+    SWR_FRONTEND_STATE frontendState;
  
      // SOS - Streamout Shader State
-    PFN_SO_FUNC             pfnSoFunc[MAX_SO_STREAMS];
+    PFN_SO_FUNC pfnSoFunc[MAX_SO_STREAMS];
  
      // Streamout state
-    SWR_STREAMOUT_STATE     soState;
+    SWR_STREAMOUT_STATE          soState;
      mutable SWR_STREAMOUT_BUFFER soBuffer[MAX_SO_STREAMS];
  
      // Tessellation State
-    PFN_HS_FUNC             pfnHsFunc;
-    PFN_DS_FUNC             pfnDsFunc;
-    SWR_TS_STATE            tsState;
+    PFN_HS_FUNC  pfnHsFunc;
+    PFN_DS_FUNC  pfnDsFunc;
+    SWR_TS_STATE tsState;
  
      // Number of attributes used by the frontend (vs, so, gs)
-    uint32_t                feNumAttributes;
-
+    uint32_t feNumAttributes;
  
      // RS - Rasterizer State
-    SWR_RASTSTATE           rastState;
+    SWR_RASTSTATE rastState;
      // floating point multisample offsets
      float samplePos[SWR_MAX_NUM_MULTISAMPLES * 2];
  
-    GUARDBANDS               gbState;
+    GUARDBANDS gbState;
  
-    SWR_VIEWPORT            vp[KNOB_NUM_VIEWPORTS_SCISSORS];
-    SWR_VIEWPORT_MATRICES   vpMatrices;
+    SWR_VIEWPORT          vp[KNOB_NUM_VIEWPORTS_SCISSORS];
+    SWR_VIEWPORT_MATRICES vpMatrices;
  
-    SWR_RECT                scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
-    SWR_RECT                scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
-    bool                    scissorsTileAligned;
+    SWR_RECT scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
+    SWR_RECT scissorsInFixedPoint[KNOB_NUM_VIEWPORTS_SCISSORS];
+    bool     scissorsTileAligned;
  
-    bool                    forceFront;
-    PRIMITIVE_TOPOLOGY      topology;
+    bool               forceFront;
+    PRIMITIVE_TOPOLOGY topology;
  
  
      // Backend state
      OSALIGNLINE(SWR_BACKEND_STATE) backendState;
  
-    SWR_DEPTH_BOUNDS_STATE  depthBoundsState;
+    SWR_DEPTH_BOUNDS_STATE depthBoundsState;
  
      // PS - Pixel shader state
-    SWR_PS_STATE            psState;
+    SWR_PS_STATE psState;
  
      SWR_DEPTH_STENCIL_STATE depthStencilState;
  
      // OM - Output Merger State
-    SWR_BLEND_STATE         blendState;
-    PFN_BLEND_JIT_FUNC      pfnBlendFunc[SWR_NUM_RENDERTARGETS];
+    SWR_BLEND_STATE    blendState;
+    PFN_BLEND_JIT_FUNC pfnBlendFunc[SWR_NUM_RENDERTARGETS];
  
      struct
      {
-        uint32_t enableStatsFE : 1;             // Enable frontend pipeline stats
-        uint32_t enableStatsBE : 1;             // Enable backend pipeline stats
-        uint32_t colorHottileEnable : 8;        // Bitmask of enabled color hottiles
-        uint32_t depthHottileEnable: 1;         // Enable depth buffer hottile
-        uint32_t stencilHottileEnable : 1;      // Enable stencil buffer hottile
+        uint32_t enableStatsFE : 1;        // Enable frontend pipeline stats
+        uint32_t enableStatsBE : 1;        // Enable backend pipeline stats
+        uint32_t colorHottileEnable : 8;   // Bitmask of enabled color hottiles
+        uint32_t depthHottileEnable : 1;   // Enable depth buffer hottile
+        uint32_t stencilHottileEnable : 1; // Enable stencil buffer hottile
      };
  
-    PFN_QUANTIZE_DEPTH      pfnQuantizeDepth;
+    PFN_QUANTIZE_DEPTH pfnQuantizeDepth;
  };
  
  class MacroTileMgr;
@@ -343,13 +362,23 @@ struct BarycentricCoeffs
  };
  
  // pipeline function pointer types
-typedef void(*PFN_BACKEND_FUNC)(DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
-typedef void(*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT &, uint8_t* (&)[SWR_NUM_RENDERTARGETS], uint32_t, const SWR_BLEND_STATE*,
-                                 const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS], simdscalar&, simdscalar const &);
-typedef void(*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &);
-typedef void(*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
-typedef void(*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT &, const uint64_t *const, const uint32_t,
-                                              simdscalar const &, simdscalar const &);
+typedef void (*PFN_BACKEND_FUNC)(
+    DRAW_CONTEXT*, uint32_t, uint32_t, uint32_t, SWR_TRIANGLE_DESC&, RenderOutputBuffers&);
+typedef void (*PFN_OUTPUT_MERGER)(SWR_PS_CONTEXT&,
+                                  uint8_t* (&)[SWR_NUM_RENDERTARGETS],
+                                  uint32_t,
+                                  const SWR_BLEND_STATE*,
+                                  const PFN_BLEND_JIT_FUNC (&)[SWR_NUM_RENDERTARGETS],
+                                  simdscalar&,
+                                  simdscalar const&);
+typedef void (*PFN_CALC_PIXEL_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
+typedef void (*PFN_CALC_SAMPLE_BARYCENTRICS)(const BarycentricCoeffs&, SWR_PS_CONTEXT&);
+typedef void (*PFN_CALC_CENTROID_BARYCENTRICS)(const BarycentricCoeffs&,
+                                               SWR_PS_CONTEXT&,
+                                               const uint64_t* const,
+                                               const uint32_t,
+                                               simdscalar const&,
+                                               simdscalar const&);
  
  struct BACKEND_FUNCS
  {
@@ -361,16 +390,16 @@ struct DRAW_STATE
  {
      API_STATE state;
  
-    void* pPrivateState;  // Its required the driver sets this up for each draw.
+    void* pPrivateState; // It's required that the driver sets this up for each draw.
  
      // pipeline function pointers, filled in by API thread when setting up the draw
-    BACKEND_FUNCS backendFuncs;
+    BACKEND_FUNCS     backendFuncs;
      PFN_PROCESS_PRIMS pfnProcessPrims;
  #if USE_SIMD16_FRONTEND
      PFN_PROCESS_PRIMS_SIMD16 pfnProcessPrims_simd16;
  #endif
  
-    CachingArena* pArena;     // This should only be used by API thread.
+    CachingArena* pArena; // This should only be used by API thread.
  };
  
  struct DRAW_DYNAMIC_STATE
@@ -386,7 +415,7 @@ struct DRAW_DYNAMIC_STATE
      uint32_t SoWriteOffset[4];
      bool     SoWriteOffsetDirty[4];
  
-    SWR_STATS_FE statsFE;   // Only one FE thread per DC.
+    SWR_STATS_FE statsFE; // Only one FE thread per DC.
      SWR_STATS*   pStats;
  };
  
@@ -395,30 +424,30 @@ struct DRAW_DYNAMIC_STATE
  //    This draw context maintains all of the state needed for the draw operation.
  struct DRAW_CONTEXT
  {
-    SWR_CONTEXT*    pContext;
+    SWR_CONTEXT* pContext;
      union
      {
-        MacroTileMgr*   pTileMgr;
-        DispatchQueue*  pDispatch;      // Queue for thread groups. (isCompute)
+        MacroTileMgr*  pTileMgr;
+        DispatchQueue* pDispatch; // Queue for thread groups. (isCompute)
      };
-    DRAW_STATE*     pState;             // Read-only state. Core should not update this outside of API thread.
-    CachingArena*   pArena;
+    DRAW_STATE*   pState; // Read-only state. Core should not update this outside of API thread.
+    CachingArena* pArena;
  
-    uint32_t        drawId;
-    bool            dependentFE;    // Frontend work is dependent on all previous FE
-    bool            dependent;      // Backend work is dependent on all previous BE
-    bool            isCompute;      // Is this DC a compute context?
-    bool            cleanupState;   // True if this is the last draw using an entry in the state ring.
+    uint32_t drawId;
+    bool     dependentFE;  // Frontend work is dependent on all previous FE
+    bool     dependent;    // Backend work is dependent on all previous BE
+    bool     isCompute;    // Is this DC a compute context?
+    bool     cleanupState; // True if this is the last draw using an entry in the state ring.
  
-    FE_WORK         FeWork;
+    FE_WORK FeWork;
  
-    SYNC_DESC       retireCallback; // Call this func when this DC is retired.
+    SYNC_DESC retireCallback; // Call this func when this DC is retired.
  
      DRAW_DYNAMIC_STATE dynState;
  
-    volatile OSALIGNLINE(bool)       doneFE;         // Is FE work done for this draw?
-    volatile OSALIGNLINE(uint32_t)   FeLock;
-    volatile OSALIGNLINE(uint32_t)   threadsDone;
+    volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
+    volatile OSALIGNLINE(uint32_t) FeLock;
+    volatile OSALIGNLINE(uint32_t) threadsDone;
  };
  
  static_assert((sizeof(DRAW_CONTEXT) & 63) == 0, "Invalid size for DRAW_CONTEXT");
@@ -444,14 +473,14 @@ class HotTileMgr;
  struct SWR_CONTEXT
  {
      // Draw Context Ring
-    //  Each draw needs its own state in order to support mulitple draws in flight across multiple threads.
-    //  We maintain N draw contexts configured as a ring. The size of the ring limits the maximum number
-    //  of draws that can be in flight at any given time.
+    //  Each draw needs its own state in order to support multiple draws in flight across multiple
+    //  threads. We maintain N draw contexts configured as a ring. The size of the ring limits the
+    //  maximum number of draws that can be in flight at any given time.
      //
      //  Description:
      //  1. State - When an application first sets state we'll request a new draw context to use.
-    //     a. If there are no available draw contexts then we'll have to wait until one becomes free.
-    //     b. If one is available then set pCurDrawContext to point to it and mark it in use.
+    //     a. If no draw contexts are available then we'll have to wait until one becomes free.
+    //     b. If one is available then set pCurDrawContext to point to it and mark it in use.
      //     c. All state calls set state on pCurDrawContext.
      //  2. Draw - Creates submits a work item that is associated with current draw context.
      //     a. Set pPrevDrawContext = pCurDrawContext
@@ -461,10 +490,11 @@ struct SWR_CONTEXT
      //     b. State is copied from prev draw context to current.
      RingBuffer<DRAW_CONTEXT> dcRing;
  
-    DRAW_CONTEXT *pCurDrawContext;    // This points to DC entry in ring for an unsubmitted draw.
-    DRAW_CONTEXT *pPrevDrawContext;   // This points to DC entry for the previous context submitted that we can copy state from.
+    DRAW_CONTEXT* pCurDrawContext;  // This points to DC entry in ring for an unsubmitted draw.
+    DRAW_CONTEXT* pPrevDrawContext; // This points to DC entry for the previous context submitted
+                                    // that we can copy state from.
  
-    MacroTileMgr* pMacroTileManagerArray;
+    MacroTileMgr*  pMacroTileManagerArray;
      DispatchQueue* pDispatchQueueArray;
  
      // Draw State Ring
@@ -474,33 +504,33 @@ struct SWR_CONTEXT
      //  to reference a single entry in the DS ring.
      RingBuffer<DRAW_STATE> dsRing;
  
-    uint32_t curStateId;               // Current index to the next available entry in the DS ring.
+    uint32_t curStateId; // Current index to the next available entry in the DS ring.
  
      uint32_t NumWorkerThreads;
      uint32_t NumFEThreads;
      uint32_t NumBEThreads;
  
-    THREAD_POOL threadPool; // Thread pool associated with this context
-    SWR_THREADING_INFO threadInfo;
-    SWR_API_THREADING_INFO apiThreadInfo;
+    THREAD_POOL              threadPool; // Thread pool associated with this context
+    SWR_THREADING_INFO       threadInfo;
+    SWR_API_THREADING_INFO   apiThreadInfo;
      SWR_WORKER_PRIVATE_STATE workerPrivateState;
  
      uint32_t MAX_DRAWS_IN_FLIGHT;
  
      std::condition_variable FifosNotEmpty;
-    std::mutex WaitLock;
+    std::mutex              WaitLock;
  
      uint32_t privateStateSize;
  
-    HotTileMgr *pHotTileMgr;
+    HotTileMgr* pHotTileMgr;
  
      // Callback functions, passed in at create context time
-    PFN_LOAD_TILE               pfnLoadTile;
-    PFN_STORE_TILE              pfnStoreTile;
-    PFN_CLEAR_TILE              pfnClearTile;
-    PFN_UPDATE_SO_WRITE_OFFSET  pfnUpdateSoWriteOffset;
-    PFN_UPDATE_STATS            pfnUpdateStats;
-    PFN_UPDATE_STATS_FE         pfnUpdateStatsFE;
+    PFN_LOAD_TILE              pfnLoadTile;
+    PFN_STORE_TILE             pfnStoreTile;
+    PFN_CLEAR_TILE             pfnClearTile;
+    PFN_UPDATE_SO_WRITE_OFFSET pfnUpdateSoWriteOffset;
+    PFN_UPDATE_STATS           pfnUpdateStats;
+    PFN_UPDATE_STATS_FE        pfnUpdateStatsFE;
  
  
      // Global Stats
@@ -509,7 +539,7 @@ struct SWR_CONTEXT
      // Scratch space for workers.
      uint8_t** ppScratch;
  
-    volatile OSALIGNLINE(uint32_t)  drawsOutstandingFE;
+    volatile OSALIGNLINE(uint32_t) drawsOutstandingFE;
  
      OSALIGNLINE(CachingAllocator) cachingArenaAllocator;
      uint32_t frameCount;
@@ -522,27 +552,35 @@ struct SWR_CONTEXT
      HANDLE* pArContext;
  };
  
-#define UPDATE_STAT_BE(name, count) if (GetApiState(pDC).enableStatsBE) { pDC->dynState.pStats[workerId].name += count; }
-#define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStatsFE) { pDC->dynState.statsFE.name += count; }
+#define UPDATE_STAT_BE(name, count)                   \
+    if (GetApiState(pDC).enableStatsBE)               \
+    {                                                 \
+        pDC->dynState.pStats[workerId].name += count; \
+    }
+#define UPDATE_STAT_FE(name, count)          \
+    if (GetApiState(pDC).enableStatsFE)      \
+    {                                        \
+        pDC->dynState.statsFE.name += count; \
+    }
  
  // ArchRast instrumentation framework
-#define AR_WORKER_CTX  pDC->pContext->pArContext[workerId]
-#define AR_API_CTX     pDC->pContext->pArContext[pContext->NumWorkerThreads]
+#define AR_WORKER_CTX pDC->pContext->pArContext[workerId]
+#define AR_API_CTX pDC->pContext->pArContext[pContext->NumWorkerThreads]
  
  #ifdef KNOB_ENABLE_RDTSC
  #define RDTSC_BEGIN(type, drawid) RDTSC_START(type)
-#define RDTSC_END(type, count)   RDTSC_STOP(type, count, 0)
+#define RDTSC_END(type, count) RDTSC_STOP(type, count, 0)
  #else
  #define RDTSC_BEGIN(type, count)
  #define RDTSC_END(type, count)
  #endif
  
  #ifdef KNOB_ENABLE_AR
-    #define _AR_EVENT(ctx, event)       ArchRast::Dispatch(ctx, ArchRast::event)
-    #define _AR_FLUSH(ctx, id)          ArchRast::FlushDraw(ctx, id)
+#define _AR_EVENT(ctx, event) ArchRast::Dispatch(ctx, ArchRast::event)
+#define _AR_FLUSH(ctx, id) ArchRast::FlushDraw(ctx, id)
  #else
-    #define _AR_EVENT(ctx, event)
-    #define _AR_FLUSH(ctx, id)
+#define _AR_EVENT(ctx, event)
+#define _AR_FLUSH(ctx, id)
  #endif
  
  // Use these macros for api thread.
diff --git a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h

index fafc36d1de681256dc6499ab168ea5beb2935c27..54a3489205a33f42f10b3976ab0e904cd5ac96c9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
+++ b/src/gallium/drivers/swr/rasterizer/core/depthstencil.h
@@ -1,36 +1,39 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file depthstencil.h
-*
-* @brief Implements depth/stencil functionality
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file depthstencil.h
+ *
+ * @brief Implements depth/stencil functionality
+ *
+ ******************************************************************************/
  #pragma once
  #include "common/os.h"
  #include "format_conversion.h"
  
  INLINE
-void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
+void StencilOp(SWR_STENCILOP     op,
+               simdscalar const& mask,
+               simdscalar const& stencilRefps,
+               simdscalar&       stencilps)
  {
      simdscalari stencil = _simd_castps_si(stencilps);
  
@@ -47,30 +50,31 @@ void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stenc
      case STENCILOP_INCRSAT:
      {
          simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
-        stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
+        stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
          break;
      }
      case STENCILOP_DECRSAT:
      {
          simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
-        stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
+        stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
          break;
      }
      case STENCILOP_INCR:
      {
          simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
-        stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
+        stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
          break;
      }
      case STENCILOP_DECR:
      {
          simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
-        stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
+        stencilps               = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
          break;
      }
      case STENCILOP_INVERT:
      {
-        simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
+        simdscalar stencilinvert =
+            _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
          stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
          break;
      }
@@ -79,12 +83,11 @@ void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stenc
      }
  }
  
-
-template<SWR_FORMAT depthFormatT>
-simdscalar QuantizeDepth(simdscalar const &depth)
+template <SWR_FORMAT depthFormatT>
+simdscalar QuantizeDepth(simdscalar const& depth)
  {
      SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
-    uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
+    uint32_t depthBpc  = FormatTraits<depthFormatT>::GetBPC(0);
  
      if (depthType == SWR_TYPE_FLOAT)
      {
@@ -98,11 +101,11 @@ simdscalar QuantizeDepth(simdscalar const &depth)
      // should be unorm depth if not float
      SWR_ASSERT(depthType == SWR_TYPE_UNORM);
  
-    float quantize = (float)((1 << depthBpc) - 1);
-    simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
-    result = _simd_add_ps(result, _simd_set1_ps(0.5f));
-    result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
-    
+    float      quantize = (float)((1 << depthBpc) - 1);
+    simdscalar result   = _simd_mul_ps(depth, _simd_set1_ps(quantize));
+    result              = _simd_add_ps(result, _simd_set1_ps(0.5f));
+    result              = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
+
      if (depthBpc > 16)
      {
          result = _simd_div_ps(result, _simd_set1_ps(quantize));
@@ -116,42 +119,62 @@ simdscalar QuantizeDepth(simdscalar const &depth)
  }
  
  INLINE
-simdscalar DepthStencilTest(const API_STATE* pState,
-                 bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
-                 uint8_t *pStencilBase, simdscalar* pStencilMask)
+simdscalar DepthStencilTest(const API_STATE*  pState,
+                            bool              frontFacing,
+                            uint32_t          viewportIndex,
+                            simdscalar const& iZ,
+                            uint8_t*          pDepthBase,
+                            simdscalar const& coverageMask,
+                            uint8_t*          pStencilBase,
+                            simdscalar*       pStencilMask)
  {
      static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
      static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
  
-    const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
-    const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
+    const SWR_DEPTH_STENCIL_STATE* pDSState  = &pState->depthStencilState;
+    const SWR_VIEWPORT*            pViewport = &pState->vp[viewportIndex];
  
      simdscalar depthResult = _simd_set1_ps(-1.0f);
      simdscalar zbuf;
  
      // clamp Z to viewport [minZ..maxZ]
-    simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
-    simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
+    simdscalar vMinZ   = _simd_broadcast_ss(&pViewport->minZ);
+    simdscalar vMaxZ   = _simd_broadcast_ss(&pViewport->maxZ);
      simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
-    
+
      if (pDSState->depthTestEnable)
      {
          switch (pDSState->depthTestFunc)
          {
-        case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break;
-        case ZFUNC_ALWAYS: break;
+        case ZFUNC_NEVER:
+            depthResult = _simd_setzero_ps();
+            break;
+        case ZFUNC_ALWAYS:
+            break;
          default:
              zbuf = _simd_load_ps((const float*)pDepthBase);
          }
  
          switch (pDSState->depthTestFunc)
          {
-        case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
-        case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
-        case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
-        case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
-        case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
-        case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
+        case ZFUNC_LE:
+            depthResult = _simd_cmple_ps(interpZ, zbuf);
+            break;
+        case ZFUNC_LT:
+            depthResult = _simd_cmplt_ps(interpZ, zbuf);
+            break;
+        case ZFUNC_GT:
+            depthResult = _simd_cmpgt_ps(interpZ, zbuf);
+            break;
+        case ZFUNC_GE:
+            depthResult = _simd_cmpge_ps(interpZ, zbuf);
+            break;
+        case ZFUNC_EQ:
+            depthResult = _simd_cmpeq_ps(interpZ, zbuf);
+            break;
+        case ZFUNC_NE:
+            depthResult = _simd_cmpneq_ps(interpZ, zbuf);
+            break;
          }
      }
  
@@ -159,9 +182,9 @@ simdscalar DepthStencilTest(const API_STATE* pState,
  
      if (pDSState->stencilTestEnable)
      {
-        uint8_t stencilRefValue;
+        uint8_t  stencilRefValue;
          uint32_t stencilTestFunc;
-        uint8_t stencilTestMask;
+        uint8_t  stencilTestMask;
          if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
          {
              stencilRefValue = pDSState->stencilRefValue;
@@ -178,15 +201,19 @@ simdscalar DepthStencilTest(const API_STATE* pState,
          simdvector sbuf;
          simdscalar stencilWithMask;
          simdscalar stencilRef;
-        switch(stencilTestFunc)
+        switch (stencilTestFunc)
          {
-        case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break;
-        case ZFUNC_ALWAYS: break;
+        case ZFUNC_NEVER:
+            stencilMask = _simd_setzero_ps();
+            break;
+        case ZFUNC_ALWAYS:
+            break;
          default:
              LoadSOA<R8_UINT>(pStencilBase, sbuf);
-            
+
              // apply stencil read mask
-            stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
+            stencilWithMask = _simd_castsi_ps(
+                _simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
  
              // do stencil compare in float to avoid simd integer emulation in AVX1
              stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
@@ -195,34 +222,52 @@ simdscalar DepthStencilTest(const API_STATE* pState,
              break;
          }
  
-        switch(stencilTestFunc)
+        switch (stencilTestFunc)
          {
-        case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
-        case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
-        case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
-        case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
-        case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
-        case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
+        case ZFUNC_LE:
+            stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask);
+            break;
+        case ZFUNC_LT:
+            stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask);
+            break;
+        case ZFUNC_GT:
+            stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask);
+            break;
+        case ZFUNC_GE:
+            stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask);
+            break;
+        case ZFUNC_EQ:
+            stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask);
+            break;
+        case ZFUNC_NE:
+            stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask);
+            break;
          }
      }
  
      simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
-    depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
+    depthWriteMask            = _simd_and_ps(depthWriteMask, coverageMask);
  
      *pStencilMask = stencilMask;
      return depthWriteMask;
  }
  
  INLINE
-void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
-        bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
-        uint8_t *pStencilBase, const simdscalar& stencilMask)
+void DepthStencilWrite(const SWR_VIEWPORT*            pViewport,
+                       const SWR_DEPTH_STENCIL_STATE* pDSState,
+                       bool                           frontFacing,
+                       simdscalar const&              iZ,
+                       uint8_t*                       pDepthBase,
+                       const simdscalar&              depthMask,
+                       const simdscalar&              coverageMask,
+                       uint8_t*                       pStencilBase,
+                       const simdscalar&              stencilMask)
  {
      if (pDSState->depthWriteEnable)
      {
          // clamp Z to viewport [minZ..maxZ]
-        simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
-        simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
+        simdscalar vMinZ   = _simd_broadcast_ss(&pViewport->minZ);
+        simdscalar vMaxZ   = _simd_broadcast_ss(&pViewport->maxZ);
          simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
  
          simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
@@ -235,49 +280,56 @@ void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_ST
          LoadSOA<R8_UINT>(pStencilBase, sbuf);
          simdscalar stencilbuf = sbuf.v[0];
  
-        uint8_t stencilRefValue;
+        uint8_t  stencilRefValue;
          uint32_t stencilFailOp;
          uint32_t stencilPassDepthPassOp;
          uint32_t stencilPassDepthFailOp;
-        uint8_t stencilWriteMask;
+        uint8_t  stencilWriteMask;
          if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
          {
-            stencilRefValue = pDSState->stencilRefValue;
-            stencilFailOp = pDSState->stencilFailOp;
+            stencilRefValue        = pDSState->stencilRefValue;
+            stencilFailOp          = pDSState->stencilFailOp;
              stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
              stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
-            stencilWriteMask = pDSState->stencilWriteMask;
+            stencilWriteMask       = pDSState->stencilWriteMask;
          }
          else
          {
-            stencilRefValue = pDSState->backfaceStencilRefValue;
-            stencilFailOp = pDSState->backfaceStencilFailOp;
+            stencilRefValue        = pDSState->backfaceStencilRefValue;
+            stencilFailOp          = pDSState->backfaceStencilFailOp;
              stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
              stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
-            stencilWriteMask = pDSState->backfaceStencilWriteMask;
+            stencilWriteMask       = pDSState->backfaceStencilWriteMask;
          }
  
-        simdscalar stencilps = stencilbuf;
+        simdscalar stencilps    = stencilbuf;
          simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
  
-        simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
+        simdscalar stencilFailMask          = _simd_andnot_ps(stencilMask, coverageMask);
          simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
-        simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
+        simdscalar stencilPassDepthFailMask =
+            _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
  
          simdscalar origStencil = stencilps;
  
          StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
-        StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
-        StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);
+        StencilOp((SWR_STENCILOP)stencilPassDepthFailOp,
+                  stencilPassDepthFailMask,
+                  stencilRefps,
+                  stencilps);
+        StencilOp((SWR_STENCILOP)stencilPassDepthPassOp,
+                  stencilPassDepthPassMask,
+                  stencilRefps,
+                  stencilps);
  
          // apply stencil write mask
          simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
-        stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
-        stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
+        stencilps              = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
+        stencilps =
+            _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
  
          simdvector stencilResult;
          stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
          StoreSOA<R8_UINT>(stencilResult, pStencilBase);
      }
-
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp

index 43d3a8322678edc0a9c239f2109214d959febe0c..9a9cc2635df389f666c2af5a4afffd528cf9ddf4 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
+++ b/src/gallium/drivers/swr/rasterizer/core/fifo.hpp
@@ -1,53 +1,52 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fifo.hpp
-*
-* @brief Definitions for our fifos used for thread communication.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fifo.hpp
+ *
+ * @brief Definitions for our fifos used for thread communication.
+ *
+ ******************************************************************************/
  #pragma once
  
-
  #include "common/os.h"
  #include "arena.h"
  
  #include <vector>
  #include <cassert>
  
-template<class T>
+template <class T>
  struct QUEUE
  {
-    OSALIGNLINE(volatile uint32_t) mLock{ 0 };
-    OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
+    OSALIGNLINE(volatile uint32_t) mLock{0};
+    OSALIGNLINE(volatile uint32_t) mNumEntries{0};
      std::vector<T*> mBlocks;
-    T* mCurBlock{ nullptr };
-    uint32_t mHead{ 0 };
-    uint32_t mTail{ 0 };
-    uint32_t mCurBlockIdx{ 0 };
+    T*              mCurBlock{nullptr};
+    uint32_t        mHead{0};
+    uint32_t        mTail{0};
+    uint32_t        mCurBlockIdx{0};
  
      // power of 2
      static const uint32_t mBlockSizeShift = 6;
-    static const uint32_t mBlockSize = 1 << mBlockSizeShift;
+    static const uint32_t mBlockSize      = 1 << mBlockSizeShift;
  
      template <typename ArenaT>
      void clear(ArenaT& arena)
@@ -55,18 +54,15 @@ struct QUEUE
          mHead = 0;
          mTail = 0;
          mBlocks.clear();
-        T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
+        T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
          mBlocks.push_back(pNewBlock);
-        mCurBlock = pNewBlock;
+        mCurBlock    = pNewBlock;
          mCurBlockIdx = 0;
-        mNumEntries = 0;
-        mLock = 0;
+        mNumEntries  = 0;
+        mLock        = 0;
      }
  
-    uint32_t getNumQueued()
-    {
-        return mNumEntries;
-    }
+    uint32_t getNumQueued() { return mNumEntries; }
  
      bool tryLock()
      {
@@ -80,10 +76,7 @@ struct QUEUE
          return (initial == 0);
      }
  
-    void unlock()
-    {
-        mLock = 0;
-    }
+    void unlock() { mLock = 0; }
  
      T* peek()
      {
@@ -92,34 +85,33 @@ struct QUEUE
              return nullptr;
          }
          uint32_t block = mHead >> mBlockSizeShift;
-        return &mBlocks[block][mHead & (mBlockSize-1)];
+        return &mBlocks[block][mHead & (mBlockSize - 1)];
      }
  
      void dequeue_noinc()
      {
-        mHead ++;
-        mNumEntries --;
+        mHead++;
+        mNumEntries--;
      }
  
      template <typename ArenaT>
      bool enqueue_try_nosync(ArenaT& arena, const T* entry)
      {
          const float* pSrc = (const float*)entry;
-        float* pDst = (float*)&mCurBlock[mTail];
+        float*       pDst = (float*)&mCurBlock[mTail];
  
-        auto lambda = [&](int32_t i)
-        {
-            __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
-            _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
+        auto lambda = [&](int32_t i) {
+            __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH);
+            _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc);
          };
  
-        const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
+        const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4);
          static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
-            "FIFO element size should be multiple of SIMD width.");
+                      "FIFO element size should be multiple of SIMD width.");
  
          UnrollerL<0, numSimdLines, 1>::step(lambda);
  
-        mTail ++;
+        mTail++;
          if (mTail == mBlockSize)
          {
              if (++mCurBlockIdx < mBlocks.size())
@@ -128,7 +120,7 @@ struct QUEUE
              }
              else
              {
-                T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
+                T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
                  SWR_ASSERT(newBlock);
  
                  mBlocks.push_back(newBlock);
@@ -138,12 +130,9 @@ struct QUEUE
              mTail = 0;
          }
  
-        mNumEntries ++;
+        mNumEntries++;
          return true;
      }
  
-    void destroy()
-    {
-    }
-
+    void destroy() {}
  };
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h b/src/gallium/drivers/swr/rasterizer/core/format_conversion.h

index 72843f59062385a7b81c85cbfb6fc60643ef22d4..90bf118727e8c0dac8f3794268e0914f6f20c3d6 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/format_conversion.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_conversion.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file format_conversion.h
-*
-* @brief API implementation
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file format_conversion.h
+ *
+ * @brief API implementation
+ *
+ ******************************************************************************/
  #include "format_types.h"
  #include "format_traits.h"
  
@@ -33,15 +33,15 @@
  ///        SOA RGBA32_FLOAT format.
  /// @param pSrc - source data in SOA form
  /// @param dst - output data in SOA form
-template<SWR_FORMAT SrcFormat>
-INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
+template <SWR_FORMAT SrcFormat>
+INLINE void LoadSOA(const uint8_t* pSrc, simdvector& dst)
  {
      // fast path for float32
-    if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
+    if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+        (FormatTraits<SrcFormat>::GetBPC(0) == 32))
      {
-        auto lambda = [&](int comp)
-        {
-            simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar)));
+        auto lambda = [&](int comp) {
+            simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp * sizeof(simdscalar)));
  
              dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
          };
@@ -50,8 +50,7 @@ INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
          return;
      }
  
-    auto lambda = [&](int comp)
-    {
+    auto lambda = [&](int comp) {
          // load SIMD components
          simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc);
  
@@ -74,12 +73,12 @@ INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Clamps the given component based on the requirements on the 
+/// @brief Clamps the given component based on the requirements on the
  ///        Format template arg
  /// @param vComp - SIMD vector of floats
  /// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simdscalar Clamp(simdscalar const& vC, uint32_t Component)
  {
      simdscalar vComp = vC;
      if (FormatTraits<Format>::isNormalized(Component))
@@ -99,21 +98,21 @@ INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component)
      {
          if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
          {
-            int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
-            int iMin = 0;
+            int         iMax   = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
+            int         iMin   = 0;
              simdscalari vCompi = _simd_castps_si(vComp);
-            vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
-            vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
-            vComp = _simd_castsi_ps(vCompi);
+            vCompi             = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
+            vCompi             = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
+            vComp              = _simd_castsi_ps(vCompi);
          }
          else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
          {
-            int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
-            int iMin = -1 - iMax;
+            int         iMax   = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
+            int         iMin   = -1 - iMax;
              simdscalari vCompi = _simd_castps_si(vComp);
-            vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
-            vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
-            vComp = _simd_castsi_ps(vCompi);
+            vCompi             = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
+            vCompi             = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
+            vComp              = _simd_castsi_ps(vCompi);
          }
      }
  
@@ -125,8 +124,8 @@ INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component)
  ///        Format template arg
  /// @param vComp - SIMD vector of floats
  /// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simdscalar Normalize(simdscalar const& vC, uint32_t Component)
  {
      simdscalar vComp = vC;
      if (FormatTraits<Format>::isNormalized(Component))
@@ -142,11 +141,12 @@ INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component)
  ///        RGBA32_FLOAT to SOA format
  /// @param src - source data in SOA form
  /// @param dst - output data in SOA form
-template<SWR_FORMAT DstFormat>
-INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
+template <SWR_FORMAT DstFormat>
+INLINE void StoreSOA(const simdvector& src, uint8_t* pDst)
  {
      // fast path for float32
-    if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
+    if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+        (FormatTraits<DstFormat>::GetBPC(0) == 32))
      {
          for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
          {
@@ -155,25 +155,24 @@ INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
              // Gamma-correct
              if (FormatTraits<DstFormat>::isSRGB)
              {
-                if (comp < 3)  // Input format is always RGBA32_FLOAT.
+                if (comp < 3) // Input format is always RGBA32_FLOAT.
                  {
                      vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
                  }
              }
  
-            _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp);
+            _simd_store_ps((float*)(pDst + comp * sizeof(simdscalar)), vComp);
          }
          return;
      }
  
-    auto lambda = [&](int comp)
-    {
+    auto lambda = [&](int comp) {
          simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
  
          // Gamma-correct
          if (FormatTraits<DstFormat>::isSRGB)
          {
-            if (comp < 3)  // Input format is always RGBA32_FLOAT.
+            if (comp < 3) // Input format is always RGBA32_FLOAT.
              {
                  vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
              }
@@ -203,15 +202,16 @@ INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
  ///        SOA RGBA32_FLOAT format.
  /// @param pSrc - source data in SOA form
  /// @param dst - output data in SOA form
-template<SWR_FORMAT SrcFormat>
-INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst)
+template <SWR_FORMAT SrcFormat>
+INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simd16vector& dst)
  {
      // fast path for float32
-    if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
+    if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+        (FormatTraits<SrcFormat>::GetBPC(0) == 32))
      {
-        auto lambda = [&](int comp)
-        {
-            simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar)));
+        auto lambda = [&](int comp) {
+            simd16scalar vComp =
+                _simd16_load_ps(reinterpret_cast<const float*>(pSrc + comp * sizeof(simd16scalar)));
  
              dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
          };
@@ -220,8 +220,7 @@ INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst)
          return;
      }
  
-    auto lambda = [&](int comp)
-    {
+    auto lambda = [&](int comp) {
          // load SIMD components
          simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc);
  
@@ -244,12 +243,12 @@ INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst)
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Clamps the given component based on the requirements on the 
+/// @brief Clamps the given component based on the requirements on the
  ///        Format template arg
  /// @param vComp - SIMD vector of floats
  /// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simd16scalar SIMDCALL Clamp(simd16scalar const& v, uint32_t Component)
  {
      simd16scalar vComp = v;
      if (FormatTraits<Format>::isNormalized(Component))
@@ -269,21 +268,21 @@ INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component)
      {
          if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
          {
-            int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
-            int iMin = 0;
+            int           iMax   = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
+            int           iMin   = 0;
              simd16scalari vCompi = _simd16_castps_si(vComp);
-            vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
-            vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
-            vComp = _simd16_castsi_ps(vCompi);
+            vCompi               = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
+            vCompi               = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
+            vComp                = _simd16_castsi_ps(vCompi);
          }
          else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
          {
-            int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
-            int iMin = -1 - iMax;
+            int           iMax   = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
+            int           iMin   = -1 - iMax;
              simd16scalari vCompi = _simd16_castps_si(vComp);
-            vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
-            vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
-            vComp = _simd16_castsi_ps(vCompi);
+            vCompi               = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
+            vCompi               = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
+            vComp                = _simd16_castsi_ps(vCompi);
          }
      }
  
@@ -295,8 +294,8 @@ INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component)
  ///        Format template arg
  /// @param vComp - SIMD vector of floats
  /// @param Component - component
-template<SWR_FORMAT Format>
-INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Component)
+template <SWR_FORMAT Format>
+INLINE simd16scalar SIMDCALL Normalize(simd16scalar const& vComp, uint32_t Component)
  {
      simd16scalar r = vComp;
      if (FormatTraits<Format>::isNormalized(Component))
@@ -312,11 +311,12 @@ INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Compo
  ///        RGBA32_FLOAT to SOA format
  /// @param src - source data in SOA form
  /// @param dst - output data in SOA form
-template<SWR_FORMAT DstFormat>
-INLINE void SIMDCALL StoreSOA(const simd16vector &src, uint8_t *pDst)
+template <SWR_FORMAT DstFormat>
+INLINE void SIMDCALL StoreSOA(const simd16vector& src, uint8_t* pDst)
  {
      // fast path for float32
-    if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
+    if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
+        (FormatTraits<DstFormat>::GetBPC(0) == 32))
      {
          for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
          {
@@ -325,25 +325,24 @@ INLINE void SIMDCALL StoreSOA(const simd16vector &src, uint8_t *pDst)
              // Gamma-correct
              if (FormatTraits<DstFormat>::isSRGB)
              {
-                if (comp < 3)  // Input format is always RGBA32_FLOAT.
+                if (comp < 3) // Input format is always RGBA32_FLOAT.
                  {
                      vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
                  }
              }
  
-            _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp);
+            _simd16_store_ps(reinterpret_cast<float*>(pDst + comp * sizeof(simd16scalar)), vComp);
          }
          return;
      }
  
-    auto lambda = [&](int comp)
-    {
+    auto lambda = [&](int comp) {
          simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
  
          // Gamma-correct
          if (FormatTraits<DstFormat>::isSRGB)
          {
-            if (comp < 3)  // Input format is always RGBA32_FLOAT.
+            if (comp < 3) // Input format is always RGBA32_FLOAT.
              {
                  vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
              }
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_traits.h b/src/gallium/drivers/swr/rasterizer/core/format_traits.h

index bc585dd175a1edf8f8e04aae983c64f28bf1d273..97e7d56e48ec6a3406a81b854f580fc7710418ac 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/format_traits.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_traits.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file format_traits.h
-*
-* @brief Format Traits.  auto-generated file
-*
-* DO NOT EDIT
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file format_traits.h
+ *
+ * @brief Format Traits.  auto-generated file
+ *
+ * DO NOT EDIT
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "format_types.h"
@@ -35,13 +35,13 @@
  //////////////////////////////////////////////////////////////////////////
  /// FormatSwizzle - Component swizzle selects
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t comp0 = 0, uint32_t comp1 = 0, uint32_t comp2 = 0, uint32_t comp3 = 0>
+template <uint32_t comp0 = 0, uint32_t comp1 = 0, uint32_t comp2 = 0, uint32_t comp3 = 0>
  struct FormatSwizzle
  {
      // Return swizzle select for component.
      INLINE static uint32_t swizzle(uint32_t c)
      {
-        static const uint32_t s[4] = { comp0, comp1, comp2, comp3 };
+        static const uint32_t s[4] = {comp0, comp1, comp2, comp3};
          return s[c];
      }
  };
@@ -49,41 +49,45 @@ struct FormatSwizzle
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits - Format traits
  //////////////////////////////////////////////////////////////////////////
-template<SWR_FORMAT format>
-struct FormatTraits :
-    ComponentTraits<SWR_TYPE_UNKNOWN, 0>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0>
+template <SWR_FORMAT format>
+struct FormatTraits : ComponentTraits<SWR_TYPE_UNKNOWN, 0>, FormatSwizzle<0>, Defaults<0, 0, 0, 0>
  {
-    static const uint32_t bpp{ 0 };
-    static const uint32_t numComps{ 0 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
+    static const uint32_t bpp{0};
+    static const uint32_t numComps{0};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
  
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_FLOAT> - Format traits specialization for R32G32B32A32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_FLOAT,
+                                                          32>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -92,20 +96,21 @@ template<> struct FormatTraits<R32G32B32A32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_SINT> - Format traits specialization for R32G32B32A32_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -114,20 +119,21 @@ template<> struct FormatTraits<R32G32B32A32_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_UINT> - Format traits specialization for R32G32B32A32_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -136,20 +142,20 @@ template<> struct FormatTraits<R32G32B32A32_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R64G64_FLOAT> - Format traits specialization for R64G64_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose64_64  TransposeT;
      typedef Format2<64, 64> FormatT;
@@ -158,20 +164,27 @@ template<> struct FormatTraits<R64G64_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32X32_FLOAT> - Format traits specialization for R32G32B32X32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32X32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_FLOAT,
+                                                          32,
+                                                          SWR_TYPE_UNUSED,
+                                                          32>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -180,20 +193,27 @@ template<> struct FormatTraits<R32G32B32X32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_SSCALED> - Format traits specialization for R32G32B32A32_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+                                                            32,
+                                                            SWR_TYPE_SSCALED,
+                                                            32,
+                                                            SWR_TYPE_SSCALED,
+                                                            32,
+                                                            SWR_TYPE_SSCALED,
+                                                            32>,
+                                            FormatSwizzle<0, 1, 2, 3>,
+                                            Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -202,20 +222,27 @@ template<> struct FormatTraits<R32G32B32A32_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_USCALED> - Format traits specialization for R32G32B32A32_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                            32,
+                                                            SWR_TYPE_USCALED,
+                                                            32,
+                                                            SWR_TYPE_USCALED,
+                                                            32,
+                                                            SWR_TYPE_USCALED,
+                                                            32>,
+                                            FormatSwizzle<0, 1, 2, 3>,
+                                            Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -224,20 +251,27 @@ template<> struct FormatTraits<R32G32B32A32_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32A32_SFIXED> - Format traits specialization for R32G32B32A32_SFIXED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32A32_SFIXED> :
-    ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32A32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED,
+                                                           32,
+                                                           SWR_TYPE_SFIXED,
+                                                           32,
+                                                           SWR_TYPE_SFIXED,
+                                                           32,
+                                                           SWR_TYPE_SFIXED,
+                                                           32>,
+                                           FormatSwizzle<0, 1, 2, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32_32    TransposeT;
      typedef Format4<32, 32, 32, 32> FormatT;
@@ -246,20 +280,21 @@ template<> struct FormatTraits<R32G32B32A32_SFIXED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_FLOAT> - Format traits specialization for R32G32B32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -268,20 +303,21 @@ template<> struct FormatTraits<R32G32B32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_SINT> - Format traits specialization for R32G32B32_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -290,20 +326,21 @@ template<> struct FormatTraits<R32G32B32_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_UINT> - Format traits specialization for R32G32B32_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -312,20 +349,21 @@ template<> struct FormatTraits<R32G32B32_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_SSCALED> - Format traits specialization for R32G32B32_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -334,20 +372,21 @@ template<> struct FormatTraits<R32G32B32_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_USCALED> - Format traits specialization for R32G32B32_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -356,20 +395,21 @@ template<> struct FormatTraits<R32G32B32_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32B32_SFIXED> - Format traits specialization for R32G32B32_SFIXED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32B32_SFIXED> :
-    ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 96 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32B32_SFIXED>
+    : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{96};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32_32   TransposeT;
      typedef Format3<32, 32, 32> FormatT;
@@ -378,20 +418,27 @@ template<> struct FormatTraits<R32G32B32_SFIXED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_UNORM> - Format traits specialization for R16G16B16A16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNORM,
+                                                          16>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -400,20 +447,27 @@ template<> struct FormatTraits<R16G16B16A16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_SNORM> - Format traits specialization for R16G16B16A16_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+                                                          16,
+                                                          SWR_TYPE_SNORM,
+                                                          16,
+                                                          SWR_TYPE_SNORM,
+                                                          16,
+                                                          SWR_TYPE_SNORM,
+                                                          16>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -422,20 +476,21 @@ template<> struct FormatTraits<R16G16B16A16_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_SINT> - Format traits specialization for R16G16B16A16_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -444,20 +499,21 @@ template<> struct FormatTraits<R16G16B16A16_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_UINT> - Format traits specialization for R16G16B16A16_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -466,20 +522,27 @@ template<> struct FormatTraits<R16G16B16A16_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_FLOAT> - Format traits specialization for R16G16B16A16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_FLOAT,
+                                                          16>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -488,20 +551,20 @@ template<> struct FormatTraits<R16G16B16A16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_FLOAT> - Format traits specialization for R32G32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -510,20 +573,20 @@ template<> struct FormatTraits<R32G32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_SINT> - Format traits specialization for R32G32_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SINT> : ComponentTraits<SWR_TYPE_SINT, 32, SWR_TYPE_SINT, 32>,
+                                   FormatSwizzle<0, 1>,
+                                   Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -532,42 +595,44 @@ template<> struct FormatTraits<R32G32_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_UINT> - Format traits specialization for R32G32_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_UINT> : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UINT, 32>,
+                                   FormatSwizzle<0, 1>,
+                                   Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R32_FLOAT_X8X24_TYPELESS> - Format traits specialization for R32_FLOAT_X8X24_TYPELESS
+/// FormatTraits<R32_FLOAT_X8X24_TYPELESS> - Format traits specialization for
+/// R32_FLOAT_X8X24_TYPELESS
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_FLOAT_X8X24_TYPELESS> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
+template <>
+struct FormatTraits<R32_FLOAT_X8X24_TYPELESS>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_UNUSED, 32>,
+      FormatSwizzle<0, 1>,
+      Defaults<0, 0, 0, 0x3f800000>
  {
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -576,20 +641,21 @@ template<> struct FormatTraits<R32_FLOAT_X8X24_TYPELESS> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<X32_TYPELESS_G8X24_UINT> - Format traits specialization for X32_TYPELESS_G8X24_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<X32_TYPELESS_G8X24_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UNUSED, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<X32_TYPELESS_G8X24_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 32, SWR_TYPE_UNUSED, 32>,
+      FormatSwizzle<0, 1>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -598,20 +664,20 @@ template<> struct FormatTraits<X32_TYPELESS_G8X24_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L32A32_FLOAT> - Format traits specialization for L32A32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32A32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32A32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+                                    FormatSwizzle<0, 3>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -620,20 +686,19 @@ template<> struct FormatTraits<L32A32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R64_FLOAT> - Format traits specialization for R64_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 64>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 64>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<64> TransposeT;
      typedef Format1<64>                  FormatT;
@@ -642,20 +707,27 @@ template<> struct FormatTraits<R64_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16X16_UNORM> - Format traits specialization for R16G16B16X16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16X16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNUSED, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16X16_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNORM,
+                                                          16,
+                                                          SWR_TYPE_UNUSED,
+                                                          16>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -664,20 +736,27 @@ template<> struct FormatTraits<R16G16B16X16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16X16_FLOAT> - Format traits specialization for R16G16B16X16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16X16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_UNUSED, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16X16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_FLOAT,
+                                                          16,
+                                                          SWR_TYPE_UNUSED,
+                                                          16>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -686,20 +765,20 @@ template<> struct FormatTraits<R16G16B16X16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L32X32_FLOAT> - Format traits specialization for L32X32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32X32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+                                    FormatSwizzle<0, 3>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -708,20 +787,20 @@ template<> struct FormatTraits<L32X32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I32X32_FLOAT> - Format traits specialization for I32X32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I32X32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I32X32_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 32, SWR_TYPE_FLOAT, 32>,
+                                    FormatSwizzle<0, 3>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -730,20 +809,27 @@ template<> struct FormatTraits<I32X32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_SSCALED> - Format traits specialization for R16G16B16A16_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+                                                            16,
+                                                            SWR_TYPE_SSCALED,
+                                                            16,
+                                                            SWR_TYPE_SSCALED,
+                                                            16,
+                                                            SWR_TYPE_SSCALED,
+                                                            16>,
+                                            FormatSwizzle<0, 1, 2, 3>,
+                                            Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -752,20 +838,27 @@ template<> struct FormatTraits<R16G16B16A16_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16A16_USCALED> - Format traits specialization for R16G16B16A16_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16A16_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16A16_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                            16,
+                                                            SWR_TYPE_USCALED,
+                                                            16,
+                                                            SWR_TYPE_USCALED,
+                                                            16,
+                                                            SWR_TYPE_USCALED,
+                                                            16>,
+                                            FormatSwizzle<0, 1, 2, 3>,
+                                            Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16_16    TransposeT;
      typedef Format4<16, 16, 16, 16> FormatT;
@@ -774,20 +867,20 @@ template<> struct FormatTraits<R16G16B16A16_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_SSCALED> - Format traits specialization for R32G32_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 32, SWR_TYPE_SSCALED, 32>,
+                                      FormatSwizzle<0, 1>,
+                                      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -796,20 +889,20 @@ template<> struct FormatTraits<R32G32_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_USCALED> - Format traits specialization for R32G32_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 32, SWR_TYPE_USCALED, 32>,
+                                      FormatSwizzle<0, 1>,
+                                      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -818,20 +911,20 @@ template<> struct FormatTraits<R32G32_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32G32_SFIXED> - Format traits specialization for R32G32_SFIXED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32G32_SFIXED> :
-    ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32G32_SFIXED> : ComponentTraits<SWR_TYPE_SFIXED, 32, SWR_TYPE_SFIXED, 32>,
+                                     FormatSwizzle<0, 1>,
+                                     Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose32_32  TransposeT;
      typedef Format2<32, 32> FormatT;
@@ -840,20 +933,21 @@ template<> struct FormatTraits<R32G32_SFIXED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B8G8R8A8_UNORM> - Format traits specialization for B8G8R8A8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8A8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8A8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -862,20 +956,21 @@ template<> struct FormatTraits<B8G8R8A8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B8G8R8A8_UNORM_SRGB> - Format traits specialization for B8G8R8A8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8A8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8A8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -884,20 +979,27 @@ template<> struct FormatTraits<B8G8R8A8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_UNORM> - Format traits specialization for R10G10B10A2_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         2>,
+                                         FormatSwizzle<0, 1, 2, 3>,
+                                         Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -906,20 +1008,27 @@ template<> struct FormatTraits<R10G10B10A2_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_UNORM_SRGB> - Format traits specialization for R10G10B10A2_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              2>,
+                                              FormatSwizzle<0, 1, 2, 3>,
+                                              Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -928,20 +1037,21 @@ template<> struct FormatTraits<R10G10B10A2_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_UINT> - Format traits specialization for R10G10B10A2_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -950,20 +1060,21 @@ template<> struct FormatTraits<R10G10B10A2_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_UNORM> - Format traits specialization for R8G8B8A8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -972,20 +1083,21 @@ template<> struct FormatTraits<R8G8B8A8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_UNORM_SRGB> - Format traits specialization for R8G8B8A8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -994,20 +1106,21 @@ template<> struct FormatTraits<R8G8B8A8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_SNORM> - Format traits specialization for R8G8B8A8_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1016,20 +1129,21 @@ template<> struct FormatTraits<R8G8B8A8_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_SINT> - Format traits specialization for R8G8B8A8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1038,20 +1152,21 @@ template<> struct FormatTraits<R8G8B8A8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_UINT> - Format traits specialization for R8G8B8A8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1060,20 +1175,20 @@ template<> struct FormatTraits<R8G8B8A8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_UNORM> - Format traits specialization for R16G16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1082,20 +1197,20 @@ template<> struct FormatTraits<R16G16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_SNORM> - Format traits specialization for R16G16_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1104,20 +1219,20 @@ template<> struct FormatTraits<R16G16_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_SINT> - Format traits specialization for R16G16_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SINT> : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+                                   FormatSwizzle<0, 1>,
+                                   Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1126,20 +1241,20 @@ template<> struct FormatTraits<R16G16_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_UINT> - Format traits specialization for R16G16_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_UINT> : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+                                   FormatSwizzle<0, 1>,
+                                   Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1148,20 +1263,20 @@ template<> struct FormatTraits<R16G16_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_FLOAT> - Format traits specialization for R16G16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1170,20 +1285,27 @@ template<> struct FormatTraits<R16G16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_UNORM> - Format traits specialization for B10G10R10A2_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         2>,
+                                         FormatSwizzle<2, 1, 0, 3>,
+                                         Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -1192,20 +1314,27 @@ template<> struct FormatTraits<B10G10R10A2_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_UNORM_SRGB> - Format traits specialization for B10G10R10A2_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              10,
+                                                              SWR_TYPE_UNORM,
+                                                              2>,
+                                              FormatSwizzle<2, 1, 0, 3>,
+                                              Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -1214,42 +1343,51 @@ template<> struct FormatTraits<B10G10R10A2_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R11G11B10_FLOAT> - Format traits specialization for R11G11B10_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R11G11B10_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 10>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R11G11B10_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 11, SWR_TYPE_FLOAT, 10>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose11_11_10   TransposeT;
      typedef Format3<11, 11, 10> FormatT;
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// FormatTraits<R10G10B10_FLOAT_A2_UNORM> - Format traits specialization for R10G10B10_FLOAT_A2_UNORM
-//////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> :
-    ComponentTraits<SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_FLOAT, 10, SWR_TYPE_UNORM, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+/// FormatTraits<R10G10B10_FLOAT_A2_UNORM> - Format traits specialization for
+/// R10G10B10_FLOAT_A2_UNORM
+//////////////////////////////////////////////////////////////////////////
+template <>
+struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                                10,
+                                                                SWR_TYPE_FLOAT,
+                                                                10,
+                                                                SWR_TYPE_FLOAT,
+                                                                10,
+                                                                SWR_TYPE_UNORM,
+                                                                2>,
+                                                FormatSwizzle<0, 1, 2, 3>,
+                                                Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -1258,20 +1396,19 @@ template<> struct FormatTraits<R10G10B10_FLOAT_A2_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_SINT> - Format traits specialization for R32_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1280,20 +1417,19 @@ template<> struct FormatTraits<R32_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_UINT> - Format traits specialization for R32_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1302,20 +1438,19 @@ template<> struct FormatTraits<R32_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_FLOAT> - Format traits specialization for R32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1324,20 +1459,19 @@ template<> struct FormatTraits<R32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R24_UNORM_X8_TYPELESS> - Format traits specialization for R24_UNORM_X8_TYPELESS
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R24_UNORM_X8_TYPELESS> :
-    ComponentTraits<SWR_TYPE_UNORM, 24>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R24_UNORM_X8_TYPELESS>
+    : ComponentTraits<SWR_TYPE_UNORM, 24>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<24>                  FormatT;
@@ -1346,20 +1480,19 @@ template<> struct FormatTraits<R24_UNORM_X8_TYPELESS> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<X24_TYPELESS_G8_UINT> - Format traits specialization for X24_TYPELESS_G8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<X24_TYPELESS_G8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 32>,
-    FormatSwizzle<1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<X24_TYPELESS_G8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 32>, FormatSwizzle<1>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1368,20 +1501,19 @@ template<> struct FormatTraits<X24_TYPELESS_G8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L32_UNORM> - Format traits specialization for L32_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1390,20 +1522,20 @@ template<> struct FormatTraits<L32_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L16A16_UNORM> - Format traits specialization for L16A16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16A16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16A16_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+                                    FormatSwizzle<0, 3>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1412,20 +1544,20 @@ template<> struct FormatTraits<L16A16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I24X8_UNORM> - Format traits specialization for I24X8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I24X8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
+                                   FormatSwizzle<0, 3>,
+                                   Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose24_8  TransposeT;
      typedef Format2<24, 8> FormatT;
@@ -1434,20 +1566,20 @@ template<> struct FormatTraits<I24X8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L24X8_UNORM> - Format traits specialization for L24X8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L24X8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L24X8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 24, SWR_TYPE_UNORM, 8>,
+                                   FormatSwizzle<0, 3>,
+                                   Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose24_8  TransposeT;
      typedef Format2<24, 8> FormatT;
@@ -1456,20 +1588,19 @@ template<> struct FormatTraits<L24X8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I32_FLOAT> - Format traits specialization for I32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I32_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1478,20 +1609,19 @@ template<> struct FormatTraits<I32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L32_FLOAT> - Format traits specialization for L32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L32_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1500,20 +1630,19 @@ template<> struct FormatTraits<L32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A32_FLOAT> - Format traits specialization for A32_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A32_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 32>,
-    FormatSwizzle<3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A32_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 32>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1522,20 +1651,21 @@ template<> struct FormatTraits<A32_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B8G8R8X8_UNORM> - Format traits specialization for B8G8R8X8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8X8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8X8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1544,20 +1674,21 @@ template<> struct FormatTraits<B8G8R8X8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B8G8R8X8_UNORM_SRGB> - Format traits specialization for B8G8R8X8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B8G8R8X8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B8G8R8X8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1566,20 +1697,21 @@ template<> struct FormatTraits<B8G8R8X8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8X8_UNORM> - Format traits specialization for R8G8B8X8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8X8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8X8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1588,20 +1720,21 @@ template<> struct FormatTraits<R8G8B8X8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8X8_UNORM_SRGB> - Format traits specialization for R8G8B8X8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8X8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8X8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNUSED, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1610,20 +1743,21 @@ template<> struct FormatTraits<R8G8B8X8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R9G9B9E5_SHAREDEXP> - Format traits specialization for R9G9B9E5_SHAREDEXP
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R9G9B9E5_SHAREDEXP> :
-    ComponentTraits<SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 5>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R9G9B9E5_SHAREDEXP>
+    : ComponentTraits<SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 9, SWR_TYPE_UINT, 5>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose9_9_9_5    TransposeT;
      typedef Format4<9, 9, 9, 5> FormatT;
@@ -1632,20 +1766,27 @@ template<> struct FormatTraits<R9G9B9E5_SHAREDEXP> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10X2_UNORM> - Format traits specialization for B10G10R10X2_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10X2_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNORM, 10, SWR_TYPE_UNUSED, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10X2_UNORM> : ComponentTraits<SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNORM,
+                                                         10,
+                                                         SWR_TYPE_UNUSED,
+                                                         2>,
+                                         FormatSwizzle<2, 1, 0, 3>,
+                                         Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -1654,20 +1795,20 @@ template<> struct FormatTraits<B10G10R10X2_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L16A16_FLOAT> - Format traits specialization for L16A16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16A16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16A16_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+                                    FormatSwizzle<0, 3>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1676,20 +1817,27 @@ template<> struct FormatTraits<L16A16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10X2_USCALED> - Format traits specialization for R10G10B10X2_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10X2_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_UNUSED, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10X2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_UNUSED,
+                                                           2>,
+                                           FormatSwizzle<0, 1, 2, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -1698,20 +1846,27 @@ template<> struct FormatTraits<R10G10B10X2_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_SSCALED> - Format traits specialization for R8G8B8A8_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+                                                        8,
+                                                        SWR_TYPE_SSCALED,
+                                                        8,
+                                                        SWR_TYPE_SSCALED,
+                                                        8,
+                                                        SWR_TYPE_SSCALED,
+                                                        8>,
+                                        FormatSwizzle<0, 1, 2, 3>,
+                                        Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1720,20 +1875,27 @@ template<> struct FormatTraits<R8G8B8A8_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8A8_USCALED> - Format traits specialization for R8G8B8A8_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8A8_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8A8_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                        8,
+                                                        SWR_TYPE_USCALED,
+                                                        8,
+                                                        SWR_TYPE_USCALED,
+                                                        8,
+                                                        SWR_TYPE_USCALED,
+                                                        8>,
+                                        FormatSwizzle<0, 1, 2, 3>,
+                                        Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -1742,20 +1904,20 @@ template<> struct FormatTraits<R8G8B8A8_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_SSCALED> - Format traits specialization for R16G16_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
+                                      FormatSwizzle<0, 1>,
+                                      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1764,20 +1926,20 @@ template<> struct FormatTraits<R16G16_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16_USCALED> - Format traits specialization for R16G16_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
+                                      FormatSwizzle<0, 1>,
+                                      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16  TransposeT;
      typedef Format2<16, 16> FormatT;
@@ -1786,20 +1948,19 @@ template<> struct FormatTraits<R16G16_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_SSCALED> - Format traits specialization for R32_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1808,20 +1969,19 @@ template<> struct FormatTraits<R32_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_USCALED> - Format traits specialization for R32_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -1830,20 +1990,21 @@ template<> struct FormatTraits<R32_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G6R5_UNORM> - Format traits specialization for B5G6R5_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G6R5_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
-    FormatSwizzle<2, 1, 0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G6R5_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
+      FormatSwizzle<2, 1, 0>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_6_5   TransposeT;
      typedef Format3<5, 6, 5> FormatT;
@@ -1852,20 +2013,21 @@ template<> struct FormatTraits<B5G6R5_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G6R5_UNORM_SRGB> - Format traits specialization for B5G6R5_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G6R5_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
-    FormatSwizzle<2, 1, 0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G6R5_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 6, SWR_TYPE_UNORM, 5>,
+      FormatSwizzle<2, 1, 0>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_6_5   TransposeT;
      typedef Format3<5, 6, 5> FormatT;
@@ -1874,20 +2036,21 @@ template<> struct FormatTraits<B5G6R5_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G5R5A1_UNORM> - Format traits specialization for B5G5R5A1_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5A1_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5A1_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_5_5_1    TransposeT;
      typedef Format4<5, 5, 5, 1> FormatT;
@@ -1896,20 +2059,21 @@ template<> struct FormatTraits<B5G5R5A1_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G5R5A1_UNORM_SRGB> - Format traits specialization for B5G5R5A1_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5A1_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5A1_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 1>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_5_5_1    TransposeT;
      typedef Format4<5, 5, 5, 1> FormatT;
@@ -1918,20 +2082,21 @@ template<> struct FormatTraits<B5G5R5A1_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B4G4R4A4_UNORM> - Format traits specialization for B4G4R4A4_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B4G4R4A4_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B4G4R4A4_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose4_4_4_4    TransposeT;
      typedef Format4<4, 4, 4, 4> FormatT;
@@ -1940,20 +2105,21 @@ template<> struct FormatTraits<B4G4R4A4_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B4G4R4A4_UNORM_SRGB> - Format traits specialization for B4G4R4A4_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B4G4R4A4_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B4G4R4A4_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose4_4_4_4    TransposeT;
      typedef Format4<4, 4, 4, 4> FormatT;
@@ -1962,20 +2128,20 @@ template<> struct FormatTraits<B4G4R4A4_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_UNORM> - Format traits specialization for R8G8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+                                  FormatSwizzle<0, 1>,
+                                  Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -1984,20 +2150,20 @@ template<> struct FormatTraits<R8G8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_SNORM> - Format traits specialization for R8G8_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SNORM> : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+                                  FormatSwizzle<0, 1>,
+                                  Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2006,20 +2172,20 @@ template<> struct FormatTraits<R8G8_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_SINT> - Format traits specialization for R8G8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+                                 FormatSwizzle<0, 1>,
+                                 Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2028,20 +2194,20 @@ template<> struct FormatTraits<R8G8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_UINT> - Format traits specialization for R8G8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+                                 FormatSwizzle<0, 1>,
+                                 Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2050,20 +2216,19 @@ template<> struct FormatTraits<R8G8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_UNORM> - Format traits specialization for R16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2072,20 +2237,19 @@ template<> struct FormatTraits<R16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_SNORM> - Format traits specialization for R16_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2094,20 +2258,19 @@ template<> struct FormatTraits<R16_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_SINT> - Format traits specialization for R16_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2116,20 +2279,19 @@ template<> struct FormatTraits<R16_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_UINT> - Format traits specialization for R16_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2138,20 +2300,19 @@ template<> struct FormatTraits<R16_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_FLOAT> - Format traits specialization for R16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2160,20 +2321,19 @@ template<> struct FormatTraits<R16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I16_UNORM> - Format traits specialization for I16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I16_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2182,20 +2342,19 @@ template<> struct FormatTraits<I16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L16_UNORM> - Format traits specialization for L16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2204,20 +2363,19 @@ template<> struct FormatTraits<L16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A16_UNORM> - Format traits specialization for A16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A16_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2226,20 +2384,20 @@ template<> struct FormatTraits<A16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8A8_UNORM> - Format traits specialization for L8A8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UNORM> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+                                  FormatSwizzle<0, 3>,
+                                  Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2248,20 +2406,19 @@ template<> struct FormatTraits<L8A8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I16_FLOAT> - Format traits specialization for I16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I16_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2270,20 +2427,19 @@ template<> struct FormatTraits<I16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L16_FLOAT> - Format traits specialization for L16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L16_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2292,20 +2448,19 @@ template<> struct FormatTraits<L16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A16_FLOAT> - Format traits specialization for A16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A16_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 16>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2314,20 +2469,20 @@ template<> struct FormatTraits<A16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8A8_UNORM_SRGB> - Format traits specialization for L8A8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UNORM_SRGB> : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+                                       FormatSwizzle<0, 3>,
+                                       Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2336,20 +2491,21 @@ template<> struct FormatTraits<L8A8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G5R5X1_UNORM> - Format traits specialization for B5G5R5X1_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5X1_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5X1_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_5_5_1    TransposeT;
      typedef Format4<5, 5, 5, 1> FormatT;
@@ -2358,20 +2514,21 @@ template<> struct FormatTraits<B5G5R5X1_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B5G5R5X1_UNORM_SRGB> - Format traits specialization for B5G5R5X1_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B5G5R5X1_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B5G5R5X1_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNUSED, 1>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose5_5_5_1    TransposeT;
      typedef Format4<5, 5, 5, 1> FormatT;
@@ -2380,20 +2537,20 @@ template<> struct FormatTraits<B5G5R5X1_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_SSCALED> - Format traits specialization for R8G8_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2402,20 +2559,20 @@ template<> struct FormatTraits<R8G8_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8_USCALED> - Format traits specialization for R8G8_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
-    FormatSwizzle<0, 1>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8_USCALED> : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
+                                    FormatSwizzle<0, 1>,
+                                    Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2424,20 +2581,19 @@ template<> struct FormatTraits<R8G8_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_SSCALED> - Format traits specialization for R16_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2446,20 +2602,19 @@ template<> struct FormatTraits<R16_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16_USCALED> - Format traits specialization for R16_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 16>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 16>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<16> TransposeT;
      typedef Format1<16>                  FormatT;
@@ -2468,20 +2623,21 @@ template<> struct FormatTraits<R16_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A1B5G5R5_UNORM> - Format traits specialization for A1B5G5R5_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A1B5G5R5_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 1, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5>,
-    FormatSwizzle<3, 2, 1, 0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A1B5G5R5_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 1, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5, SWR_TYPE_UNORM, 5>,
+      FormatSwizzle<3, 2, 1, 0>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose1_5_5_5    TransposeT;
      typedef Format4<1, 5, 5, 5> FormatT;
@@ -2490,20 +2646,21 @@ template<> struct FormatTraits<A1B5G5R5_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A4B4G4R4_UNORM> - Format traits specialization for A4B4G4R4_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A4B4G4R4_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
-    FormatSwizzle<3, 2, 1, 0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A4B4G4R4_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4, SWR_TYPE_UNORM, 4>,
+      FormatSwizzle<3, 2, 1, 0>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose4_4_4_4    TransposeT;
      typedef Format4<4, 4, 4, 4> FormatT;
@@ -2512,20 +2669,20 @@ template<> struct FormatTraits<A4B4G4R4_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8A8_UINT> - Format traits specialization for L8A8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_UINT> : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+                                 FormatSwizzle<0, 3>,
+                                 Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2534,20 +2691,20 @@ template<> struct FormatTraits<L8A8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8A8_SINT> - Format traits specialization for L8A8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8A8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 16 };
-    static const uint32_t numComps{ 2 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 1 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8A8_SINT> : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+                                 FormatSwizzle<0, 3>,
+                                 Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{16};
+    static const uint32_t numComps{2};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{1};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8  TransposeT;
      typedef Format2<8, 8> FormatT;
@@ -2556,20 +2713,19 @@ template<> struct FormatTraits<L8A8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_UNORM> - Format traits specialization for R8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2578,20 +2734,19 @@ template<> struct FormatTraits<R8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_SNORM> - Format traits specialization for R8_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2600,20 +2755,19 @@ template<> struct FormatTraits<R8_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_SINT> - Format traits specialization for R8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2622,20 +2776,19 @@ template<> struct FormatTraits<R8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_UINT> - Format traits specialization for R8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2644,20 +2797,19 @@ template<> struct FormatTraits<R8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<A8_UNORM> - Format traits specialization for A8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<A8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<A8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<3>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2666,20 +2818,19 @@ template<> struct FormatTraits<A8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I8_UNORM> - Format traits specialization for I8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2688,20 +2839,19 @@ template<> struct FormatTraits<I8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8_UNORM> - Format traits specialization for L8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2710,20 +2860,19 @@ template<> struct FormatTraits<L8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_SSCALED> - Format traits specialization for R8_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2732,20 +2881,19 @@ template<> struct FormatTraits<R8_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8_USCALED> - Format traits specialization for R8_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2754,20 +2902,19 @@ template<> struct FormatTraits<R8_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8_UNORM_SRGB> - Format traits specialization for L8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2776,20 +2923,19 @@ template<> struct FormatTraits<L8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8_UINT> - Format traits specialization for L8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2798,20 +2944,19 @@ template<> struct FormatTraits<L8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<L8_SINT> - Format traits specialization for L8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<L8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<L8_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2820,20 +2965,19 @@ template<> struct FormatTraits<L8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I8_UINT> - Format traits specialization for I8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2842,20 +2986,19 @@ template<> struct FormatTraits<I8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<I8_SINT> - Format traits specialization for I8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<I8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<I8_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2864,20 +3007,19 @@ template<> struct FormatTraits<I8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<DXT1_RGB_SRGB> - Format traits specialization for DXT1_RGB_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<DXT1_RGB_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<DXT1_RGB_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2886,20 +3028,21 @@ template<> struct FormatTraits<DXT1_RGB_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<YCRCB_SWAPUVY> - Format traits specialization for YCRCB_SWAPUVY
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<YCRCB_SWAPUVY> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ true };
-    static const uint32_t bcWidth{ 2 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<YCRCB_SWAPUVY>
+    : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{true};
+    static const uint32_t bcWidth{2};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -2908,20 +3051,19 @@ template<> struct FormatTraits<YCRCB_SWAPUVY> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC1_UNORM> - Format traits specialization for BC1_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC1_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC1_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2930,20 +3072,19 @@ template<> struct FormatTraits<BC1_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC2_UNORM> - Format traits specialization for BC2_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC2_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC2_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2952,20 +3093,19 @@ template<> struct FormatTraits<BC2_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC3_UNORM> - Format traits specialization for BC3_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC3_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC3_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2974,20 +3114,19 @@ template<> struct FormatTraits<BC3_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC4_UNORM> - Format traits specialization for BC4_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC4_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC4_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -2996,20 +3135,19 @@ template<> struct FormatTraits<BC4_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC5_UNORM> - Format traits specialization for BC5_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC5_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC5_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3018,20 +3156,19 @@ template<> struct FormatTraits<BC5_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC1_UNORM_SRGB> - Format traits specialization for BC1_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC1_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC1_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3040,20 +3177,19 @@ template<> struct FormatTraits<BC1_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC2_UNORM_SRGB> - Format traits specialization for BC2_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC2_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC2_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3062,20 +3198,19 @@ template<> struct FormatTraits<BC2_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC3_UNORM_SRGB> - Format traits specialization for BC3_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC3_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC3_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3084,20 +3219,21 @@ template<> struct FormatTraits<BC3_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<YCRCB_SWAPUV> - Format traits specialization for YCRCB_SWAPUV
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<YCRCB_SWAPUV> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ true };
-    static const uint32_t bcWidth{ 2 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<YCRCB_SWAPUV>
+    : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{true};
+    static const uint32_t bcWidth{2};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8_8    TransposeT;
      typedef Format4<8, 8, 8, 8> FormatT;
@@ -3106,20 +3242,19 @@ template<> struct FormatTraits<YCRCB_SWAPUV> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<DXT1_RGB> - Format traits specialization for DXT1_RGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<DXT1_RGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<DXT1_RGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3128,20 +3263,21 @@ template<> struct FormatTraits<DXT1_RGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_UNORM> - Format traits specialization for R8G8B8_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3150,20 +3286,21 @@ template<> struct FormatTraits<R8G8B8_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_SNORM> - Format traits specialization for R8G8B8_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8, SWR_TYPE_SNORM, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3172,20 +3309,21 @@ template<> struct FormatTraits<R8G8B8_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_SSCALED> - Format traits specialization for R8G8B8_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8, SWR_TYPE_SSCALED, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3194,20 +3332,21 @@ template<> struct FormatTraits<R8G8B8_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_USCALED> - Format traits specialization for R8G8B8_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8, SWR_TYPE_USCALED, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3216,20 +3355,27 @@ template<> struct FormatTraits<R8G8B8_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R64G64B64A64_FLOAT> - Format traits specialization for R64G64B64A64_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64B64A64_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 256 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64B64A64_FLOAT> : ComponentTraits<SWR_TYPE_FLOAT,
+                                                          64,
+                                                          SWR_TYPE_FLOAT,
+                                                          64,
+                                                          SWR_TYPE_FLOAT,
+                                                          64,
+                                                          SWR_TYPE_FLOAT,
+                                                          64>,
+                                          FormatSwizzle<0, 1, 2, 3>,
+                                          Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{256};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose64_64_64_64    TransposeT;
      typedef Format4<64, 64, 64, 64> FormatT;
@@ -3238,20 +3384,21 @@ template<> struct FormatTraits<R64G64B64A64_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R64G64B64_FLOAT> - Format traits specialization for R64G64B64_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R64G64B64_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 192 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R64G64B64_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64, SWR_TYPE_FLOAT, 64>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{192};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose64_64_64   TransposeT;
      typedef Format3<64, 64, 64> FormatT;
@@ -3260,20 +3407,19 @@ template<> struct FormatTraits<R64G64B64_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC4_SNORM> - Format traits specialization for BC4_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC4_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 64 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC4_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{64};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3282,20 +3428,19 @@ template<> struct FormatTraits<BC4_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC5_SNORM> - Format traits specialization for BC5_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC5_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC5_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3304,20 +3449,21 @@ template<> struct FormatTraits<BC5_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_FLOAT> - Format traits specialization for R16G16B16_FLOAT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_FLOAT> :
-    ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_FLOAT>
+    : ComponentTraits<SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16, SWR_TYPE_FLOAT, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3326,20 +3472,21 @@ template<> struct FormatTraits<R16G16B16_FLOAT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_UNORM> - Format traits specialization for R16G16B16_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16, SWR_TYPE_UNORM, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3348,20 +3495,21 @@ template<> struct FormatTraits<R16G16B16_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_SNORM> - Format traits specialization for R16G16B16_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SNORM>
+    : ComponentTraits<SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16, SWR_TYPE_SNORM, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3370,20 +3518,21 @@ template<> struct FormatTraits<R16G16B16_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_SSCALED> - Format traits specialization for R16G16B16_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SSCALED>
+    : ComponentTraits<SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16, SWR_TYPE_SSCALED, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3392,20 +3541,21 @@ template<> struct FormatTraits<R16G16B16_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_USCALED> - Format traits specialization for R16G16B16_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_USCALED>
+    : ComponentTraits<SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16, SWR_TYPE_USCALED, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3414,20 +3564,19 @@ template<> struct FormatTraits<R16G16B16_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC6H_SF16> - Format traits specialization for BC6H_SF16
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC6H_SF16> :
-    ComponentTraits<SWR_TYPE_SNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC6H_SF16>
+    : ComponentTraits<SWR_TYPE_SNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3436,20 +3585,19 @@ template<> struct FormatTraits<BC6H_SF16> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC7_UNORM> - Format traits specialization for BC7_UNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC7_UNORM> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC7_UNORM>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3458,20 +3606,19 @@ template<> struct FormatTraits<BC7_UNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC7_UNORM_SRGB> - Format traits specialization for BC7_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC7_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC7_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{true};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3480,20 +3627,19 @@ template<> struct FormatTraits<BC7_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<BC6H_UF16> - Format traits specialization for BC6H_UF16
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<BC6H_UF16> :
-    ComponentTraits<SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 128 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ true };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 4 };
-    static const uint32_t bcHeight{ 4 };
+template <>
+struct FormatTraits<BC6H_UF16>
+    : ComponentTraits<SWR_TYPE_UNORM, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{128};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{true};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{4};
+    static const uint32_t bcHeight{4};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
@@ -3502,20 +3648,21 @@ template<> struct FormatTraits<BC6H_UF16> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_UNORM_SRGB> - Format traits specialization for R8G8B8_UNORM_SRGB
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UNORM_SRGB> :
-    ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ true };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UNORM_SRGB>
+    : ComponentTraits<SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8, SWR_TYPE_UNORM, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{true};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3524,20 +3671,21 @@ template<> struct FormatTraits<R8G8B8_UNORM_SRGB> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_UINT> - Format traits specialization for R16G16B16_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16, SWR_TYPE_UINT, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3546,20 +3694,21 @@ template<> struct FormatTraits<R16G16B16_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R16G16B16_SINT> - Format traits specialization for R16G16B16_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R16G16B16_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 48 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R16G16B16_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16, SWR_TYPE_SINT, 16>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{48};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose16_16_16   TransposeT;
      typedef Format3<16, 16, 16> FormatT;
@@ -3568,20 +3717,19 @@ template<> struct FormatTraits<R16G16B16_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R32_SFIXED> - Format traits specialization for R32_SFIXED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R32_SFIXED> :
-    ComponentTraits<SWR_TYPE_SFIXED, 32>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R32_SFIXED>
+    : ComponentTraits<SWR_TYPE_SFIXED, 32>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<32> TransposeT;
      typedef Format1<32>                  FormatT;
@@ -3590,20 +3738,27 @@ template<> struct FormatTraits<R32_SFIXED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_SNORM> - Format traits specialization for R10G10B10A2_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         2>,
+                                         FormatSwizzle<0, 1, 2, 3>,
+                                         Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3612,20 +3767,27 @@ template<> struct FormatTraits<R10G10B10A2_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_USCALED> - Format traits specialization for R10G10B10A2_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           2>,
+                                           FormatSwizzle<0, 1, 2, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3634,20 +3796,27 @@ template<> struct FormatTraits<R10G10B10A2_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_SSCALED> - Format traits specialization for R10G10B10A2_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           2>,
+                                           FormatSwizzle<0, 1, 2, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3656,20 +3825,21 @@ template<> struct FormatTraits<R10G10B10A2_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R10G10B10A2_SINT> - Format traits specialization for R10G10B10A2_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R10G10B10A2_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
-    FormatSwizzle<0, 1, 2, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R10G10B10A2_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
+      FormatSwizzle<0, 1, 2, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3678,20 +3848,27 @@ template<> struct FormatTraits<R10G10B10A2_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_SNORM> - Format traits specialization for B10G10R10A2_SNORM
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SNORM> :
-    ComponentTraits<SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 10, SWR_TYPE_SNORM, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SNORM> : ComponentTraits<SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         10,
+                                                         SWR_TYPE_SNORM,
+                                                         2>,
+                                         FormatSwizzle<2, 1, 0, 3>,
+                                         Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3700,20 +3877,27 @@ template<> struct FormatTraits<B10G10R10A2_SNORM> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_USCALED> - Format traits specialization for B10G10R10A2_USCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_USCALED> :
-    ComponentTraits<SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 10, SWR_TYPE_USCALED, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_USCALED> : ComponentTraits<SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           10,
+                                                           SWR_TYPE_USCALED,
+                                                           2>,
+                                           FormatSwizzle<2, 1, 0, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3722,20 +3906,27 @@ template<> struct FormatTraits<B10G10R10A2_USCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_SSCALED> - Format traits specialization for B10G10R10A2_SSCALED
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SSCALED> :
-    ComponentTraits<SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 10, SWR_TYPE_SSCALED, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x3f800000>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SSCALED> : ComponentTraits<SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           10,
+                                                           SWR_TYPE_SSCALED,
+                                                           2>,
+                                           FormatSwizzle<2, 1, 0, 3>,
+                                           Defaults<0, 0, 0, 0x3f800000>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3744,20 +3935,21 @@ template<> struct FormatTraits<B10G10R10A2_SSCALED> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_UINT> - Format traits specialization for B10G10R10A2_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 10, SWR_TYPE_UINT, 2>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3766,20 +3958,21 @@ template<> struct FormatTraits<B10G10R10A2_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<B10G10R10A2_SINT> - Format traits specialization for B10G10R10A2_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<B10G10R10A2_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
-    FormatSwizzle<2, 1, 0, 3>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 32 };
-    static const uint32_t numComps{ 4 };
-    static const bool hasAlpha{ true };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<B10G10R10A2_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 10, SWR_TYPE_SINT, 2>,
+      FormatSwizzle<2, 1, 0, 3>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{32};
+    static const uint32_t numComps{4};
+    static const bool     hasAlpha{true};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose10_10_10_2    TransposeT;
      typedef Format4<10, 10, 10, 2> FormatT;
@@ -3788,20 +3981,21 @@ template<> struct FormatTraits<B10G10R10A2_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_UINT> - Format traits specialization for R8G8B8_UINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_UINT> :
-    ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_UINT>
+    : ComponentTraits<SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8, SWR_TYPE_UINT, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3810,20 +4004,21 @@ template<> struct FormatTraits<R8G8B8_UINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<R8G8B8_SINT> - Format traits specialization for R8G8B8_SINT
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<R8G8B8_SINT> :
-    ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
-    FormatSwizzle<0, 1, 2>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 24 };
-    static const uint32_t numComps{ 3 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 0 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<R8G8B8_SINT>
+    : ComponentTraits<SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8, SWR_TYPE_SINT, 8>,
+      FormatSwizzle<0, 1, 2>,
+      Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{24};
+    static const uint32_t numComps{3};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{0};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef Transpose8_8_8   TransposeT;
      typedef Format3<8, 8, 8> FormatT;
@@ -3832,20 +4027,19 @@ template<> struct FormatTraits<R8G8B8_SINT> :
  //////////////////////////////////////////////////////////////////////////
  /// FormatTraits<RAW> - Format traits specialization for RAW
  //////////////////////////////////////////////////////////////////////////
-template<> struct FormatTraits<RAW> :
-    ComponentTraits<SWR_TYPE_UINT, 8>,
-    FormatSwizzle<0>,
-    Defaults<0, 0, 0, 0x1>
-{
-    static const uint32_t bpp{ 8 };
-    static const uint32_t numComps{ 1 };
-    static const bool hasAlpha{ false };
-    static const uint32_t alphaComp{ 3 };
-    static const bool isSRGB{ false };
-    static const bool isBC{ false };
-    static const bool isSubsampled{ false };
-    static const uint32_t bcWidth{ 1 };
-    static const uint32_t bcHeight{ 1 };
+template <>
+struct FormatTraits<RAW>
+    : ComponentTraits<SWR_TYPE_UINT, 8>, FormatSwizzle<0>, Defaults<0, 0, 0, 0x1>
+{
+    static const uint32_t bpp{8};
+    static const uint32_t numComps{1};
+    static const bool     hasAlpha{false};
+    static const uint32_t alphaComp{3};
+    static const bool     isSRGB{false};
+    static const bool     isBC{false};
+    static const bool     isSubsampled{false};
+    static const uint32_t bcWidth{1};
+    static const uint32_t bcHeight{1};
  
      typedef TransposeSingleComponent<8> TransposeT;
      typedef Format1<8>                  FormatT;
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h b/src/gallium/drivers/swr/rasterizer/core/format_types.h

index c3327c1d40b6fe7700e56642b6f9587ef2dbac7e..518da829d5883d514d14b9bb6445888e4dcadce4 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file formats.h
-*
-* @brief Definitions for SWR_FORMAT functions.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file formats.h
+ *
+ * @brief Definitions for SWR_FORMAT functions.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "utils.h"
@@ -36,16 +36,16 @@
  template <uint32_t NumBits, bool Signed = false>
  struct PackTraits
  {
-    static const uint32_t MyNumBits = NumBits;
-    static simdscalar loadSOA(const uint8_t *pSrc) = delete;
-    static void storeSOA(uint8_t *pDst, simdscalar const &src) = delete;
-    static simdscalar unpack(simdscalar &in) = delete;
-    static simdscalar pack(simdscalar &in) = delete;
+    static const uint32_t MyNumBits                                      = NumBits;
+    static simdscalar     loadSOA(const uint8_t* pSrc)                   = delete;
+    static void           storeSOA(uint8_t* pDst, simdscalar const& src) = delete;
+    static simdscalar     unpack(simdscalar& in)                         = delete;
+    static simdscalar     pack(simdscalar& in)                           = delete;
  #if ENABLE_AVX512_SIMD16
-    static simd16scalar loadSOA_16(const uint8_t *pSrc) = delete;
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) = delete;
-    static simd16scalar unpack(simd16scalar &in) = delete;
-    static simd16scalar pack(simd16scalar &in) = delete;
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)                   = delete;
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) = delete;
+    static simd16scalar  unpack(simd16scalar& in)                         = delete;
+    static simd16scalar  pack(simd16scalar& in)                           = delete;
  #endif
  };
  
@@ -57,15 +57,15 @@ struct PackTraits<0, false>
  {
      static const uint32_t MyNumBits = 0;
  
-    static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_setzero_ps(); }
-    static void storeSOA(uint8_t *pDst, simdscalar const &src) { return; }
-    static simdscalar unpack(simdscalar &in) { return _simd_setzero_ps(); }
-    static simdscalar pack(simdscalar &in) { return _simd_setzero_ps(); }
+    static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_setzero_ps(); }
+    static void       storeSOA(uint8_t* pDst, simdscalar const& src) { return; }
+    static simdscalar unpack(simdscalar& in) { return _simd_setzero_ps(); }
+    static simdscalar pack(simdscalar& in) { return _simd_setzero_ps(); }
  #if ENABLE_AVX512_SIMD16
-    static simd16scalar loadSOA_16(const uint8_t *pSrc) { return _simd16_setzero_ps(); }
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src) { return; }
-    static simd16scalar unpack(simd16scalar &in) { return _simd16_setzero_ps(); }
-    static simd16scalar pack(simd16scalar &in) { return _simd16_setzero_ps(); }
+    static simd16scalar loadSOA_16(const uint8_t* pSrc) { return _simd16_setzero_ps(); }
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src) { return; }
+    static simd16scalar  unpack(simd16scalar& in) { return _simd16_setzero_ps(); }
+    static simd16scalar  pack(simd16scalar& in) { return _simd16_setzero_ps(); }
  #endif
  };
  
@@ -77,18 +77,18 @@ struct PackTraits<8, false>
  {
      static const uint32_t MyNumBits = 8;
  
-    static simdscalar loadSOA(const uint8_t *pSrc)
+    static simdscalar loadSOA(const uint8_t* pSrc)
      {
  #if KNOB_SIMD_WIDTH == 8
          __m256 result = _mm256_setzero_ps();
-        __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
+        __m128 vLo    = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
          return _mm256_insertf128_ps(result, vLo, 0);
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static void storeSOA(uint8_t *pDst, simdscalar const &src)
+    static void storeSOA(uint8_t* pDst, simdscalar const& src)
      {
          // store simd bytes
  #if KNOB_SIMD_WIDTH == 8
@@ -98,31 +98,33 @@ struct PackTraits<8, false>
  #endif
      }
  
-    static simdscalar unpack(simdscalar &in)
+    static simdscalar unpack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
  #if KNOB_ARCH <= KNOB_ARCH_AVX
-        __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+        __m128i src   = _mm_castps_si128(_mm256_castps256_ps128(in));
          __m128i resLo = _mm_cvtepu8_epi32(src);
-        __m128i resHi = _mm_shuffle_epi8(src,
-            _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
+        __m128i resHi =
+            _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
  
          __m256i result = _mm256_castsi128_si256(resLo);
-        result = _mm256_insertf128_si256(result, resHi, 1);
-        return simdscalar{ _mm256_castsi256_ps(result) };
+        result         = _mm256_insertf128_si256(result, resHi, 1);
+        return simdscalar{_mm256_castsi256_ps(result)};
  #else
-        return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+        return _mm256_castsi256_ps(
+            _mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
  #endif
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static simdscalar pack(simdscalar &in)
+    static simdscalar pack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
          simdscalari src = _simd_castps_si(in);
-        __m128i res16 = _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
+        __m128i     res16 =
+            _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
          __m128i res8 = _mm_packus_epi16(res16, _mm_undefined_si128());
          return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
  #else
@@ -131,51 +133,64 @@ struct PackTraits<8, false>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static simd16scalar loadSOA_16(const uint8_t *pSrc)
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)
      {
-        simd16scalar result = _simd16_setzero_ps();
-        simdscalar resultlo = _simd_setzero_ps();
+        simd16scalar result   = _simd16_setzero_ps();
+        simdscalar   resultlo = _simd_setzero_ps();
  
-        const __m128 src = _mm_load_ps(reinterpret_cast<const float *>(pSrc));
+        const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
  
          resultlo = _mm256_insertf128_ps(resultlo, src, 0);
-        result = _simd16_insert_ps(result, resultlo, 0);
+        result   = _simd16_insert_ps(result, resultlo, 0);
  
          return result;
      }
  
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
      {
          // store simd16 bytes
-        _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
+        _mm_store_ps(reinterpret_cast<float*>(pDst),
+                     _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
      }
  
-    static simd16scalar unpack(simd16scalar &in)
+    static simd16scalar unpack(simd16scalar& in)
      {
-        simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
+        simd4scalari  tmp    = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
          simd16scalari result = _simd16_cvtepu8_epi32(tmp);
  
          return _simd16_castsi_ps(result);
      }
  
-    static simd16scalar pack(simd16scalar &in)
+    static simd16scalar pack(simd16scalar& in)
      {
          simd16scalari result = _simd16_setzero_si();
  
-        simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0));          // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
-        simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1));          // r8 r9 rA rB rC rD rE rF
+        simdscalari inlo =
+            _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
+        simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
  
-        simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20);           // r0 r1 r2 r3 r8 r9 rA rB (32b)
-        simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31);           // r4 r5 r6 r7 rC rD rE rF (32b)
+        simdscalari permlo =
+            _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
+        simdscalari permhi =
+            _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
  
-        simdscalari pack = _simd_packus_epi32(permlo, permhi);                  // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
+        simdscalari pack = _simd_packus_epi32(
+            permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
  
          const simdscalari zero = _simd_setzero_si();
  
-        permlo = _simd_permute2f128_si(pack, zero, 0x20);   // (2, 0)           // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
-        permhi = _simd_permute2f128_si(pack, zero, 0x31);   // (3, 1)           // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
+        permlo = _simd_permute2f128_si(
+            pack,
+            zero,
+            0x20); // (2, 0)           // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
+        permhi = _simd_permute2f128_si(
+            pack,
+            zero,
+            0x31); // (3, 1)           // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
  
-        pack = _simd_packus_epi16(permlo, permhi);                              // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
+        pack = _simd_packus_epi16(permlo,
+                                  permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00
+                                           // 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
  
          result = _simd16_insert_si(result, pack, 0);
  
@@ -192,18 +207,18 @@ struct PackTraits<8, true>
  {
      static const uint32_t MyNumBits = 8;
  
-    static simdscalar loadSOA(const uint8_t *pSrc)
+    static simdscalar loadSOA(const uint8_t* pSrc)
      {
  #if KNOB_SIMD_WIDTH == 8
          __m256 result = _mm256_setzero_ps();
-        __m128 vLo = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
+        __m128 vLo    = _mm_castpd_ps(_mm_load_sd((double*)pSrc));
          return _mm256_insertf128_ps(result, vLo, 0);
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static void storeSOA(uint8_t *pDst, simdscalar const &src)
+    static void storeSOA(uint8_t* pDst, simdscalar const& src)
      {
          // store simd bytes
  #if KNOB_SIMD_WIDTH == 8
@@ -213,32 +228,34 @@ struct PackTraits<8, true>
  #endif
      }
  
-    static simdscalar unpack(simdscalar &in)
+    static simdscalar unpack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
  #if KNOB_ARCH <= KNOB_ARCH_AVX
          SWR_INVALID("I think this may be incorrect.");
-        __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+        __m128i src   = _mm_castps_si128(_mm256_castps256_ps128(in));
          __m128i resLo = _mm_cvtepi8_epi32(src);
-        __m128i resHi = _mm_shuffle_epi8(src,
-            _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
+        __m128i resHi =
+            _mm_shuffle_epi8(src, _mm_set_epi32(0x80808007, 0x80808006, 0x80808005, 0x80808004));
  
          __m256i result = _mm256_castsi128_si256(resLo);
-        result = _mm256_insertf128_si256(result, resHi, 1);
+        result         = _mm256_insertf128_si256(result, resHi, 1);
          return _mm256_castsi256_ps(result);
  #else
-        return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+        return _mm256_castsi256_ps(
+            _mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
  #endif
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static simdscalar pack(simdscalar &in)
+    static simdscalar pack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
          simdscalari src = _simd_castps_si(in);
-        __m128i res16 = _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
+        __m128i     res16 =
+            _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1));
          __m128i res8 = _mm_packs_epi16(res16, _mm_undefined_si128());
          return _mm256_castsi256_ps(_mm256_castsi128_si256(res8));
  #else
@@ -247,51 +264,64 @@ struct PackTraits<8, true>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static simd16scalar loadSOA_16(const uint8_t *pSrc)
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)
      {
-        simd16scalar result = _simd16_setzero_ps();
-        simdscalar resultlo = _simd_setzero_ps();
+        simd16scalar result   = _simd16_setzero_ps();
+        simdscalar   resultlo = _simd_setzero_ps();
  
-        const __m128 src = _mm_load_ps(reinterpret_cast<const float *>(pSrc));
+        const __m128 src = _mm_load_ps(reinterpret_cast<const float*>(pSrc));
  
          resultlo = _mm256_insertf128_ps(resultlo, src, 0);
-        result = _simd16_insert_ps(result, resultlo, 0);
+        result   = _simd16_insert_ps(result, resultlo, 0);
  
          return result;
      }
  
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
      {
          // store simd16 bytes
-        _mm_store_ps(reinterpret_cast<float *>(pDst), _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
+        _mm_store_ps(reinterpret_cast<float*>(pDst),
+                     _mm256_castps256_ps128(_simd16_extract_ps(src, 0)));
      }
  
-    static simd16scalar unpack(simd16scalar &in)
+    static simd16scalar unpack(simd16scalar& in)
      {
-        simd4scalari tmp = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
+        simd4scalari  tmp    = _mm_castps_si128(_mm256_castps256_ps128(_simd16_extract_ps(in, 0)));
          simd16scalari result = _simd16_cvtepu8_epi32(tmp);
  
          return _simd16_castsi_ps(result);
      }
  
-    static simd16scalar pack(simd16scalar &in)
+    static simd16scalar pack(simd16scalar& in)
      {
          simd16scalari result = _simd16_setzero_si();
  
-        simdscalari inlo = _simd_castps_si(_simd16_extract_ps(in, 0));          // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
-        simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1));          // r8 r9 rA rB rC rD rE rF
+        simdscalari inlo =
+            _simd_castps_si(_simd16_extract_ps(in, 0)); // r0 r1 r2 r3 r4 r5 r6 r7 (32b)
+        simdscalari inhi = _simd_castps_si(_simd16_extract_ps(in, 1)); // r8 r9 rA rB rC rD rE rF
  
-        simdscalari permlo = _simd_permute2f128_si(inlo, inhi, 0x20);           // r0 r1 r2 r3 r8 r9 rA rB (32b)
-        simdscalari permhi = _simd_permute2f128_si(inlo, inhi, 0x31);           // r4 r5 r6 r7 rC rD rE rF (32b)
+        simdscalari permlo =
+            _simd_permute2f128_si(inlo, inhi, 0x20); // r0 r1 r2 r3 r8 r9 rA rB (32b)
+        simdscalari permhi =
+            _simd_permute2f128_si(inlo, inhi, 0x31); // r4 r5 r6 r7 rC rD rE rF (32b)
  
-        simdscalari pack = _simd_packs_epi32(permlo, permhi);                   // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
+        simdscalari pack = _simd_packs_epi32(
+            permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF (16b)
  
          const simdscalari zero = _simd_setzero_si();
  
-        permlo = _simd_permute2f128_si(pack, zero, 0x20);   // (2, 0)           // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
-        permhi = _simd_permute2f128_si(pack, zero, 0x31);   // (3, 1)           // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
+        permlo = _simd_permute2f128_si(
+            pack,
+            zero,
+            0x20); // (2, 0)           // r0 r1 r2 r3 r4 r5 r6 r7 00 00 00 00 00 00 00 00 (16b)
+        permhi = _simd_permute2f128_si(
+            pack,
+            zero,
+            0x31); // (3, 1)           // r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 (16b)
  
-        pack = _simd_packs_epi16(permlo, permhi);                               // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
+        pack =
+            _simd_packs_epi16(permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00
+                                               // 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (8b)
  
          result = _simd16_insert_si(result, pack, 0);
  
@@ -308,18 +338,18 @@ struct PackTraits<16, false>
  {
      static const uint32_t MyNumBits = 16;
  
-    static simdscalar loadSOA(const uint8_t *pSrc)
+    static simdscalar loadSOA(const uint8_t* pSrc)
      {
  #if KNOB_SIMD_WIDTH == 8
          __m256 result = _mm256_setzero_ps();
-        __m128 vLo = _mm_load_ps((const float*)pSrc);
+        __m128 vLo    = _mm_load_ps((const float*)pSrc);
          return _mm256_insertf128_ps(result, vLo, 0);
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static void storeSOA(uint8_t *pDst, simdscalar const &src)
+    static void storeSOA(uint8_t* pDst, simdscalar const& src)
      {
  #if KNOB_SIMD_WIDTH == 8
          // store 16B (2B * 8)
@@ -329,31 +359,33 @@ struct PackTraits<16, false>
  #endif
      }
  
-    static simdscalar unpack(simdscalar &in)
+    static simdscalar unpack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
  #if KNOB_ARCH <= KNOB_ARCH_AVX
-        __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+        __m128i src   = _mm_castps_si128(_mm256_castps256_ps128(in));
          __m128i resLo = _mm_cvtepu16_epi32(src);
-        __m128i resHi = _mm_shuffle_epi8(src,
-            _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
+        __m128i resHi =
+            _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
  
          __m256i result = _mm256_castsi128_si256(resLo);
-        result = _mm256_insertf128_si256(result, resHi, 1);
+        result         = _mm256_insertf128_si256(result, resHi, 1);
          return _mm256_castsi256_ps(result);
  #else
-        return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+        return _mm256_castsi256_ps(
+            _mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
  #endif
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static simdscalar pack(simdscalar &in)
+    static simdscalar pack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
          simdscalari src = _simd_castps_si(in);
-        __m256i res = _mm256_castsi128_si256(_mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
+        __m256i     res = _mm256_castsi128_si256(
+            _mm_packus_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
          return _mm256_castsi256_ps(res);
  #else
  #error Unsupported vector width
@@ -361,37 +393,45 @@ struct PackTraits<16, false>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static simd16scalar loadSOA_16(const uint8_t *pSrc)
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)
      {
          simd16scalar result = _simd16_setzero_ps();
  
-        simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float *>(pSrc));
+        simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
  
          result = _simd16_insert_ps(result, resultlo, 0);
  
          return result;
      }
  
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
      {
-        _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0));
+        _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
      }
  
-    static simd16scalar unpack(simd16scalar &in)
+    static simd16scalar unpack(simd16scalar& in)
      {
          simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
  
          return _simd16_castsi_ps(result);
      }
  
-    static simd16scalar pack(simd16scalar &in)
+    static simd16scalar pack(simd16scalar& in)
      {
          const simd16scalari zero = _simd16_setzero_si();
  
-        simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08);  // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
-        simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D);  // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
+        simd16scalari permlo = _simd16_permute2f128_si(
+            _simd16_castps_si(in),
+            zero,
+            0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
+        simd16scalari permhi = _simd16_permute2f128_si(
+            _simd16_castps_si(in),
+            zero,
+            0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
  
-        simd16scalari result = _simd16_packus_epi32(permlo, permhi);    // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
+        simd16scalari result = _simd16_packus_epi32(
+            permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00
+                             // 00 00 00 00 00 00 00 00 00 (16b)
  
          return _simd16_castsi_ps(result);
      }
@@ -406,18 +446,18 @@ struct PackTraits<16, true>
  {
      static const uint32_t MyNumBits = 16;
  
-    static simdscalar loadSOA(const uint8_t *pSrc)
+    static simdscalar loadSOA(const uint8_t* pSrc)
      {
  #if KNOB_SIMD_WIDTH == 8
          __m256 result = _mm256_setzero_ps();
-        __m128 vLo = _mm_load_ps((const float*)pSrc);
+        __m128 vLo    = _mm_load_ps((const float*)pSrc);
          return _mm256_insertf128_ps(result, vLo, 0);
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static void storeSOA(uint8_t *pDst, simdscalar const &src)
+    static void storeSOA(uint8_t* pDst, simdscalar const& src)
      {
  #if KNOB_SIMD_WIDTH == 8
          // store 16B (2B * 8)
@@ -427,32 +467,34 @@ struct PackTraits<16, true>
  #endif
      }
  
-    static simdscalar unpack(simdscalar &in)
+    static simdscalar unpack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
  #if KNOB_ARCH <= KNOB_ARCH_AVX
          SWR_INVALID("I think this may be incorrect.");
-        __m128i src = _mm_castps_si128(_mm256_castps256_ps128(in));
+        __m128i src   = _mm_castps_si128(_mm256_castps256_ps128(in));
          __m128i resLo = _mm_cvtepi16_epi32(src);
-        __m128i resHi = _mm_shuffle_epi8(src,
-            _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
+        __m128i resHi =
+            _mm_shuffle_epi8(src, _mm_set_epi32(0x80800F0E, 0x80800D0C, 0x80800B0A, 0x80800908));
  
          __m256i result = _mm256_castsi128_si256(resLo);
-        result = _mm256_insertf128_si256(result, resHi, 1);
+        result         = _mm256_insertf128_si256(result, resHi, 1);
          return _mm256_castsi256_ps(result);
  #else
-        return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
+        return _mm256_castsi256_ps(
+            _mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
  #endif
  #else
  #error Unsupported vector width
  #endif
      }
  
-    static simdscalar pack(simdscalar &in)
+    static simdscalar pack(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
          simdscalari src = _simd_castps_si(in);
-        __m256i res = _mm256_castsi128_si256(_mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
+        __m256i     res = _mm256_castsi128_si256(
+            _mm_packs_epi32(_mm256_castsi256_si128(src), _mm256_extractf128_si256(src, 1)));
          return _mm256_castsi256_ps(res);
  #else
  #error Unsupported vector width
@@ -460,37 +502,45 @@ struct PackTraits<16, true>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static simd16scalar loadSOA_16(const uint8_t *pSrc)
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)
      {
          simd16scalar result = _simd16_setzero_ps();
  
-        simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float *>(pSrc));
+        simdscalar resultlo = _simd_load_ps(reinterpret_cast<const float*>(pSrc));
  
          result = _simd16_insert_ps(result, resultlo, 0);
  
          return result;
      }
  
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
      {
-        _simd_store_ps(reinterpret_cast<float *>(pDst), _simd16_extract_ps(src, 0));
+        _simd_store_ps(reinterpret_cast<float*>(pDst), _simd16_extract_ps(src, 0));
      }
  
-    static simd16scalar unpack(simd16scalar &in)
+    static simd16scalar unpack(simd16scalar& in)
      {
          simd16scalari result = _simd16_cvtepu16_epi32(_simd_castps_si(_simd16_extract_ps(in, 0)));
  
          return _simd16_castsi_ps(result);
      }
  
-    static simd16scalar pack(simd16scalar &in)
+    static simd16scalar pack(simd16scalar& in)
      {
          const simd16scalari zero = _simd16_setzero_si();
  
-        simd16scalari permlo = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x08);  // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
-        simd16scalari permhi = _simd16_permute2f128_si(_simd16_castps_si(in), zero, 0x0D);  // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
+        simd16scalari permlo = _simd16_permute2f128_si(
+            _simd16_castps_si(in),
+            zero,
+            0x08); // (0, 0, 2, 0) // r0 r1 r2 r3 r8 r9 rA rB 00 00 00 00 00 00 00 00 (32b)
+        simd16scalari permhi = _simd16_permute2f128_si(
+            _simd16_castps_si(in),
+            zero,
+            0x0D); // (0, 0, 3, 1) // r4 r5 r6 r7 rC rD rE rF 00 00 00 00 00 00 00 00
  
-        simd16scalari result = _simd16_packs_epi32(permlo, permhi);     // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 (16b)
+        simd16scalari result = _simd16_packs_epi32(
+            permlo, permhi); // r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 rA rB rC rD rE rF 00 00 00 00 00 00 00
+                             // 00 00 00 00 00 00 00 00 00 (16b)
  
          return _simd16_castsi_ps(result);
      }
@@ -505,188 +555,281 @@ struct PackTraits<32, false>
  {
      static const uint32_t MyNumBits = 32;
  
-    static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_load_ps((const float*)pSrc); }
-    static void storeSOA(uint8_t *pDst, simdscalar const &src) { _simd_store_ps((float*)pDst, src); }
-    static simdscalar unpack(simdscalar &in) { return in; }
-    static simdscalar pack(simdscalar &in) { return in; }
-#if ENABLE_AVX512_SIMD16
-
-    static simd16scalar loadSOA_16(const uint8_t *pSrc)
+    static simdscalar loadSOA(const uint8_t* pSrc) { return _simd_load_ps((const float*)pSrc); }
+    static void       storeSOA(uint8_t* pDst, simdscalar const& src)
      {
-        return _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
+        _simd_store_ps((float*)pDst, src);
      }
+    static simdscalar unpack(simdscalar& in) { return in; }
+    static simdscalar pack(simdscalar& in) { return in; }
+#if ENABLE_AVX512_SIMD16
  
-    static void SIMDCALL storeSOA(uint8_t *pDst, simd16scalar const &src)
+    static simd16scalar loadSOA_16(const uint8_t* pSrc)
      {
-        _simd16_store_ps(reinterpret_cast<float *>(pDst), src);
+        return _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
      }
  
-    static simd16scalar unpack(simd16scalar &in)
+    static void SIMDCALL storeSOA(uint8_t* pDst, simd16scalar const& src)
      {
-        return in;
+        _simd16_store_ps(reinterpret_cast<float*>(pDst), src);
      }
  
-    static simd16scalar pack(simd16scalar &in)
-    {
-        return in;
-    }
+    static simd16scalar unpack(simd16scalar& in) { return in; }
+
+    static simd16scalar pack(simd16scalar& in) { return in; }
  #endif
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits.
  //////////////////////////////////////////////////////////////////////////
-template<SWR_TYPE type, uint32_t NumBits>
+template <SWR_TYPE type, uint32_t NumBits>
  struct TypeTraits : PackTraits<NumBits>
  {
      static const SWR_TYPE MyType = type;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UINT8
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 8> : PackTraits<8>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 8> : PackTraits<8>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for SINT8
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 8> : PackTraits<8, true>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 8> : PackTraits<8, true>
  {
      static const SWR_TYPE MyType = SWR_TYPE_SINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UINT16
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 16> : PackTraits<16>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for SINT16
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 16> : PackTraits<16, true>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 16> : PackTraits<16, true>
  {
      static const SWR_TYPE MyType = SWR_TYPE_SINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UINT32
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UINT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_UINT, 32> : PackTraits<32>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for SINT32
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SINT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_SINT, 32> : PackTraits<32>
  {
      static const SWR_TYPE MyType = SWR_TYPE_SINT;
-    static float toFloat() { return 0.0; }
-    static float fromFloat() { SWR_NOT_IMPL; return 0.0; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 0.0; }
+    static float          fromFloat()
+    {
+        SWR_NOT_IMPL;
+        return 0.0;
+    }
+    static simdscalar convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UNORM5
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 5> : PackTraits<5>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 5> : PackTraits<5>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 31.0f; }
-    static float fromFloat() { return 31.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 31.0f; }
+    static float          fromFloat() { return 31.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UNORM6
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 6> : PackTraits<6>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 6> : PackTraits<6>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 63.0f; }
-    static float fromFloat() { return 63.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 63.0f; }
+    static float          fromFloat() { return 63.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UNORM8
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 8> : PackTraits<8>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 8> : PackTraits<8>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 255.0f; }
-    static float fromFloat() { return 255.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 255.0f; }
+    static float          fromFloat() { return 255.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for SNORM8
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SNORM, 8> : PackTraits<8, true>
+template <>
+struct TypeTraits<SWR_TYPE_SNORM, 8> : PackTraits<8, true>
  {
      static const SWR_TYPE MyType = SWR_TYPE_SNORM;
-    static float toFloat() { return 1.0f / 127.0f; }
-    static float fromFloat() { return 127.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 127.0f; }
+    static float          fromFloat() { return 127.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UNORM16
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_UNORM, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 16> : PackTraits<16>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 65535.0f; }
-    static float fromFloat() { return 65535.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 65535.0f; }
+    static float          fromFloat() { return 65535.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for SNORM16
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_SNORM, 16> : PackTraits<16, true>
+template <>
+struct TypeTraits<SWR_TYPE_SNORM, 16> : PackTraits<16, true>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 32767.0f; }
-    static float fromFloat() { return 32767.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 32767.0f; }
+    static float          fromFloat() { return 32767.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for UNORM24
  //////////////////////////////////////////////////////////////////////////
-template<>
-struct TypeTraits < SWR_TYPE_UNORM, 24 > : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_UNORM, 24> : PackTraits<32>
  {
      static const SWR_TYPE MyType = SWR_TYPE_UNORM;
-    static float toFloat() { return 1.0f / 16777215.0f; }
-    static float fromFloat() { return 16777215.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f / 16777215.0f; }
+    static float          fromFloat() { return 16777215.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -697,44 +840,47 @@ struct TypeTraits < SWR_TYPE_UNORM, 24 > : PackTraits<32>
  
  #include "math.h"
  
-template< unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden >
-inline static __m128 fastpow(__m128 arg) {
+template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
+inline static __m128 fastpow(__m128 arg)
+{
      __m128 ret = arg;
  
-    static const __m128 factor = _mm_set1_ps(exp2(127.0f * expden / expnum - 127.0f)
-        * powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum));
+    static const __m128 factor =
+        _mm_set1_ps(exp2(127.0f * expden / expnum - 127.0f) *
+                    powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum));
  
      // Apply a constant pre-correction factor.
      ret = _mm_mul_ps(ret, factor);
  
      // Reinterpret arg as integer to obtain logarithm.
-    //asm("cvtdq2ps %1, %0" : "=x" (ret) : "x" (ret));
+    // asm("cvtdq2ps %1, %0" : "=x" (ret) : "x" (ret));
      ret = _mm_cvtepi32_ps(_mm_castps_si128(ret));
  
      // Multiply logarithm by power.
      ret = _mm_mul_ps(ret, _mm_set1_ps(1.0f * expnum / expden));
  
      // Convert back to "integer" to exponentiate.
-    //asm("cvtps2dq %1, %0" : "=x" (ret) : "x" (ret));
+    // asm("cvtps2dq %1, %0" : "=x" (ret) : "x" (ret));
      ret = _mm_castsi128_ps(_mm_cvtps_epi32(ret));
  
      return ret;
  }
  
-inline static __m128 pow512_4(__m128 arg) {
+inline static __m128 pow512_4(__m128 arg)
+{
      // 5/12 is too small, so compute the 4th root of 20/12 instead.
      // 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
      // weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
-    __m128 xf = fastpow< 2, 3, int(0.629960524947437 * 1e9), int(1e9) >(arg);
+    __m128 xf    = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
      __m128 xover = _mm_mul_ps(arg, xf);
  
-    __m128 xfm1 = _mm_rsqrt_ps(xf);
-    __m128 x2 = _mm_mul_ps(arg, arg);
+    __m128 xfm1   = _mm_rsqrt_ps(xf);
+    __m128 x2     = _mm_mul_ps(arg, arg);
      __m128 xunder = _mm_mul_ps(x2, xfm1);
  
      // sqrt2 * over + 2 * sqrt2 * under
      __m128 xavg = _mm_mul_ps(_mm_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
-        _mm_add_ps(xover, xunder));
+                             _mm_add_ps(xover, xunder));
  
      xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
      xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
@@ -743,17 +889,15 @@ inline static __m128 pow512_4(__m128 arg) {
  
  inline static __m128 powf_wrapper(__m128 Base, float Exp)
  {
-    float *f = (float *)(&Base);
+    float* f = (float*)(&Base);
  
-    return _mm_set_ps(powf(f[3], Exp),
-                      powf(f[2], Exp),
-                      powf(f[1], Exp),
-                      powf(f[0], Exp));
+    return _mm_set_ps(powf(f[3], Exp), powf(f[2], Exp), powf(f[1], Exp), powf(f[0], Exp));
  }
  
  static inline __m128 ConvertFloatToSRGB2(__m128& Src)
  {
-    // create a mask with 0xFFFFFFFF in the DWORDs where the source is <= the minimal SRGB float value
+    // create a mask with 0xFFFFFFFF in the DWORDs where the source is <= the minimal SRGB float
+    // value
      __m128i CmpToSRGBThresholdMask = TO_M128i(_mm_cmpnlt_ps(_mm_set1_ps(0.0031308f), Src));
  
      // squeeze the mask down to 16 bits (4 bits per DWORD)
@@ -779,7 +923,7 @@ static inline __m128 ConvertFloatToSRGB2(__m128& Src)
  #else
          __m128 f = powf_wrapper(fSrc_0RGB, 1.0f / 2.4f);
  #endif
-        f = _mm_mul_ps(f, _mm_set1_ps(1.055f));
+        f      = _mm_mul_ps(f, _mm_set1_ps(1.055f));
          Result = _mm_sub_ps(f, _mm_set1_ps(0.055f));
      }
      else
@@ -800,11 +944,12 @@ static inline __m128 ConvertFloatToSRGB2(__m128& Src)
          f = _mm_sub_ps(f, _mm_set1_ps(0.055f));
  
          // Clear the alpha (is garbage after the sub)
-        __m128i i = _mm_and_si128(TO_M128i(f), _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
+        __m128i i = _mm_and_si128(TO_M128i(f),
+                                  _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF));
  
          __m128i LessThanPart = _mm_and_si128(CmpToSRGBThresholdMask, TO_M128i(Src_0RGB_mul_denorm));
          __m128i GreaterEqualPart = _mm_andnot_si128(CmpToSRGBThresholdMask, i);
-        __m128i CombinedParts = _mm_or_si128(LessThanPart, GreaterEqualPart);
+        __m128i CombinedParts    = _mm_or_si128(LessThanPart, GreaterEqualPart);
  
          Result = TO_M128(CombinedParts);
      }
@@ -813,43 +958,45 @@ static inline __m128 ConvertFloatToSRGB2(__m128& Src)
  }
  
  #if ENABLE_AVX512_SIMD16
-template< unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden >
-inline static simd16scalar SIMDCALL fastpow(simd16scalar const &value)
+template <unsigned expnum, unsigned expden, unsigned coeffnum, unsigned coeffden>
+inline static simd16scalar SIMDCALL fastpow(simd16scalar const& value)
  {
-    static const float factor1 = exp2(127.0f * expden / expnum - 127.0f)
-        * powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum);
+    static const float factor1 = exp2(127.0f * expden / expnum - 127.0f) *
+                                 powf(1.0f * coeffnum / coeffden, 1.0f * expden / expnum);
  
      // Apply a constant pre-correction factor.
      simd16scalar result = _simd16_mul_ps(value, _simd16_set1_ps(factor1));
  
      // Reinterpret arg as integer to obtain logarithm.
-    //asm("cvtdq2ps %1, %0" : "=x" (result) : "x" (result));
+    // asm("cvtdq2ps %1, %0" : "=x" (result) : "x" (result));
      result = _simd16_cvtepi32_ps(_simd16_castps_si(result));
  
      // Multiply logarithm by power.
      result = _simd16_mul_ps(result, _simd16_set1_ps(1.0f * expnum / expden));
  
      // Convert back to "integer" to exponentiate.
-    //asm("cvtps2dq %1, %0" : "=x" (result) : "x" (result));
+    // asm("cvtps2dq %1, %0" : "=x" (result) : "x" (result));
      result = _simd16_castsi_ps(_simd16_cvtps_epi32(result));
  
      return result;
  }
  
-inline static simd16scalar SIMDCALL pow512_4(simd16scalar const &arg)
+inline static simd16scalar SIMDCALL pow512_4(simd16scalar const& arg)
  {
      // 5/12 is too small, so compute the 4th root of 20/12 instead.
      // 20/12 = 5/3 = 1 + 2/3 = 2 - 1/3. 2/3 is a suitable argument for fastpow.
      // weighting coefficient: a^-1/2 = 2 a; a = 2^-2/3
-    simd16scalar xf = fastpow< 2, 3, int(0.629960524947437 * 1e9), int(1e9) >(arg);
+    simd16scalar xf    = fastpow<2, 3, int(0.629960524947437 * 1e9), int(1e9)>(arg);
      simd16scalar xover = _simd16_mul_ps(arg, xf);
  
-    simd16scalar xfm1 = _simd16_rsqrt_ps(xf);
-    simd16scalar x2 = _simd16_mul_ps(arg, arg);
+    simd16scalar xfm1   = _simd16_rsqrt_ps(xf);
+    simd16scalar x2     = _simd16_mul_ps(arg, arg);
      simd16scalar xunder = _simd16_mul_ps(x2, xfm1);
  
      // sqrt2 * over + 2 * sqrt2 * under
-    simd16scalar xavg = _simd16_mul_ps(_simd16_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f), _simd16_add_ps(xover, xunder));
+    simd16scalar xavg =
+        _simd16_mul_ps(_simd16_set1_ps(1.0f / (3.0f * 0.629960524947437f) * 0.999852f),
+                       _simd16_add_ps(xover, xunder));
  
      xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
      xavg = _simd16_mul_ps(xavg, _simd16_rsqrt_ps(xavg));
@@ -857,28 +1004,26 @@ inline static simd16scalar SIMDCALL pow512_4(simd16scalar const &arg)
      return xavg;
  }
  
-inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar &base, float exp)
+inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar& base, float exp)
  {
-    const float *f = reinterpret_cast<const float *>(&base);
-
-    return _simd16_set_ps(
-        powf(f[15], exp),
-        powf(f[14], exp),
-        powf(f[13], exp),
-        powf(f[12], exp),
-        powf(f[11], exp),
-        powf(f[10], exp),
-        powf(f[ 9], exp),
-        powf(f[ 8], exp),
-        powf(f[ 7], exp),
-        powf(f[ 6], exp),
-        powf(f[ 5], exp),
-        powf(f[ 4], exp),
-        powf(f[ 3], exp),
-        powf(f[ 2], exp),
-        powf(f[ 1], exp),
-        powf(f[ 0], exp)
-    );
+    const float* f = reinterpret_cast<const float*>(&base);
+
+    return _simd16_set_ps(powf(f[15], exp),
+                          powf(f[14], exp),
+                          powf(f[13], exp),
+                          powf(f[12], exp),
+                          powf(f[11], exp),
+                          powf(f[10], exp),
+                          powf(f[9], exp),
+                          powf(f[8], exp),
+                          powf(f[7], exp),
+                          powf(f[6], exp),
+                          powf(f[5], exp),
+                          powf(f[4], exp),
+                          powf(f[3], exp),
+                          powf(f[2], exp),
+                          powf(f[1], exp),
+                          powf(f[0], exp));
  }
  
  // float to SRGB conversion formula
@@ -888,7 +1033,7 @@ inline static simd16scalar SIMDCALL powf_wrapper(const simd16scalar &base, float
  // else
  //     value = 1.055f * pow(value, 1.0f / 2.4f) - 0.055f;
  //
-static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar &value)
+static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar& value)
  {
      // create a mask where the source is < the minimal SRGB float value
      const simd16mask mask = _simd16_cmplt_ps_mask(value, _simd16_set1_ps(0.0031308f));
@@ -913,7 +1058,8 @@ static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar &value)
          // only native AVX512 can directly use the computed mask for the blend operation
          result = _mm512_mask_blend_ps(mask, result2, result);
  #else
-        result = _simd16_blendv_ps(result2, result, _simd16_cmplt_ps(value, _simd16_set1_ps(0.0031308f)));
+        result               = _simd16_blendv_ps(
+            result2, result, _simd16_cmplt_ps(value, _simd16_set1_ps(0.0031308f)));
  #endif
      }
  
@@ -924,88 +1070,100 @@ static inline simd16scalar ConvertFloatToSRGB2(const simd16scalar &value)
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for FLOAT16
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
+template <>
+struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
  {
      static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
-    static float toFloat() { return 1.0f; }
-    static float fromFloat() { return 1.0f; }
-    static simdscalar convertSrgb(simdscalar &in) { SWR_NOT_IMPL; return _simd_setzero_ps(); }
+    static float          toFloat() { return 1.0f; }
+    static float          fromFloat() { return 1.0f; }
+    static simdscalar     convertSrgb(simdscalar& in)
+    {
+        SWR_NOT_IMPL;
+        return _simd_setzero_ps();
+    }
  
-    static simdscalar pack(const simdscalar &in)
+    static simdscalar pack(const simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
  #if (KNOB_ARCH == KNOB_ARCH_AVX)
          // input is 8 packed float32, output is 8 packed float16
          simdscalari src = _simd_castps_si(in);
  
-        static const uint32_t FLOAT_EXP_BITS = 8;
+        static const uint32_t FLOAT_EXP_BITS      = 8;
          static const uint32_t FLOAT_MANTISSA_BITS = 23;
          static const uint32_t FLOAT_MANTISSA_MASK = (1U << FLOAT_MANTISSA_BITS) - 1;
          static const uint32_t FLOAT_EXP_MASK = ((1U << FLOAT_EXP_BITS) - 1) << FLOAT_MANTISSA_BITS;
  
-        static const uint32_t HALF_EXP_BITS = 5;
+        static const uint32_t HALF_EXP_BITS      = 5;
          static const uint32_t HALF_MANTISSA_BITS = 10;
          static const uint32_t HALF_EXP_MASK = ((1U << HALF_EXP_BITS) - 1) << HALF_MANTISSA_BITS;
  
          // minimum exponent required, exponents below this are flushed to 0.
-        static const int32_t HALF_EXP_MIN = -14;
+        static const int32_t HALF_EXP_MIN   = -14;
          static const int32_t FLOAT_EXP_BIAS = 127;
-        static const int32_t FLOAT_EXP_MIN = HALF_EXP_MIN + FLOAT_EXP_BIAS;
-        static const int32_t FLOAT_EXP_MIN_FTZ = FLOAT_EXP_MIN - (HALF_MANTISSA_BITS + 1); // +1 for the lack of implicit significand
+        static const int32_t FLOAT_EXP_MIN  = HALF_EXP_MIN + FLOAT_EXP_BIAS;
+        static const int32_t FLOAT_EXP_MIN_FTZ =
+            FLOAT_EXP_MIN - (HALF_MANTISSA_BITS + 1); // +1 for the lack of implicit significand
  
          // maximum exponent required, exponents above this are set to infinity
-        static const int32_t HALF_EXP_MAX = 15;
+        static const int32_t HALF_EXP_MAX  = 15;
          static const int32_t FLOAT_EXP_MAX = HALF_EXP_MAX + FLOAT_EXP_BIAS;
  
-        const simdscalari vSignMask     = _simd_set1_epi32(0x80000000);
-        const simdscalari vExpMask      = _simd_set1_epi32(FLOAT_EXP_MASK);
-        const simdscalari vManMask      = _simd_set1_epi32(FLOAT_MANTISSA_MASK);
-        const simdscalari vExpMin       = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN << FLOAT_MANTISSA_BITS));
-        const simdscalari vExpMinFtz    = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN_FTZ << FLOAT_MANTISSA_BITS));
-        const simdscalari vExpMax       = _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MAX << FLOAT_MANTISSA_BITS));
+        const simdscalari vSignMask = _simd_set1_epi32(0x80000000);
+        const simdscalari vExpMask  = _simd_set1_epi32(FLOAT_EXP_MASK);
+        const simdscalari vManMask  = _simd_set1_epi32(FLOAT_MANTISSA_MASK);
+        const simdscalari vExpMin =
+            _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN << FLOAT_MANTISSA_BITS));
+        const simdscalari vExpMinFtz =
+            _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MIN_FTZ << FLOAT_MANTISSA_BITS));
+        const simdscalari vExpMax =
+            _simd_set1_epi32(FLOAT_EXP_MASK & uint32_t(FLOAT_EXP_MAX << FLOAT_MANTISSA_BITS));
  
-        simdscalari vSign       = _simd_and_si(src, vSignMask);
-        simdscalari vExp        = _simd_and_si(src, vExpMask);
-        simdscalari vMan        = _simd_and_si(src, vManMask);
+        simdscalari vSign = _simd_and_si(src, vSignMask);
+        simdscalari vExp  = _simd_and_si(src, vExpMask);
+        simdscalari vMan  = _simd_and_si(src, vManMask);
  
          simdscalari vFTZMask    = _simd_cmplt_epi32(vExp, vExpMinFtz);
          simdscalari vDenormMask = _simd_andnot_si(vFTZMask, _simd_cmplt_epi32(vExp, vExpMin));
          simdscalari vInfMask    = _simd_cmpeq_epi32(vExpMask, vExp);
          simdscalari vClampMask  = _simd_andnot_si(vInfMask, _simd_cmplt_epi32(vExpMax, vExp));
  
-        simdscalari vHalfExp    = _simd_add_epi32(_simd_sub_epi32(vExp, vExpMin), _simd_set1_epi32(1U << FLOAT_MANTISSA_BITS));
+        simdscalari vHalfExp = _simd_add_epi32(_simd_sub_epi32(vExp, vExpMin),
+                                               _simd_set1_epi32(1U << FLOAT_MANTISSA_BITS));
  
          // pack output 16-bits into the lower 16-bits of each 32-bit channel
-        simdscalari vDst        = _simd_and_si(_simd_srli_epi32(vHalfExp, 13), _simd_set1_epi32(HALF_EXP_MASK));
-        vDst   = _simd_or_si(vDst, _simd_srli_epi32(vMan, FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
+        simdscalari vDst =
+            _simd_and_si(_simd_srli_epi32(vHalfExp, 13), _simd_set1_epi32(HALF_EXP_MASK));
+        vDst = _simd_or_si(vDst, _simd_srli_epi32(vMan, FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
  
          // Flush To Zero
-        vDst   = _simd_andnot_si(vFTZMask, vDst);
+        vDst = _simd_andnot_si(vFTZMask, vDst);
          // Apply Infinities / NaN
-        vDst   = _simd_or_si(vDst, _simd_and_si(vInfMask, _simd_set1_epi32(HALF_EXP_MASK)));
+        vDst = _simd_or_si(vDst, _simd_and_si(vInfMask, _simd_set1_epi32(HALF_EXP_MASK)));
  
          // Apply clamps
          vDst = _simd_andnot_si(vClampMask, vDst);
-        vDst = _simd_or_si(vDst,
-                _simd_and_si(vClampMask, _simd_set1_epi32(0x7BFF)));
+        vDst = _simd_or_si(vDst, _simd_and_si(vClampMask, _simd_set1_epi32(0x7BFF)));
  
          // Compute Denormals (subnormals)
          if (!_mm256_testz_si256(vDenormMask, vDenormMask))
          {
-            uint32_t *pDenormMask = (uint32_t*)&vDenormMask;
-            uint32_t *pExp = (uint32_t*)&vExp;
-            uint32_t *pMan = (uint32_t*)&vMan;
-            uint32_t *pDst = (uint32_t*)&vDst;
+            uint32_t* pDenormMask = (uint32_t*)&vDenormMask;
+            uint32_t* pExp        = (uint32_t*)&vExp;
+            uint32_t* pMan        = (uint32_t*)&vMan;
+            uint32_t* pDst        = (uint32_t*)&vDst;
              for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
              {
                  if (pDenormMask[i])
                  {
                      // Need to compute subnormal value
                      uint32_t exponent = pExp[i] >> FLOAT_MANTISSA_BITS;
-                    uint32_t mantissa = pMan[i] |
-                                        (1U << FLOAT_MANTISSA_BITS); // Denorms include no "implicit" 1s.  Make it explicit
+                    uint32_t mantissa =
+                        pMan[i] | (1U << FLOAT_MANTISSA_BITS); // Denorms include no "implicit" 1s.
+                                                               // Make it explicit
  
-                    pDst[i] = mantissa >> ((FLOAT_EXP_MIN - exponent) + (FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
+                    pDst[i] = mantissa >> ((FLOAT_EXP_MIN - exponent) +
+                                           (FLOAT_MANTISSA_BITS - HALF_MANTISSA_BITS));
                  }
              }
          }
@@ -1014,7 +1172,8 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
          vDst = _simd_or_si(vDst, _simd_srli_epi32(vSign, 16));
  
          // Pack to lower 128-bits
-        vDst = _mm256_castsi128_si256(_mm_packus_epi32(_mm256_castsi256_si128(vDst), _mm256_extractf128_si256(vDst, 1)));
+        vDst = _mm256_castsi128_si256(
+            _mm_packus_epi32(_mm256_castsi256_si128(vDst), _mm256_extractf128_si256(vDst, 1)));
  
  #if 0
  #if !defined(NDEBUG)
@@ -1037,7 +1196,7 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
  #endif
      }
  
-    static simdscalar unpack(const simdscalar &in)
+    static simdscalar unpack(const simdscalar& in)
      {
          // input is 8 packed float16, output is 8 packed float32
          SWR_NOT_IMPL; // @todo
@@ -1045,10 +1204,10 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static simd16scalar pack(const simd16scalar &in)
+    static simd16scalar pack(const simd16scalar& in)
      {
-        simd16scalari result = _simd16_setzero_si();
-        simdscalari resultlo = _simd_setzero_si();
+        simd16scalari result   = _simd16_setzero_si();
+        simdscalari   resultlo = _simd_setzero_si();
  
  #if (KNOB_ARCH == KNOB_ARCH_AVX)
          simdscalar simdlo = pack(_simd16_extract_ps(in, 0));
@@ -1070,7 +1229,7 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
          return _simd16_castsi_ps(result);
      }
  
-    static simd16scalar unpack(const simd16scalar &in)
+    static simd16scalar unpack(const simd16scalar& in)
      {
          // input is 16 packed float16, output is 16 packed float32
          SWR_NOT_IMPL; //  @todo
@@ -1082,12 +1241,13 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 16> : PackTraits<16>
  //////////////////////////////////////////////////////////////////////////
  /// TypeTraits - Format type traits specialization for FLOAT32
  //////////////////////////////////////////////////////////////////////////
-template<> struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
+template <>
+struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
  {
-    static const SWR_TYPE MyType = SWR_TYPE_FLOAT;
-    static float toFloat() { return 1.0f; }
-    static float fromFloat() { return 1.0f; }
-    static inline simdscalar convertSrgb(simdscalar &in)
+    static const SWR_TYPE    MyType = SWR_TYPE_FLOAT;
+    static float             toFloat() { return 1.0f; }
+    static float             fromFloat() { return 1.0f; }
+    static inline simdscalar convertSrgb(simdscalar& in)
      {
  #if KNOB_SIMD_WIDTH == 8
          __m128 srcLo = _mm256_extractf128_ps(in, 0);
@@ -1105,10 +1265,7 @@ template<> struct TypeTraits<SWR_TYPE_FLOAT, 32> : PackTraits<32>
      }
  #if ENABLE_AVX512_SIMD16
  
-    static inline simd16scalar convertSrgb(simd16scalar &in)
-    {
-        return ConvertFloatToSRGB2(in);
-    }
+    static inline simd16scalar convertSrgb(simd16scalar& in) { return ConvertFloatToSRGB2(in); }
  #endif
  };
  
@@ -1139,7 +1296,7 @@ struct FormatIntType<bits, false, true>
  //////////////////////////////////////////////////////////////////////////
  /// Format1 - Bitfield for single component formats.
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t x>
+template <uint32_t x>
  union Format1
  {
      typedef typename FormatIntType<x>::TYPE TYPE;
@@ -1153,11 +1310,11 @@ union Format1
      {
          TYPE g : x;
      };
-    struct 
+    struct
      {
          TYPE b : x;
      };
-    struct  
+    struct
      {
          TYPE a : x;
      };
@@ -1166,7 +1323,7 @@ union Format1
  //////////////////////////////////////////////////////////////////////////
  /// Format2 - Bitfield for 2 component formats.
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y>
+template <uint32_t x, uint32_t y>
  union Format2
  {
      typedef typename FormatIntType<x + y>::TYPE TYPE;
@@ -1187,7 +1344,7 @@ union Format2
  //////////////////////////////////////////////////////////////////////////
  /// Format3 - Bitfield for 3 component formats.
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z>
+template <uint32_t x, uint32_t y, uint32_t z>
  union Format3
  {
      typedef typename FormatIntType<x + y + z>::TYPE TYPE;
@@ -1198,13 +1355,13 @@ union Format3
          TYPE g : y;
          TYPE b : z;
      };
-    TYPE a;  ///@note This is here to provide full template needed in Formats.
+    TYPE a; ///@note This is here to provide full template needed in Formats.
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// Format4 - Bitfield for 4 component formats.
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z, uint32_t w>
+template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
  struct Format4
  {
      typedef typename FormatIntType<x + y + z + w>::TYPE TYPE;
@@ -1218,12 +1375,12 @@ struct Format4
  //////////////////////////////////////////////////////////////////////////
  /// ComponentTraits - Default components
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t x, uint32_t y, uint32_t z, uint32_t w>
+template <uint32_t x, uint32_t y, uint32_t z, uint32_t w>
  struct Defaults
  {
      INLINE static uint32_t GetDefault(uint32_t comp)
      {
-        static const uint32_t defaults[4]{ x, y, z, w };
+        static const uint32_t defaults[4]{x, y, z, w};
          return defaults[comp];
      }
  };
@@ -1231,25 +1388,31 @@ struct Defaults
  //////////////////////////////////////////////////////////////////////////
  /// ComponentTraits - Component type traits.
  //////////////////////////////////////////////////////////////////////////
-template<SWR_TYPE X, uint32_t NumBitsX, SWR_TYPE Y = SWR_TYPE_UNKNOWN, uint32_t NumBitsY = 0, SWR_TYPE Z = SWR_TYPE_UNKNOWN, uint32_t NumBitsZ = 0, SWR_TYPE W = SWR_TYPE_UNKNOWN, uint32_t NumBitsW = 0>
+template <SWR_TYPE X,
+          uint32_t NumBitsX,
+          SWR_TYPE Y        = SWR_TYPE_UNKNOWN,
+          uint32_t NumBitsY = 0,
+          SWR_TYPE Z        = SWR_TYPE_UNKNOWN,
+          uint32_t NumBitsZ = 0,
+          SWR_TYPE W        = SWR_TYPE_UNKNOWN,
+          uint32_t NumBitsW = 0>
  struct ComponentTraits
  {
      INLINE static SWR_TYPE GetType(uint32_t comp)
      {
-        static const SWR_TYPE CompType[4]{ X, Y, Z, W };
+        static const SWR_TYPE CompType[4]{X, Y, Z, W};
          return CompType[comp];
      }
  
      INLINE static constexpr uint32_t GetConstBPC(uint32_t comp)
      {
-        return (comp == 3) ? NumBitsW :
-            ((comp == 2) ? NumBitsZ :
-                ((comp == 1) ? NumBitsY : NumBitsX) );
+        return (comp == 3) ? NumBitsW
+                           : ((comp == 2) ? NumBitsZ : ((comp == 1) ? NumBitsY : NumBitsX));
      }
  
      INLINE static uint32_t GetBPC(uint32_t comp)
      {
-        static const uint32_t MyBpc[4]{ NumBitsX, NumBitsY, NumBitsZ, NumBitsW };
+        static const uint32_t MyBpc[4]{NumBitsX, NumBitsY, NumBitsZ, NumBitsW};
          return MyBpc[comp];
      }
  
@@ -1285,7 +1448,6 @@ struct ComponentTraits
          }
          SWR_INVALID("Invalid component: %d", comp);
          return TypeTraits<X, NumBitsX>::toFloat();
-
      }
  
      INLINE static float fromFloat(uint32_t comp)
@@ -1322,7 +1484,7 @@ struct ComponentTraits
          return TypeTraits<X, NumBitsX>::loadSOA(pSrc);
      }
  
-    INLINE static void storeSOA(uint32_t comp, uint8_t *pDst, simdscalar const &src)
+    INLINE static void storeSOA(uint32_t comp, uint8_t* pDst, simdscalar const& src)
      {
          switch (comp)
          {
@@ -1342,19 +1504,23 @@ struct ComponentTraits
          SWR_INVALID("Invalid component: %d", comp);
      }
  
-    INLINE static simdscalar unpack(uint32_t comp, simdscalar &in)
+    INLINE static simdscalar unpack(uint32_t comp, simdscalar& in)
      {
          simdscalar out;
          switch (comp)
          {
          case 0:
-            out = TypeTraits<X, NumBitsX>::unpack(in); break;
+            out = TypeTraits<X, NumBitsX>::unpack(in);
+            break;
          case 1:
-            out = TypeTraits<Y, NumBitsY>::unpack(in); break;
+            out = TypeTraits<Y, NumBitsY>::unpack(in);
+            break;
          case 2:
-            out = TypeTraits<Z, NumBitsZ>::unpack(in); break;
+            out = TypeTraits<Z, NumBitsZ>::unpack(in);
+            break;
          case 3:
-            out = TypeTraits<W, NumBitsW>::unpack(in); break;
+            out = TypeTraits<W, NumBitsW>::unpack(in);
+            break;
          default:
              SWR_INVALID("Invalid component: %d", comp);
              out = in;
@@ -1363,19 +1529,23 @@ struct ComponentTraits
          return out;
      }
  
-    INLINE static simdscalar pack(uint32_t comp, simdscalar &in)
+    INLINE static simdscalar pack(uint32_t comp, simdscalar& in)
      {
          simdscalar out;
          switch (comp)
          {
          case 0:
-            out = TypeTraits<X, NumBitsX>::pack(in); break;
+            out = TypeTraits<X, NumBitsX>::pack(in);
+            break;
          case 1:
-            out = TypeTraits<Y, NumBitsY>::pack(in); break;
+            out = TypeTraits<Y, NumBitsY>::pack(in);
+            break;
          case 2:
-            out = TypeTraits<Z, NumBitsZ>::pack(in); break;
+            out = TypeTraits<Z, NumBitsZ>::pack(in);
+            break;
          case 3:
-            out = TypeTraits<W, NumBitsW>::pack(in); break;
+            out = TypeTraits<W, NumBitsW>::pack(in);
+            break;
          default:
              SWR_INVALID("Invalid component: %d", comp);
              out = in;
@@ -1384,7 +1554,7 @@ struct ComponentTraits
          return out;
      }
  
-    INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar &in)
+    INLINE static simdscalar convertSrgb(uint32_t comp, simdscalar& in)
      {
          switch (comp)
          {
@@ -1419,7 +1589,7 @@ struct ComponentTraits
          return TypeTraits<X, NumBitsX>::loadSOA_16(pSrc);
      }
  
-    INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t *pDst, simd16scalar const &src)
+    INLINE static void SIMDCALL storeSOA(uint32_t comp, uint8_t* pDst, simd16scalar const& src)
      {
          switch (comp)
          {
@@ -1440,7 +1610,7 @@ struct ComponentTraits
          TypeTraits<X, NumBitsX>::storeSOA(pDst, src);
      }
  
-    INLINE static simd16scalar unpack(uint32_t comp, simd16scalar &in)
+    INLINE static simd16scalar unpack(uint32_t comp, simd16scalar& in)
      {
          switch (comp)
          {
@@ -1457,7 +1627,7 @@ struct ComponentTraits
          return TypeTraits<X, NumBitsX>::unpack(in);
      }
  
-    INLINE static simd16scalar pack(uint32_t comp, simd16scalar &in)
+    INLINE static simd16scalar pack(uint32_t comp, simd16scalar& in)
      {
          switch (comp)
          {
@@ -1474,7 +1644,7 @@ struct ComponentTraits
          return TypeTraits<X, NumBitsX>::pack(in);
      }
  
-    INLINE static simd16scalar convertSrgb(uint32_t comp, simd16scalar &in)
+    INLINE static simd16scalar convertSrgb(uint32_t comp, simd16scalar& in)
      {
          switch (comp)
          {
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_utils.h b/src/gallium/drivers/swr/rasterizer/core/format_utils.h

index 576f14bcafda987f0d8c78bf1d43a6d5a805f29f..b51755dab50bf589c7cd12c14304a738052bb5f3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/format_utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/format_utils.h
@@ -1,37 +1,37 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file utils.h
-*
-* @brief Utilities used by SWR core related to pixel formats.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file utils.h
+ *
+ * @brief Utilities used by SWR core related to pixel formats.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "core/utils.h"
  #include "common/simdintrin.h"
  
  INLINE
-void vTranspose(simd4scalar &row0, simd4scalar &row1, simd4scalar &row2, simd4scalar &row3)
+void vTranspose(simd4scalar& row0, simd4scalar& row1, simd4scalar& row2, simd4scalar& row3)
  {
      simd4scalari row0i = SIMD128::castps_si(row0);
      simd4scalari row1i = SIMD128::castps_si(row1);
@@ -39,8 +39,8 @@ void vTranspose(simd4scalar &row0, simd4scalar &row1, simd4scalar &row2, simd4sc
      simd4scalari row3i = SIMD128::castps_si(row3);
  
      simd4scalari vTemp = row2i;
-    row2i = SIMD128::unpacklo_epi32(row2i, row3i);
-    vTemp = SIMD128::unpackhi_epi32(vTemp, row3i);
+    row2i              = SIMD128::unpacklo_epi32(row2i, row3i);
+    vTemp              = SIMD128::unpackhi_epi32(vTemp, row3i);
  
      row3i = row0i;
      row0i = SIMD128::unpacklo_epi32(row0i, row1i);
@@ -61,11 +61,11 @@ void vTranspose(simd4scalar &row0, simd4scalar &row1, simd4scalar &row2, simd4sc
  }
  
  INLINE
-void vTranspose(simd4scalari &row0, simd4scalari &row1, simd4scalari &row2, simd4scalari &row3)
+void vTranspose(simd4scalari& row0, simd4scalari& row1, simd4scalari& row2, simd4scalari& row3)
  {
      simd4scalari vTemp = row2;
-    row2 = SIMD128::unpacklo_epi32(row2, row3);
-    vTemp = SIMD128::unpackhi_epi32(vTemp, row3);
+    row2               = SIMD128::unpacklo_epi32(row2, row3);
+    vTemp              = SIMD128::unpackhi_epi32(vTemp, row3);
  
      row3 = row0;
      row0 = SIMD128::unpacklo_epi32(row0, row1);
@@ -82,17 +82,20 @@ void vTranspose(simd4scalari &row0, simd4scalari &row1, simd4scalari &row2, simd
  
  #if KNOB_SIMD_WIDTH == 8
  INLINE
-void vTranspose3x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdscalar &vSrc1, const simdscalar &vSrc2)
+void vTranspose3x8(simd4scalar (&vDst)[8],
+                   const simdscalar& vSrc0,
+                   const simdscalar& vSrc1,
+                   const simdscalar& vSrc2)
  {
-    simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2);                  //x0z0x1z1 x4z4x5z5
-    simdscalar r1rx = _simd_unpacklo_ps(vSrc1, _simd_setzero_ps());     //y0w0y1w1 y4w4y5w5
-    simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx);              //x0y0z0w0 x4y4z4w4
-    simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx);              //x1y1z1w1 x5y5z5w5
+    simdscalar r0r2       = _simd_unpacklo_ps(vSrc0, vSrc2);              // x0z0x1z1 x4z4x5z5
+    simdscalar r1rx       = _simd_unpacklo_ps(vSrc1, _simd_setzero_ps()); // y0w0y1w1 y4w4y5w5
+    simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx);                // x0y0z0w0 x4y4z4w4
+    simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx);                // x1y1z1w1 x5y5z5w5
  
-    r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2);                             //x2z2x3z3 x6z6x7z7
-    r1rx = _simd_unpackhi_ps(vSrc1, _simd_setzero_ps());                //y2w2y3w3 y6w6yw77
-    simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx);              //x2y2z2w2 x6y6z6w6
-    simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx);              //x3y3z3w3 x7y7z7w7
+    r0r2                  = _simd_unpackhi_ps(vSrc0, vSrc2);              // x2z2x3z3 x6z6x7z7
+    r1rx                  = _simd_unpackhi_ps(vSrc1, _simd_setzero_ps()); // y2w2y3w3 y6w6yw77
+    simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx);                // x2y2z2w2 x6y6z6w6
+    simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx);                // x3y3z3w3 x7y7z7w7
  
      vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
      vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
@@ -106,17 +109,21 @@ void vTranspose3x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdsc
  }
  
  INLINE
-void vTranspose4x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdscalar &vSrc1, const simdscalar &vSrc2, const simdscalar &vSrc3)
+void vTranspose4x8(simd4scalar (&vDst)[8],
+                   const simdscalar& vSrc0,
+                   const simdscalar& vSrc1,
+                   const simdscalar& vSrc2,
+                   const simdscalar& vSrc3)
  {
-    simdscalar r0r2 = _simd_unpacklo_ps(vSrc0, vSrc2);      //x0z0x1z1 x4z4x5z5
-    simdscalar r1rx = _simd_unpacklo_ps(vSrc1, vSrc3);      //y0w0y1w1 y4w4y5w5
-    simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx);  //x0y0z0w0 x4y4z4w4
-    simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx);  //x1y1z1w1 x5y5z5w5
+    simdscalar r0r2       = _simd_unpacklo_ps(vSrc0, vSrc2); // x0z0x1z1 x4z4x5z5
+    simdscalar r1rx       = _simd_unpacklo_ps(vSrc1, vSrc3); // y0w0y1w1 y4w4y5w5
+    simdscalar r02r1xlolo = _simd_unpacklo_ps(r0r2, r1rx);   // x0y0z0w0 x4y4z4w4
+    simdscalar r02r1xlohi = _simd_unpackhi_ps(r0r2, r1rx);   // x1y1z1w1 x5y5z5w5
  
-    r0r2 = _simd_unpackhi_ps(vSrc0, vSrc2);                 //x2z2x3z3 x6z6x7z7
-    r1rx = _simd_unpackhi_ps(vSrc1, vSrc3);                 //y2w2y3w3 y6w6yw77
-    simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx);  //x2y2z2w2 x6y6z6w6
-    simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx);  //x3y3z3w3 x7y7z7w7
+    r0r2                  = _simd_unpackhi_ps(vSrc0, vSrc2); // x2z2x3z3 x6z6x7z7
+    r1rx                  = _simd_unpackhi_ps(vSrc1, vSrc3); // y2w2y3w3 y6w6yw77
+    simdscalar r02r1xhilo = _simd_unpacklo_ps(r0r2, r1rx);   // x2y2z2w2 x6y6z6w6
+    simdscalar r02r1xhihi = _simd_unpackhi_ps(r0r2, r1rx);   // x3y3z3w3 x7y7z7w7
  
      vDst[0] = _simd_extractf128_ps(r02r1xlolo, 0);
      vDst[1] = _simd_extractf128_ps(r02r1xlohi, 0);
@@ -131,9 +138,29 @@ void vTranspose4x8(simd4scalar (&vDst)[8], const simdscalar &vSrc0, const simdsc
  
  #if ENABLE_AVX512_SIMD16
  INLINE
-void vTranspose4x16(simd16scalar(&dst)[4], const simd16scalar &src0, const simd16scalar &src1, const simd16scalar &src2, const simd16scalar &src3)
+void vTranspose4x16(simd16scalar (&dst)[4],
+                    const simd16scalar& src0,
+                    const simd16scalar& src1,
+                    const simd16scalar& src2,
+                    const simd16scalar& src3)
  {
-    const simd16scalari perm = _simd16_set_epi32(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0); // pre-permute input to setup the right order after all the unpacking
+    const simd16scalari perm =
+        _simd16_set_epi32(15,
+                          11,
+                          7,
+                          3,
+                          14,
+                          10,
+                          6,
+                          2,
+                          13,
+                          9,
+                          5,
+                          1,
+                          12,
+                          8,
+                          4,
+                          0); // pre-permute input to setup the right order after all the unpacking
  
      simd16scalar pre0 = _simd16_permute_ps(src0, perm); // r
      simd16scalar pre1 = _simd16_permute_ps(src1, perm); // g
@@ -153,46 +180,69 @@ void vTranspose4x16(simd16scalar(&dst)[4], const simd16scalar &src0, const simd1
  
  #endif
  INLINE
-void vTranspose8x8(simdscalar (&vDst)[8], const simdscalar &vMask0, const simdscalar &vMask1, const simdscalar &vMask2, const simdscalar &vMask3, const simdscalar &vMask4, const simdscalar &vMask5, const simdscalar &vMask6, const simdscalar &vMask7)
+void vTranspose8x8(simdscalar (&vDst)[8],
+                   const simdscalar& vMask0,
+                   const simdscalar& vMask1,
+                   const simdscalar& vMask2,
+                   const simdscalar& vMask3,
+                   const simdscalar& vMask4,
+                   const simdscalar& vMask5,
+                   const simdscalar& vMask6,
+                   const simdscalar& vMask7)
  {
-    simdscalar __t0 = _simd_unpacklo_ps(vMask0, vMask1);
-    simdscalar __t1 = _simd_unpackhi_ps(vMask0, vMask1);
-    simdscalar __t2 = _simd_unpacklo_ps(vMask2, vMask3);
-    simdscalar __t3 = _simd_unpackhi_ps(vMask2, vMask3);
-    simdscalar __t4 = _simd_unpacklo_ps(vMask4, vMask5);
-    simdscalar __t5 = _simd_unpackhi_ps(vMask4, vMask5);
-    simdscalar __t6 = _simd_unpacklo_ps(vMask6, vMask7);
-    simdscalar __t7 = _simd_unpackhi_ps(vMask6, vMask7);
-    simdscalar __tt0 = _simd_shuffle_ps(__t0,__t2,_MM_SHUFFLE(1,0,1,0));
-    simdscalar __tt1 = _simd_shuffle_ps(__t0,__t2,_MM_SHUFFLE(3,2,3,2));
-    simdscalar __tt2 = _simd_shuffle_ps(__t1,__t3,_MM_SHUFFLE(1,0,1,0));
-    simdscalar __tt3 = _simd_shuffle_ps(__t1,__t3,_MM_SHUFFLE(3,2,3,2));
-    simdscalar __tt4 = _simd_shuffle_ps(__t4,__t6,_MM_SHUFFLE(1,0,1,0));
-    simdscalar __tt5 = _simd_shuffle_ps(__t4,__t6,_MM_SHUFFLE(3,2,3,2));
-    simdscalar __tt6 = _simd_shuffle_ps(__t5,__t7,_MM_SHUFFLE(1,0,1,0));
-    simdscalar __tt7 = _simd_shuffle_ps(__t5,__t7,_MM_SHUFFLE(3,2,3,2));
-    vDst[0] = _simd_permute2f128_ps(__tt0, __tt4, 0x20);
-    vDst[1] = _simd_permute2f128_ps(__tt1, __tt5, 0x20);
-    vDst[2] = _simd_permute2f128_ps(__tt2, __tt6, 0x20);
-    vDst[3] = _simd_permute2f128_ps(__tt3, __tt7, 0x20);
-    vDst[4] = _simd_permute2f128_ps(__tt0, __tt4, 0x31);
-    vDst[5] = _simd_permute2f128_ps(__tt1, __tt5, 0x31);
-    vDst[6] = _simd_permute2f128_ps(__tt2, __tt6, 0x31);
-    vDst[7] = _simd_permute2f128_ps(__tt3, __tt7, 0x31);
+    simdscalar __t0  = _simd_unpacklo_ps(vMask0, vMask1);
+    simdscalar __t1  = _simd_unpackhi_ps(vMask0, vMask1);
+    simdscalar __t2  = _simd_unpacklo_ps(vMask2, vMask3);
+    simdscalar __t3  = _simd_unpackhi_ps(vMask2, vMask3);
+    simdscalar __t4  = _simd_unpacklo_ps(vMask4, vMask5);
+    simdscalar __t5  = _simd_unpackhi_ps(vMask4, vMask5);
+    simdscalar __t6  = _simd_unpacklo_ps(vMask6, vMask7);
+    simdscalar __t7  = _simd_unpackhi_ps(vMask6, vMask7);
+    simdscalar __tt0 = _simd_shuffle_ps(__t0, __t2, _MM_SHUFFLE(1, 0, 1, 0));
+    simdscalar __tt1 = _simd_shuffle_ps(__t0, __t2, _MM_SHUFFLE(3, 2, 3, 2));
+    simdscalar __tt2 = _simd_shuffle_ps(__t1, __t3, _MM_SHUFFLE(1, 0, 1, 0));
+    simdscalar __tt3 = _simd_shuffle_ps(__t1, __t3, _MM_SHUFFLE(3, 2, 3, 2));
+    simdscalar __tt4 = _simd_shuffle_ps(__t4, __t6, _MM_SHUFFLE(1, 0, 1, 0));
+    simdscalar __tt5 = _simd_shuffle_ps(__t4, __t6, _MM_SHUFFLE(3, 2, 3, 2));
+    simdscalar __tt6 = _simd_shuffle_ps(__t5, __t7, _MM_SHUFFLE(1, 0, 1, 0));
+    simdscalar __tt7 = _simd_shuffle_ps(__t5, __t7, _MM_SHUFFLE(3, 2, 3, 2));
+    vDst[0]          = _simd_permute2f128_ps(__tt0, __tt4, 0x20);
+    vDst[1]          = _simd_permute2f128_ps(__tt1, __tt5, 0x20);
+    vDst[2]          = _simd_permute2f128_ps(__tt2, __tt6, 0x20);
+    vDst[3]          = _simd_permute2f128_ps(__tt3, __tt7, 0x20);
+    vDst[4]          = _simd_permute2f128_ps(__tt0, __tt4, 0x31);
+    vDst[5]          = _simd_permute2f128_ps(__tt1, __tt5, 0x31);
+    vDst[6]          = _simd_permute2f128_ps(__tt2, __tt6, 0x31);
+    vDst[7]          = _simd_permute2f128_ps(__tt3, __tt7, 0x31);
  }
  
  INLINE
-void vTranspose8x8(simdscalar (&vDst)[8], const simdscalari &vMask0, const simdscalari &vMask1, const simdscalari &vMask2, const simdscalari &vMask3, const simdscalari &vMask4, const simdscalari &vMask5, const simdscalari &vMask6, const simdscalari &vMask7)
+void vTranspose8x8(simdscalar (&vDst)[8],
+                   const simdscalari& vMask0,
+                   const simdscalari& vMask1,
+                   const simdscalari& vMask2,
+                   const simdscalari& vMask3,
+                   const simdscalari& vMask4,
+                   const simdscalari& vMask5,
+                   const simdscalari& vMask6,
+                   const simdscalari& vMask7)
  {
-    vTranspose8x8(vDst, _simd_castsi_ps(vMask0), _simd_castsi_ps(vMask1), _simd_castsi_ps(vMask2), _simd_castsi_ps(vMask3), 
-        _simd_castsi_ps(vMask4), _simd_castsi_ps(vMask5), _simd_castsi_ps(vMask6), _simd_castsi_ps(vMask7));
+    vTranspose8x8(vDst,
+                  _simd_castsi_ps(vMask0),
+                  _simd_castsi_ps(vMask1),
+                  _simd_castsi_ps(vMask2),
+                  _simd_castsi_ps(vMask3),
+                  _simd_castsi_ps(vMask4),
+                  _simd_castsi_ps(vMask5),
+                  _simd_castsi_ps(vMask6),
+                  _simd_castsi_ps(vMask7));
  }
  #endif
  
  //////////////////////////////////////////////////////////////////////////
  /// TranposeSingleComponent
  //////////////////////////////////////////////////////////////////////////
-template<uint32_t bpp>
+template <uint32_t bpp>
  struct TransposeSingleComponent
  {
      //////////////////////////////////////////////////////////////////////////
@@ -227,23 +277,38 @@ struct Transpose8_8_8_8
  
  #if KNOB_SIMD_WIDTH == 8
  #if KNOB_ARCH <= KNOB_ARCH_AVX
-        simd4scalari c0c1 = src.v4[0];                                                          // rrrrrrrrgggggggg
-        simd4scalari c2c3 = SIMD128::castps_si(_simd_extractf128_ps(_simd_castsi_ps(src), 1));  // bbbbbbbbaaaaaaaa
-        simd4scalari c0c2 = SIMD128::unpacklo_epi64(c0c1, c2c3);                                        // rrrrrrrrbbbbbbbb
-        simd4scalari c1c3 = SIMD128::unpackhi_epi64(c0c1, c2c3);                                        // ggggggggaaaaaaaa
-        simd4scalari c01 = SIMD128::unpacklo_epi8(c0c2, c1c3);                                          // rgrgrgrgrgrgrgrg
-        simd4scalari c23 = SIMD128::unpackhi_epi8(c0c2, c1c3);                                          // babababababababa
-        simd4scalari c0123lo = SIMD128::unpacklo_epi16(c01, c23);                                       // rgbargbargbargba
-        simd4scalari c0123hi = SIMD128::unpackhi_epi16(c01, c23);                                       // rgbargbargbargba
+        simd4scalari c0c1 = src.v4[0]; // rrrrrrrrgggggggg
+        simd4scalari c2c3 =
+            SIMD128::castps_si(_simd_extractf128_ps(_simd_castsi_ps(src), 1)); // bbbbbbbbaaaaaaaa
+        simd4scalari c0c2    = SIMD128::unpacklo_epi64(c0c1, c2c3);            // rrrrrrrrbbbbbbbb
+        simd4scalari c1c3    = SIMD128::unpackhi_epi64(c0c1, c2c3);            // ggggggggaaaaaaaa
+        simd4scalari c01     = SIMD128::unpacklo_epi8(c0c2, c1c3);             // rgrgrgrgrgrgrgrg
+        simd4scalari c23     = SIMD128::unpackhi_epi8(c0c2, c1c3);             // babababababababa
+        simd4scalari c0123lo = SIMD128::unpacklo_epi16(c01, c23);              // rgbargbargbargba
+        simd4scalari c0123hi = SIMD128::unpackhi_epi16(c01, c23);              // rgbargbargbargba
          SIMD128::store_si((simd4scalari*)pDst, c0123lo);
          SIMD128::store_si((simd4scalari*)(pDst + 16), c0123hi);
  #else
          simdscalari dst01 = _simd_shuffle_epi8(src,
-            _simd_set_epi32(0x0f078080, 0x0e068080, 0x0d058080, 0x0c048080, 0x80800b03, 0x80800a02, 0x80800901, 0x80800800));
+                                               _simd_set_epi32(0x0f078080,
+                                                               0x0e068080,
+                                                               0x0d058080,
+                                                               0x0c048080,
+                                                               0x80800b03,
+                                                               0x80800a02,
+                                                               0x80800901,
+                                                               0x80800800));
          simdscalari dst23 = _mm256_permute2x128_si256(src, src, 0x01);
-        dst23 = _simd_shuffle_epi8(dst23,
-            _simd_set_epi32(0x80800f07, 0x80800e06, 0x80800d05, 0x80800c04, 0x0b038080, 0x0a028080, 0x09018080, 0x08008080));
-        simdscalari dst = _simd_or_si(dst01, dst23);
+        dst23             = _simd_shuffle_epi8(dst23,
+                                   _simd_set_epi32(0x80800f07,
+                                                   0x80800e06,
+                                                   0x80800d05,
+                                                   0x80800c04,
+                                                   0x0b038080,
+                                                   0x0a028080,
+                                                   0x09018080,
+                                                   0x08008080));
+        simdscalari dst   = _simd_or_si(dst01, dst23);
          _simd_store_si((simdscalari*)pDst, dst);
  #endif
  #else
@@ -254,23 +319,28 @@ struct Transpose8_8_8_8
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc));     // rrrrrrrrrrrrrrrr
-        simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 1); // gggggggggggggggg
-        simd4scalari src2 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 2); // bbbbbbbbbbbbbbbb
-        simd4scalari src3 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 3); // aaaaaaaaaaaaaaaa
+        simd4scalari src0 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simd4scalari src1 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
+        simd4scalari src2 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+        simd4scalari src3 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
  
          simd16scalari cvt0 = _simd16_cvtepu8_epi32(src0);
          simd16scalari cvt1 = _simd16_cvtepu8_epi32(src1);
          simd16scalari cvt2 = _simd16_cvtepu8_epi32(src2);
          simd16scalari cvt3 = _simd16_cvtepu8_epi32(src3);
  
-        simd16scalari shl1 = _simd16_slli_epi32(cvt1,  8);
+        simd16scalari shl1 = _simd16_slli_epi32(cvt1, 8);
          simd16scalari shl2 = _simd16_slli_epi32(cvt2, 16);
          simd16scalari shl3 = _simd16_slli_epi32(cvt3, 24);
  
          simd16scalari dst = _simd16_or_si(_simd16_or_si(cvt0, shl1), _simd16_or_si(shl2, shl3));
  
-        _simd16_store_si(reinterpret_cast<simd16scalari *>(pDst), dst);             // rgbargbargbargbargbargbargbargbargbargbargbargbargbargbargbargba
+        _simd16_store_si(reinterpret_cast<simd16scalari*>(pDst),
+                         dst); // rgbargbargbargbargbargbargbargbargbargbargbargbargbargbargbargba
      }
  #endif
  };
@@ -305,9 +375,9 @@ struct Transpose8_8
  #if KNOB_SIMD_WIDTH == 8
          simdscalari src = _simd_load_si((const simdscalari*)pSrc);
  
-        simd4scalari rg = src.v4[0];           // rrrrrrrr gggggggg
-        simd4scalari g = SIMD128::unpackhi_epi64(rg, rg);             // gggggggg gggggggg
-        rg = SIMD128::unpacklo_epi8(rg, g);
+        simd4scalari rg = src.v4[0];                       // rrrrrrrr gggggggg
+        simd4scalari g  = SIMD128::unpackhi_epi64(rg, rg); // gggggggg gggggggg
+        rg              = SIMD128::unpacklo_epi8(rg, g);
          SIMD128::store_si((simd4scalari*)pDst, rg);
  #else
  #error Unsupported vector width
@@ -317,8 +387,10 @@ struct Transpose8_8
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simd4scalari src0 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc));     // rrrrrrrrrrrrrrrr
-        simd4scalari src1 = SIMD128::load_si(reinterpret_cast<const simd4scalari *>(pSrc) + 1); // gggggggggggggggg
+        simd4scalari src0 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simd4scalari src1 =
+            SIMD128::load_si(reinterpret_cast<const simd4scalari*>(pSrc) + 1); // gggggggggggggggg
  
          simdscalari cvt0 = _simd_cvtepu8_epi16(src0);
          simdscalari cvt1 = _simd_cvtepu8_epi16(src1);
@@ -327,7 +399,8 @@ struct Transpose8_8
  
          simdscalari dst = _simd_or_si(cvt0, shl1);
  
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst), dst);                 // rgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst),
+                       dst); // rgrgrgrgrgrgrgrgrgrgrgrgrgrgrgrg
      }
  #endif
  };
@@ -352,13 +425,13 @@ struct Transpose32_32_32_32
          simd4scalar vDst[8];
          vTranspose4x8(vDst, src0, src1, src2, src3);
          SIMD128::store_ps((float*)pDst, vDst[0]);
-        SIMD128::store_ps((float*)pDst+4, vDst[1]);
-        SIMD128::store_ps((float*)pDst+8, vDst[2]);
-        SIMD128::store_ps((float*)pDst+12, vDst[3]);
-        SIMD128::store_ps((float*)pDst+16, vDst[4]);
-        SIMD128::store_ps((float*)pDst+20, vDst[5]);
-        SIMD128::store_ps((float*)pDst+24, vDst[6]);
-        SIMD128::store_ps((float*)pDst+28, vDst[7]);
+        SIMD128::store_ps((float*)pDst + 4, vDst[1]);
+        SIMD128::store_ps((float*)pDst + 8, vDst[2]);
+        SIMD128::store_ps((float*)pDst + 12, vDst[3]);
+        SIMD128::store_ps((float*)pDst + 16, vDst[4]);
+        SIMD128::store_ps((float*)pDst + 20, vDst[5]);
+        SIMD128::store_ps((float*)pDst + 24, vDst[6]);
+        SIMD128::store_ps((float*)pDst + 28, vDst[7]);
  #else
  #error Unsupported vector width
  #endif
@@ -367,19 +440,19 @@ struct Transpose32_32_32_32
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
-        simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16);
-        simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 32);
-        simd16scalar src3 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 48);
+        simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
+        simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
+        simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
+        simd16scalar src3 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 48);
  
          simd16scalar dst[4];
  
          vTranspose4x16(dst, src0, src1, src2, src3);
  
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) +  0, dst[0]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst[1]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 32, dst[2]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 48, dst[3]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
      }
  #endif
  };
@@ -418,19 +491,19 @@ struct Transpose32_32_32
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));
-        simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16);
-        simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 32);
+        simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc));
+        simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16);
+        simd16scalar src2 = _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 32);
          simd16scalar src3 = _simd16_setzero_ps();
  
          simd16scalar dst[4];
  
          vTranspose4x16(dst, src0, src1, src2, src3);
  
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) +  0, dst[0]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst[1]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 32, dst[2]);
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 48, dst[3]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst[0]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst[1]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 32, dst[2]);
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 48, dst[3]);
      }
  #endif
  };
@@ -447,11 +520,11 @@ struct Transpose32_32
      INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
      {
  #if KNOB_SIMD_WIDTH == 8
-        const float* pfSrc = (const float*)pSrc;
-        simd4scalar src_r0 = SIMD128::load_ps(pfSrc + 0);
-        simd4scalar src_r1 = SIMD128::load_ps(pfSrc + 4);
-        simd4scalar src_g0 = SIMD128::load_ps(pfSrc + 8);
-        simd4scalar src_g1 = SIMD128::load_ps(pfSrc + 12);
+        const float* pfSrc  = (const float*)pSrc;
+        simd4scalar  src_r0 = SIMD128::load_ps(pfSrc + 0);
+        simd4scalar  src_r1 = SIMD128::load_ps(pfSrc + 4);
+        simd4scalar  src_g0 = SIMD128::load_ps(pfSrc + 8);
+        simd4scalar  src_g1 = SIMD128::load_ps(pfSrc + 12);
  
          simd4scalar dst0 = SIMD128::unpacklo_ps(src_r0, src_g0);
          simd4scalar dst1 = SIMD128::unpackhi_ps(src_r0, src_g0);
@@ -471,20 +544,36 @@ struct Transpose32_32
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simd16scalar src0 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc));                 // rrrrrrrrrrrrrrrr
-        simd16scalar src1 = _simd16_load_ps(reinterpret_cast<const float *>(pSrc) + 16);            // gggggggggggggggg
-
-        simd16scalar tmp0 = _simd16_unpacklo_ps(src0, src1);                                        // r0 g0 r1 g1 r4 g4 r5 g5 r8 g8 r9 g9 rC gC rD gD
-        simd16scalar tmp1 = _simd16_unpackhi_ps(src0, src1);                                        // r2 g2 r3 g3 r6 g6 r7 g7 rA gA rB gB rE gE rF gF
-
-        simd16scalar per0 = _simd16_permute2f128_ps(tmp0, tmp1, 0x44);  // (1, 0, 1, 0)             // r0 g0 r1 g1 r4 g4 r5 g5 r2 g2 r3 g3 r6 g6 r7 g7
-        simd16scalar per1 = _simd16_permute2f128_ps(tmp0, tmp1, 0xEE);  // (3, 2, 3, 2)             // r8 g8 r9 g9 rC gC rD gD rA gA rB gB rE gE rF gF
-
-        simd16scalar dst0 = _simd16_permute2f128_ps(per0, per0, 0xD8);  // (3, 1, 2, 0)             // r0 g0 r1 g1 r2 g2 r3 g3 r4 g4 r5 g5 r6 g6 r7 g7
-        simd16scalar dst1 = _simd16_permute2f128_ps(per1, per1, 0xD8);  // (3, 1, 2, 0)             // r8 g8 r9 g9 rA gA rB gB rC gC rD gD rE gE rF gF
-
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) +  0, dst0);                               // rgrgrgrgrgrgrgrg
-        _simd16_store_ps(reinterpret_cast<float *>(pDst) + 16, dst1);                               // rgrgrgrgrgrgrgrg
+        simd16scalar src0 =
+            _simd16_load_ps(reinterpret_cast<const float*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simd16scalar src1 =
+            _simd16_load_ps(reinterpret_cast<const float*>(pSrc) + 16); // gggggggggggggggg
+
+        simd16scalar tmp0 =
+            _simd16_unpacklo_ps(src0, src1); // r0 g0 r1 g1 r4 g4 r5 g5 r8 g8 r9 g9 rC gC rD gD
+        simd16scalar tmp1 =
+            _simd16_unpackhi_ps(src0, src1); // r2 g2 r3 g3 r6 g6 r7 g7 rA gA rB gB rE gE rF gF
+
+        simd16scalar per0 = _simd16_permute2f128_ps(
+            tmp0,
+            tmp1,
+            0x44); // (1, 0, 1, 0)             // r0 g0 r1 g1 r4 g4 r5 g5 r2 g2 r3 g3 r6 g6 r7 g7
+        simd16scalar per1 = _simd16_permute2f128_ps(
+            tmp0,
+            tmp1,
+            0xEE); // (3, 2, 3, 2)             // r8 g8 r9 g9 rC gC rD gD rA gA rB gB rE gE rF gF
+
+        simd16scalar dst0 = _simd16_permute2f128_ps(
+            per0,
+            per0,
+            0xD8); // (3, 1, 2, 0)             // r0 g0 r1 g1 r2 g2 r3 g3 r4 g4 r5 g5 r6 g6 r7 g7
+        simd16scalar dst1 = _simd16_permute2f128_ps(
+            per1,
+            per1,
+            0xD8); // (3, 1, 2, 0)             // r8 g8 r9 g9 rA gA rB gB rC gC rD gD rE gE rF gF
+
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 0, dst0);  // rgrgrgrgrgrgrgrg
+        _simd16_store_ps(reinterpret_cast<float*>(pDst) + 16, dst1); // rgrgrgrgrgrgrgrg
      }
  #endif
  };
@@ -531,30 +620,38 @@ struct Transpose16_16_16_16
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc));              // rrrrrrrrrrrrrrrr
-        simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1);          // gggggggggggggggg
-        simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 2);          // bbbbbbbbbbbbbbbb
-        simdscalari src3 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 3);          // aaaaaaaaaaaaaaaa
-
-        simdscalari pre0 = _simd_unpacklo_epi16(src0, src1);                                        // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
-        simdscalari pre1 = _simd_unpackhi_epi16(src0, src1);                                        // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
-        simdscalari pre2 = _simd_unpacklo_epi16(src2, src3);                                        // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
-        simdscalari pre3 = _simd_unpackhi_epi16(src2, src3);                                        // ba4 ba5 ba6 ba7 baC baD baE baF
-
-        simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2);                                        // rbga0 rbga1 rbga8 rbga9
-        simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2);                                        // rbga2 rbga3 rbgaA rbgaB
-        simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3);                                        // rbga4 rbga5 rgbaC rbgaD
-        simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3);                                        // rbga6 rbga7 rbgaE rbgaF
-
-        simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0)                       // rbga0 rbga1 rbga2 rbga3
-        simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0)                       // rbga4 rbga5 rbga6 rbga7
-        simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1)                       // rbga8 rbga9 rbgaA rbgaB
-        simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1)                       // rbgaC rbgaD rbgaE rbgaF
-
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 2, dst2);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 3, dst3);                            // rgbargbargbargba
+        simdscalari src0 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simdscalari src1 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
+        simdscalari src2 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+        simdscalari src3 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 3); // aaaaaaaaaaaaaaaa
+
+        simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+        simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+        simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
+        simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
+
+        simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rbga0 rbga1 rbga8 rbga9
+        simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rbga2 rbga3 rbgaA rbgaB
+        simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rbga4 rbga5 rgbaC rbgaD
+        simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rbga6 rbga7 rbgaE rbgaF
+
+        simdscalari dst0 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x20); // (2, 0)                       // rbga0 rbga1 rbga2 rbga3
+        simdscalari dst1 = _simd_permute2f128_si(
+            tmp2, tmp3, 0x20); // (2, 0)                       // rbga4 rbga5 rbga6 rbga7
+        simdscalari dst2 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x31); // (3, 1)                       // rbga8 rbga9 rbgaA rbgaB
+        simdscalari dst3 = _simd_permute2f128_si(
+            tmp2, tmp3, 0x31); // (3, 1)                       // rbgaC rbgaD rbgaE rbgaF
+
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
      }
  #endif
  };
@@ -600,30 +697,37 @@ struct Transpose16_16_16
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc));              // rrrrrrrrrrrrrrrr
-        simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1);          // gggggggggggggggg
-        simdscalari src2 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 2);          // bbbbbbbbbbbbbbbb
-        simdscalari src3 = _simd_setzero_si();                                                      // aaaaaaaaaaaaaaaa
-
-        simdscalari pre0 = _simd_unpacklo_epi16(src0, src1);                                        // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
-        simdscalari pre1 = _simd_unpackhi_epi16(src0, src1);                                        // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
-        simdscalari pre2 = _simd_unpacklo_epi16(src2, src3);                                        // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
-        simdscalari pre3 = _simd_unpackhi_epi16(src2, src3);                                        // ba4 ba5 ba6 ba7 baC baD baE baF
-
-        simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2);                                        // rbga0 rbga1 rbga8 rbga9
-        simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2);                                        // rbga2 rbga3 rbgaA rbgaB
-        simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3);                                        // rbga4 rbga5 rgbaC rbgaD
-        simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3);                                        // rbga6 rbga7 rbgaE rbgaF
-
-        simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20); // (2, 0)                       // rbga0 rbga1 rbga2 rbga3
-        simdscalari dst1 = _simd_permute2f128_si(tmp2, tmp3, 0x20); // (2, 0)                       // rbga4 rbga5 rbga6 rbga7
-        simdscalari dst2 = _simd_permute2f128_si(tmp0, tmp1, 0x31); // (3, 1)                       // rbga8 rbga9 rbgaA rbgaB
-        simdscalari dst3 = _simd_permute2f128_si(tmp2, tmp3, 0x31); // (3, 1)                       // rbgaC rbgaD rbgaE rbgaF
-
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 2, dst2);                            // rgbargbargbargba
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 3, dst3);                            // rgbargbargbargba
+        simdscalari src0 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simdscalari src1 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
+        simdscalari src2 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 2); // bbbbbbbbbbbbbbbb
+        simdscalari src3 = _simd_setzero_si();                             // aaaaaaaaaaaaaaaa
+
+        simdscalari pre0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+        simdscalari pre1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+        simdscalari pre2 = _simd_unpacklo_epi16(src2, src3); // ba0 ba1 ba3 ba3 ba8 ba9 baA baB
+        simdscalari pre3 = _simd_unpackhi_epi16(src2, src3); // ba4 ba5 ba6 ba7 baC baD baE baF
+
+        simdscalari tmp0 = _simd_unpacklo_epi32(pre0, pre2); // rbga0 rbga1 rbga8 rbga9
+        simdscalari tmp1 = _simd_unpackhi_epi32(pre0, pre2); // rbga2 rbga3 rbgaA rbgaB
+        simdscalari tmp2 = _simd_unpacklo_epi32(pre1, pre3); // rbga4 rbga5 rgbaC rbgaD
+        simdscalari tmp3 = _simd_unpackhi_epi32(pre1, pre3); // rbga6 rbga7 rbgaE rbgaF
+
+        simdscalari dst0 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x20); // (2, 0)                       // rbga0 rbga1 rbga2 rbga3
+        simdscalari dst1 = _simd_permute2f128_si(
+            tmp2, tmp3, 0x20); // (2, 0)                       // rbga4 rbga5 rbga6 rbga7
+        simdscalari dst2 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x31); // (3, 1)                       // rbga8 rbga9 rbgaA rbgaB
+        simdscalari dst3 = _simd_permute2f128_si(
+            tmp2, tmp3, 0x31); // (3, 1)                       // rbgaC rbgaD rbgaE rbgaF
+
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 2, dst2); // rgbargbargbargba
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 3, dst3); // rgbargbargbargba
      }
  #endif
  };
@@ -661,17 +765,21 @@ struct Transpose16_16
  
      INLINE static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst)
      {
-        simdscalari src0 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc));              // rrrrrrrrrrrrrrrr
-        simdscalari src1 = _simd_load_si(reinterpret_cast<const simdscalari *>(pSrc) + 1);          // gggggggggggggggg
+        simdscalari src0 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc)); // rrrrrrrrrrrrrrrr
+        simdscalari src1 =
+            _simd_load_si(reinterpret_cast<const simdscalari*>(pSrc) + 1); // gggggggggggggggg
  
-        simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1);                                        // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
-        simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1);                                        // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
+        simdscalari tmp0 = _simd_unpacklo_epi16(src0, src1); // rg0 rg1 rg2 rg3 rg8 rg9 rgA rgB
+        simdscalari tmp1 = _simd_unpackhi_epi16(src0, src1); // rg4 rg5 rg6 rg7 rgC rgD rgE rgF
  
-        simdscalari dst0 = _simd_permute2f128_si(tmp0, tmp1, 0x20);     // (2, 0)                   // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
-        simdscalari dst1 = _simd_permute2f128_si(tmp0, tmp1, 0x31);     // (3, 1)                   // rg8 rg9 rgA rgB rgC rgD rgE rgF
+        simdscalari dst0 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x20); // (2, 0)                   // rg0 rg1 rg2 rg3 rg4 rg5 rg6 rg7
+        simdscalari dst1 = _simd_permute2f128_si(
+            tmp0, tmp1, 0x31); // (3, 1)                   // rg8 rg9 rgA rgB rgC rgD rgE rgF
  
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 0, dst0);                            // rgrgrgrgrgrgrgrg
-        _simd_store_si(reinterpret_cast<simdscalari *>(pDst) + 1, dst1);                            // rgrgrgrgrgrgrgrg
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 0, dst0); // rgrgrgrgrgrgrgrg
+        _simd_store_si(reinterpret_cast<simdscalari*>(pDst) + 1, dst1); // rgrgrgrgrgrgrgrg
      }
  #endif
  };
@@ -879,4 +987,3 @@ struct Transpose64_64_64_64
      static void Transpose_16(const uint8_t* pSrc, uint8_t* pDst) = delete;
  #endif
  };
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp

index 47c0662e5ee25ab6363a1b9920f9590883ea64ed..b0d9f05b91bad84e9ad7f6762e3f9fa4e1da83b7 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file frontend.cpp
-*
-* @brief Implementation for Frontend which handles vertex processing,
-*        primitive assembly, clipping, binning, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file frontend.cpp
+ *
+ * @brief Implementation for Frontend which handles vertex processing,
+ *        primitive assembly, clipping, binning, etc.
+ *
+ ******************************************************************************/
  
  #include "api.h"
  #include "frontend.h"
@@ -45,7 +45,8 @@
  /// @brief Helper macro to generate a bitmask
  static INLINE uint32_t GenMask(uint32_t numBits)
  {
-    SWR_ASSERT(numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__);
+    SWR_ASSERT(
+        numBits <= (sizeof(uint32_t) * 8), "Too many bits (%d) for %s", numBits, __FUNCTION__);
      return ((1U << numBits) - 1);
  }
  
@@ -56,17 +57,13 @@ static INLINE uint32_t GenMask(uint32_t numBits)
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pUserData - Pointer to user data passed back to sync callback.
  /// @todo This should go away when we switch this to use compute threading.
-void ProcessSync(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
  {
      BE_WORK work;
-    work.type = SYNC;
+    work.type    = SYNC;
      work.pfnWork = ProcessSyncBE;
  
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
+    MacroTileMgr* pTileMgr = pDC->pTileMgr;
      pTileMgr->enqueue(0, 0, &work);
  }
  
@@ -76,17 +73,13 @@ void ProcessSync(
  /// @param pDC - pointer to draw context.
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pUserData - Pointer to user data passed back to sync callback.
-void ProcessShutdown(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
  {
      BE_WORK work;
-    work.type = SHUTDOWN;
+    work.type    = SHUTDOWN;
      work.pfnWork = ProcessShutdownBE;
  
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
+    MacroTileMgr* pTileMgr = pDC->pTileMgr;
      // Enqueue at least 1 work item for each worker thread
      // account for number of numa nodes
      uint32_t numNumaNodes = pContext->threadPool.numaMask + 1;
@@ -107,14 +100,10 @@ void ProcessShutdown(
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pUserData - Pointer to user data passed back to clear callback.
  /// @todo This should go away when we switch this to use compute threading.
-void ProcessClear(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
  {
-    CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData;
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
+    CLEAR_DESC*   pDesc    = (CLEAR_DESC*)pUserData;
+    MacroTileMgr* pTileMgr = pDC->pTileMgr;
  
      // queue a clear to each macro tile
      // compute macro tile bounds for the specified rect
@@ -124,8 +113,8 @@ void ProcessClear(
      uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
  
      BE_WORK work;
-    work.type = CLEAR;
-    work.pfnWork = ProcessClearBE;
+    work.type       = CLEAR;
+    work.pfnWork    = ProcessClearBE;
      work.desc.clear = *pDesc;
  
      for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
@@ -144,15 +133,11 @@ void ProcessClear(
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pUserData - Pointer to user data passed back to callback.
  /// @todo This should go away when we switch this to use compute threading.
-void ProcessStoreTiles(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+void ProcessStoreTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
  {
      RDTSC_BEGIN(FEProcessStoreTiles, pDC->drawId);
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
-    STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
+    MacroTileMgr*     pTileMgr = pDC->pTileMgr;
+    STORE_TILES_DESC* pDesc    = (STORE_TILES_DESC*)pUserData;
  
      // queue a store to each macro tile
      // compute macro tile bounds for the specified rect
@@ -163,8 +148,8 @@ void ProcessStoreTiles(
  
      // store tiles
      BE_WORK work;
-    work.type = STORETILES;
-    work.pfnWork = ProcessStoreTilesBE;
+    work.type            = STORETILES;
+    work.pfnWork         = ProcessStoreTilesBE;
      work.desc.storeTiles = *pDesc;
  
      for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
@@ -185,15 +170,14 @@ void ProcessStoreTiles(
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pUserData - Pointer to user data passed back to callback.
  /// @todo This should go away when we switch this to use compute threading.
-void ProcessDiscardInvalidateTiles(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+void ProcessDiscardInvalidateTiles(SWR_CONTEXT*  pContext,
+                                   DRAW_CONTEXT* pDC,
+                                   uint32_t      workerId,
+                                   void*         pUserData)
  {
      RDTSC_BEGIN(FEProcessInvalidateTiles, pDC->drawId);
-    DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
-    MacroTileMgr *pTileMgr = pDC->pTileMgr;
+    DISCARD_INVALIDATE_TILES_DESC* pDesc    = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
+    MacroTileMgr*                  pTileMgr = pDC->pTileMgr;
  
      // compute macro tile bounds for the specified rect
      uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
@@ -218,8 +202,8 @@ void ProcessDiscardInvalidateTiles(
  
      // load tiles
      BE_WORK work;
-    work.type = DISCARDINVALIDATETILES;
-    work.pfnWork = ProcessDiscardInvalidateTilesBE;
+    work.type                        = DISCARDINVALIDATETILES;
+    work.pfnWork                     = ProcessDiscardInvalidateTilesBE;
      work.desc.discardInvalidateTiles = *pDesc;
  
      for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
@@ -238,27 +222,40 @@ void ProcessDiscardInvalidateTiles(
  /// @param mode - primitive topology for draw operation.
  /// @param numPrims - number of vertices or indices for draw.
  /// @todo Frontend needs to be refactored. This will go in appropriate place then.
-uint32_t GetNumPrims(
-    PRIMITIVE_TOPOLOGY mode,
-    uint32_t numPrims)
+uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
  {
      switch (mode)
      {
-    case TOP_POINT_LIST: return numPrims;
-    case TOP_TRIANGLE_LIST: return numPrims / 3;
-    case TOP_TRIANGLE_STRIP: return numPrims < 3 ? 0 : numPrims - 2;
-    case TOP_TRIANGLE_FAN: return numPrims < 3 ? 0 : numPrims - 2;
-    case TOP_TRIANGLE_DISC: return numPrims < 2 ? 0 : numPrims - 1;
-    case TOP_QUAD_LIST: return numPrims / 4;
-    case TOP_QUAD_STRIP: return numPrims < 4 ? 0 : (numPrims - 2) / 2;
-    case TOP_LINE_STRIP: return numPrims < 2 ? 0 : numPrims - 1;
-    case TOP_LINE_LIST: return numPrims / 2;
-    case TOP_LINE_LOOP: return numPrims;
-    case TOP_RECT_LIST: return numPrims / 3;
-    case TOP_LINE_LIST_ADJ: return numPrims / 4;
-    case TOP_LISTSTRIP_ADJ: return numPrims < 3 ? 0 : numPrims - 3;
-    case TOP_TRI_LIST_ADJ: return numPrims / 6;
-    case TOP_TRI_STRIP_ADJ: return numPrims < 4 ? 0 : (numPrims / 2) - 2;
+    case TOP_POINT_LIST:
+        return numPrims;
+    case TOP_TRIANGLE_LIST:
+        return numPrims / 3;
+    case TOP_TRIANGLE_STRIP:
+        return numPrims < 3 ? 0 : numPrims - 2;
+    case TOP_TRIANGLE_FAN:
+        return numPrims < 3 ? 0 : numPrims - 2;
+    case TOP_TRIANGLE_DISC:
+        return numPrims < 2 ? 0 : numPrims - 1;
+    case TOP_QUAD_LIST:
+        return numPrims / 4;
+    case TOP_QUAD_STRIP:
+        return numPrims < 4 ? 0 : (numPrims - 2) / 2;
+    case TOP_LINE_STRIP:
+        return numPrims < 2 ? 0 : numPrims - 1;
+    case TOP_LINE_LIST:
+        return numPrims / 2;
+    case TOP_LINE_LOOP:
+        return numPrims;
+    case TOP_RECT_LIST:
+        return numPrims / 3;
+    case TOP_LINE_LIST_ADJ:
+        return numPrims / 4;
+    case TOP_LISTSTRIP_ADJ:
+        return numPrims < 3 ? 0 : numPrims - 3;
+    case TOP_TRI_LIST_ADJ:
+        return numPrims / 6;
+    case TOP_TRI_STRIP_ADJ:
+        return numPrims < 4 ? 0 : (numPrims / 2) - 2;
  
      case TOP_PATCHLIST_1:
      case TOP_PATCHLIST_2:
@@ -314,27 +311,40 @@ uint32_t GetNumPrims(
  /// @brief Computes the number of verts given the number of primitives.
  /// @param mode - primitive topology for draw operation.
  /// @param numPrims - number of primitives for draw.
-uint32_t GetNumVerts(
-    PRIMITIVE_TOPOLOGY mode,
-    uint32_t numPrims)
+uint32_t GetNumVerts(PRIMITIVE_TOPOLOGY mode, uint32_t numPrims)
  {
      switch (mode)
      {
-    case TOP_POINT_LIST: return numPrims;
-    case TOP_TRIANGLE_LIST: return numPrims * 3;
-    case TOP_TRIANGLE_STRIP: return numPrims ? numPrims + 2 : 0;
-    case TOP_TRIANGLE_FAN: return numPrims ? numPrims + 2 : 0;
-    case TOP_TRIANGLE_DISC: return numPrims ? numPrims + 1 : 0;
-    case TOP_QUAD_LIST: return numPrims * 4;
-    case TOP_QUAD_STRIP: return numPrims ? numPrims * 2 + 2 : 0;
-    case TOP_LINE_STRIP: return numPrims ? numPrims + 1 : 0;
-    case TOP_LINE_LIST: return numPrims * 2;
-    case TOP_LINE_LOOP: return numPrims;
-    case TOP_RECT_LIST: return numPrims * 3;
-    case TOP_LINE_LIST_ADJ: return numPrims * 4;
-    case TOP_LISTSTRIP_ADJ: return numPrims ? numPrims + 3 : 0;
-    case TOP_TRI_LIST_ADJ: return numPrims * 6;
-    case TOP_TRI_STRIP_ADJ: return numPrims ? (numPrims + 2) * 2 : 0;
+    case TOP_POINT_LIST:
+        return numPrims;
+    case TOP_TRIANGLE_LIST:
+        return numPrims * 3;
+    case TOP_TRIANGLE_STRIP:
+        return numPrims ? numPrims + 2 : 0;
+    case TOP_TRIANGLE_FAN:
+        return numPrims ? numPrims + 2 : 0;
+    case TOP_TRIANGLE_DISC:
+        return numPrims ? numPrims + 1 : 0;
+    case TOP_QUAD_LIST:
+        return numPrims * 4;
+    case TOP_QUAD_STRIP:
+        return numPrims ? numPrims * 2 + 2 : 0;
+    case TOP_LINE_STRIP:
+        return numPrims ? numPrims + 1 : 0;
+    case TOP_LINE_LIST:
+        return numPrims * 2;
+    case TOP_LINE_LOOP:
+        return numPrims;
+    case TOP_RECT_LIST:
+        return numPrims * 3;
+    case TOP_LINE_LIST_ADJ:
+        return numPrims * 4;
+    case TOP_LISTSTRIP_ADJ:
+        return numPrims ? numPrims + 3 : 0;
+    case TOP_TRI_LIST_ADJ:
+        return numPrims * 6;
+    case TOP_TRI_STRIP_ADJ:
+        return numPrims ? (numPrims + 2) * 2 : 0;
  
      case TOP_PATCHLIST_1:
      case TOP_PATCHLIST_2:
@@ -465,10 +475,15 @@ INLINE uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVert
          switch (topology)
          {
          case TOP_LISTSTRIP_ADJ:
-        case TOP_LINE_LIST_ADJ: numVerts = 4; break;
+        case TOP_LINE_LIST_ADJ:
+            numVerts = 4;
+            break;
          case TOP_TRI_STRIP_ADJ:
-        case TOP_TRI_LIST_ADJ: numVerts = 6; break;
-        default: break;
+        case TOP_TRI_LIST_ADJ:
+            numVerts = 6;
+            break;
+        default:
+            break;
          }
      }
  
@@ -480,14 +495,16 @@ INLINE uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVert
  /// @param numWorkItems - Number of items being worked on by a SIMD.
  static INLINE simdscalari GenerateMask(uint32_t numItemsRemaining)
  {
-    uint32_t numActive = (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining;
+    uint32_t numActive =
+        (numItemsRemaining >= KNOB_SIMD_WIDTH) ? KNOB_SIMD_WIDTH : numItemsRemaining;
      uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
      return _simd_castps_si(_simd_vmask_ps(mask));
  }
  
  static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
  {
-    uint32_t numActive = (numItemsRemaining >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : numItemsRemaining;
+    uint32_t numActive =
+        (numItemsRemaining >= KNOB_SIMD16_WIDTH) ? KNOB_SIMD16_WIDTH : numItemsRemaining;
      uint32_t mask = (numActive > 0) ? ((1 << numActive) - 1) : 0;
      return _simd16_castps_si(_simd16_vmask_ps(mask));
  }
@@ -499,23 +516,20 @@ static INLINE simd16scalari GenerateMask16(uint32_t numItemsRemaining)
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param numPrims - Number of prims to streamout (e.g. points, lines, tris)
  static void StreamOut(
-    DRAW_CONTEXT* pDC,
-    PA_STATE& pa,
-    uint32_t workerId,
-    uint32_t* pPrimData,
-    uint32_t streamIndex)
+    DRAW_CONTEXT* pDC, PA_STATE& pa, uint32_t workerId, uint32_t* pPrimData, uint32_t streamIndex)
  {
      RDTSC_BEGIN(FEStreamout, pDC->drawId);
  
-    const API_STATE& state = GetApiState(pDC);
-    const SWR_STREAMOUT_STATE &soState = state.soState;
+    const API_STATE&           state   = GetApiState(pDC);
+    const SWR_STREAMOUT_STATE& soState = state.soState;
  
      uint32_t soVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
  
-    // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each vertex.
+    // The pPrimData buffer is sparse in that we allocate memory for all 32 attributes for each
+    // vertex.
      uint32_t primDataDwordVertexStride = (SWR_VTX_NUM_SLOTS * sizeof(float) * 4) / sizeof(uint32_t);
  
-    SWR_STREAMOUT_CONTEXT soContext = { 0 };
+    SWR_STREAMOUT_CONTEXT soContext = {0};
  
      // Setup buffer state pointers.
      for (uint32_t i = 0; i < 4; ++i)
@@ -527,14 +541,14 @@ static void StreamOut(
  
      for (uint32_t primIndex = 0; primIndex < numPrims; ++primIndex)
      {
-        DWORD slot = 0;
+        DWORD    slot   = 0;
          uint64_t soMask = soState.streamMasks[streamIndex];
  
          // Write all entries into primitive data buffer for SOS.
          while (_BitScanForward64(&slot, soMask))
          {
-            simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM];    // prim attribs (always 4 wide)
-            uint32_t paSlot = slot + soState.vertexAttribOffset[streamIndex];
+            simd4scalar attrib[MAX_NUM_VERTS_PER_PRIM]; // prim attribs (always 4 wide)
+            uint32_t    paSlot = slot + soState.vertexAttribOffset[streamIndex];
              pa.AssembleSingle(paSlot, primIndex, attrib);
  
              // Attribute offset is relative offset from start of vertex.
@@ -546,7 +560,8 @@ static void StreamOut(
              // Store each vertex's attrib at appropriate locations in pPrimData buffer.
              for (uint32_t v = 0; v < soVertsPerPrim; ++v)
              {
-                uint32_t* pPrimDataAttrib = pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride);
+                uint32_t* pPrimDataAttrib =
+                    pPrimData + primDataAttribOffset + (v * primDataDwordVertexStride);
  
                  _mm_store_ps((float*)pPrimDataAttrib, attrib[v]);
              }
@@ -554,11 +569,12 @@ static void StreamOut(
              soMask &= ~(uint64_t(1) << slot);
          }
  
-        // Update pPrimData pointer 
+        // Update pPrimData pointer
          soContext.pPrimData = pPrimData;
  
          // Call SOS
-        SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr, "Trying to execute uninitialized streamout jit function.");
+        SWR_ASSERT(state.pfnSoFunc[streamIndex] != nullptr,
+                   "Trying to execute uninitialized streamout jit function.");
          state.pfnSoFunc[streamIndex](soContext);
      }
  
@@ -620,7 +636,10 @@ INLINE static T RoundDownEven(T value)
  ///
  /// note: the stride between vertexes is determinded by SWR_VTX_NUM_SLOTS
  ///
-void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const simdvertex *vertex, uint32_t vertexCount, uint32_t attribCount)
+void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex*     vertex_simd16,
+                                           const simdvertex* vertex,
+                                           uint32_t          vertexCount,
+                                           uint32_t          attribCount)
  {
      SWR_ASSERT(vertex);
      SWR_ASSERT(vertex_simd16);
@@ -634,11 +653,13 @@ void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const si
          {
              for (uint32_t k = 0; k < 4; k += 1)
              {
-                temp.attrib[j][k] = _simd16_insert_ps(_simd16_setzero_ps(), vertex[i].attrib[j][k], 0);
+                temp.attrib[j][k] =
+                    _simd16_insert_ps(_simd16_setzero_ps(), vertex[i].attrib[j][k], 0);
  
                  if ((i + 1) < vertexCount)
                  {
-                    temp.attrib[j][k] = _simd16_insert_ps(temp.attrib[j][k], vertex[i + 1].attrib[j][k], 1);
+                    temp.attrib[j][k] =
+                        _simd16_insert_ps(temp.attrib[j][k], vertex[i + 1].attrib[j][k], 1);
                  }
              }
          }
@@ -658,9 +679,7 @@ void PackPairsOfSimdVertexIntoSimd16Vertex(simd16vertex *vertex_simd16, const si
  ///        then return the remaining amount of work.
  /// @param curIndex - The start index for the SIMD.
  /// @param maxIndex - The last index for all work items.
-static INLINE uint32_t GetNumInvocations(
-    uint32_t curIndex,
-    uint32_t maxIndex)
+static INLINE uint32_t GetNumInvocations(uint32_t curIndex, uint32_t maxIndex)
  {
      uint32_t remainder = (maxIndex - curIndex);
  #if USE_SIMD16_FRONTEND
@@ -680,17 +699,20 @@ static INLINE uint32_t GetNumInvocations(
  /// @param pStreamIdBase - pointer to the stream ID buffer
  /// @param numEmittedVerts - Number of total verts emitted by the GS
  /// @param pCutBuffer - output buffer to write cuts to
-void ProcessStreamIdBuffer(uint32_t stream, uint8_t* pStreamIdBase, uint32_t numEmittedVerts, uint8_t *pCutBuffer)
+void ProcessStreamIdBuffer(uint32_t stream,
+                           uint8_t* pStreamIdBase,
+                           uint32_t numEmittedVerts,
+                           uint8_t* pCutBuffer)
  {
      SWR_ASSERT(stream < MAX_SO_STREAMS);
  
-    uint32_t numInputBytes = (numEmittedVerts * 2  + 7) / 8;
+    uint32_t numInputBytes  = (numEmittedVerts * 2 + 7) / 8;
      uint32_t numOutputBytes = std::max(numInputBytes / 2, 1U);
  
      for (uint32_t b = 0; b < numOutputBytes; ++b)
      {
-        uint8_t curInputByte = pStreamIdBase[2*b];
-        uint8_t outByte = 0;
+        uint8_t curInputByte = pStreamIdBase[2 * b];
+        uint8_t outByte      = 0;
          for (uint32_t i = 0; i < 4; ++i)
          {
              if ((curInputByte & 0x3) != stream)
@@ -720,16 +742,17 @@ struct GsBuffers
      uint8_t* pGsIn;
      uint8_t* pGsOut[KNOB_SIMD_WIDTH];
      uint8_t* pGsTransposed;
-    void* pStreamCutBuffer;
+    void*    pStreamCutBuffer;
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Transposes GS output from SOA to AOS to feed the primitive assembler
-/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive assembler
+/// @param pDst - Destination buffer in AOS form for the current SIMD width, fed into the primitive
+/// assembler
  /// @param pSrc - Buffer of vertices in SOA form written by the geometry shader
  /// @param numVerts - Number of vertices outputted by the GS
  /// @param numAttribs - Number of attributes per vertex
-template<typename SIMD_T, uint32_t SimdWidth>
+template <typename SIMD_T, uint32_t SimdWidth>
  void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t numAttribs)
  {
      uint32_t srcVertexStride = numAttribs * sizeof(float) * 4;
@@ -743,7 +766,7 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t
      }
      auto vGatherOffsets = SIMD_T::load_si((Integer<SIMD_T>*)&gatherOffsets[0]);
  
-    uint32_t numSimd = AlignUp(numVerts, SimdWidth) / SimdWidth;
+    uint32_t numSimd        = AlignUp(numVerts, SimdWidth) / SimdWidth;
      uint32_t remainingVerts = numVerts;
  
      for (uint32_t s = 0; s < numSimd; ++s)
@@ -753,21 +776,36 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t
  
          // Compute mask to prevent src overflow
          uint32_t mask = std::min(remainingVerts, SimdWidth);
-        mask = GenMask(mask);
-        auto vMask = SIMD_T::vmask_ps(mask);
-        auto viMask = SIMD_T::castps_si(vMask);
+        mask          = GenMask(mask);
+        auto vMask    = SIMD_T::vmask_ps(mask);
+        auto viMask   = SIMD_T::castps_si(vMask);
  
          for (uint32_t a = 0; a < numAttribs; ++a)
          {
-            auto attribGatherX = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
-            auto attribGatherY = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float)), vGatherOffsets, vMask);
-            auto attribGatherZ = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 2), vGatherOffsets, vMask);
-            auto attribGatherW = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(SIMD_T::setzero_ps(), (const float*)(pSrcBase + sizeof(float) * 3), vGatherOffsets, vMask);
+            auto attribGatherX = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                SIMD_T::setzero_ps(), (const float*)pSrcBase, vGatherOffsets, vMask);
+            auto attribGatherY = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                SIMD_T::setzero_ps(),
+                (const float*)(pSrcBase + sizeof(float)),
+                vGatherOffsets,
+                vMask);
+            auto attribGatherZ = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                SIMD_T::setzero_ps(),
+                (const float*)(pSrcBase + sizeof(float) * 2),
+                vGatherOffsets,
+                vMask);
+            auto attribGatherW = SIMD_T::template mask_i32gather_ps<ScaleFactor<SIMD_T>(1)>(
+                SIMD_T::setzero_ps(),
+                (const float*)(pSrcBase + sizeof(float) * 3),
+                vGatherOffsets,
+                vMask);
  
              SIMD_T::maskstore_ps((float*)pDstBase, viMask, attribGatherX);
              SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>)), viMask, attribGatherY);
-            SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>) * 2), viMask, attribGatherZ);
-            SIMD_T::maskstore_ps((float*)(pDstBase + sizeof(Float<SIMD_T>) * 3), viMask, attribGatherW);
+            SIMD_T::maskstore_ps(
+                (float*)(pDstBase + sizeof(Float<SIMD_T>) * 2), viMask, attribGatherZ);
+            SIMD_T::maskstore_ps(
+                (float*)(pDstBase + sizeof(Float<SIMD_T>) * 3), viMask, attribGatherW);
  
              pSrcBase += sizeof(float) * 4;
              pDstBase += sizeof(Float<SIMD_T>) * 4;
@@ -783,38 +821,35 @@ void TransposeSOAtoAOS(uint8_t* pDst, uint8_t* pSrc, uint32_t numVerts, uint32_t
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pa - The primitive assembly object.
  /// @param pGsOut - output stream for GS
-template <
-    typename HasStreamOutT,
-    typename HasRastT>
-static void GeometryShaderStage(
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    PA_STATE& pa,
-    GsBuffers* pGsBuffers,
-    uint32_t* pSoPrimData,
+template <typename HasStreamOutT, typename HasRastT>
+static void GeometryShaderStage(DRAW_CONTEXT* pDC,
+                                uint32_t      workerId,
+                                PA_STATE&     pa,
+                                GsBuffers*    pGsBuffers,
+                                uint32_t*     pSoPrimData,
  #if USE_SIMD16_FRONTEND
-    uint32_t numPrims_simd8,
+                                uint32_t numPrims_simd8,
  #endif
-    simdscalari const &primID)
+                                simdscalari const& primID)
  {
      RDTSC_BEGIN(FEGeometryShader, pDC->drawId);
  
      void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
-    const API_STATE& state = GetApiState(pDC);
+    const API_STATE&    state  = GetApiState(pDC);
      const SWR_GS_STATE* pState = &state.gsState;
-    SWR_GS_CONTEXT gsContext;
+    SWR_GS_CONTEXT      gsContext;
  
-    static uint8_t sNullBuffer[128] = { 0 };
+    static uint8_t sNullBuffer[128] = {0};
  
      for (uint32_t i = 0; i < KNOB_SIMD_WIDTH; ++i)
      {
          gsContext.pStreams[i] = pGsBuffers->pGsOut[i];
      }
-    gsContext.pVerts = (simdvector*)pGsBuffers->pGsIn;
+    gsContext.pVerts      = (simdvector*)pGsBuffers->pGsIn;
      gsContext.PrimitiveID = primID;
  
-    uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
+    uint32_t   numVertsPerPrim = NumVertsPerPrim(pa.binTopology, true);
      simdvector attrib[MAX_NUM_VERTS_PER_PRIM];
  
      // assemble all attributes for the input primitive
@@ -822,7 +857,7 @@ static void GeometryShaderStage(
      for (uint32_t slot = 0; slot < pState->numInputAttribs; ++slot)
      {
          uint32_t srcAttribSlot = pState->srcVertexAttribOffset + slot;
-        uint32_t attribSlot = pState->vertexAttribOffset + slot;
+        uint32_t attribSlot    = pState->vertexAttribOffset + slot;
          pa.Assemble(srcAttribSlot, attrib);
  
          for (uint32_t i = 0; i < numVertsPerPrim; ++i)
@@ -843,13 +878,13 @@ static void GeometryShaderStage(
  #if USE_SIMD16_FRONTEND
      uint32_t numInputPrims = numPrims_simd8;
  #else
-    uint32_t numInputPrims = pa.NumPrims();
+    uint32_t          numInputPrims = pa.NumPrims();
  #endif
  
      for (uint32_t instance = 0; instance < pState->instanceCount; ++instance)
      {
          gsContext.InstanceID = instance;
-        gsContext.mask = GenerateMask(numInputPrims);
+        gsContext.mask       = GenerateMask(numInputPrims);
  
          // execute the geometry shader
          state.pfnGsFunc(GetPrivateState(pDC), pWorkerData, &gsContext);
@@ -868,25 +903,43 @@ static void GeometryShaderStage(
      {
          switch (pState->outputTopology)
          {
-        case TOP_RECT_LIST:         pfnClipFunc = ClipRectangles_simd16; break;
-        case TOP_TRIANGLE_STRIP:    pfnClipFunc = ClipTriangles_simd16; break;
-        case TOP_LINE_STRIP:        pfnClipFunc = ClipLines_simd16; break;
-        case TOP_POINT_LIST:        pfnClipFunc = ClipPoints_simd16; break;
-        default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
+        case TOP_RECT_LIST:
+            pfnClipFunc = ClipRectangles_simd16;
+            break;
+        case TOP_TRIANGLE_STRIP:
+            pfnClipFunc = ClipTriangles_simd16;
+            break;
+        case TOP_LINE_STRIP:
+            pfnClipFunc = ClipLines_simd16;
+            break;
+        case TOP_POINT_LIST:
+            pfnClipFunc = ClipPoints_simd16;
+            break;
+        default:
+            SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
          }
      }
  
  #else
-    PFN_PROCESS_PRIMS pfnClipFunc = nullptr;
+    PFN_PROCESS_PRIMS pfnClipFunc   = nullptr;
      if (HasRastT::value)
      {
          switch (pState->outputTopology)
          {
-        case TOP_RECT_LIST:         pfnClipFunc = ClipRectangles; break;
-        case TOP_TRIANGLE_STRIP:    pfnClipFunc = ClipTriangles; break;
-        case TOP_LINE_STRIP:        pfnClipFunc = ClipLines; break;
-        case TOP_POINT_LIST:        pfnClipFunc = ClipPoints; break;
-        default: SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
+        case TOP_RECT_LIST:
+            pfnClipFunc = ClipRectangles;
+            break;
+        case TOP_TRIANGLE_STRIP:
+            pfnClipFunc = ClipTriangles;
+            break;
+        case TOP_LINE_STRIP:
+            pfnClipFunc = ClipLines;
+            break;
+        case TOP_POINT_LIST:
+            pfnClipFunc = ClipPoints;
+            break;
+        default:
+            SWR_INVALID("Unexpected GS output topology: %d", pState->outputTopology);
          }
      }
  
@@ -922,29 +975,37 @@ static void GeometryShaderStage(
              }
  
              uint8_t* pBase = pInstanceBase + instance * pState->allocationSize;
-            uint8_t* pCutBase = pState->controlDataSize == 0 ? &sNullBuffer[0] : pBase + pState->controlDataOffset;
+            uint8_t* pCutBase =
+                pState->controlDataSize == 0 ? &sNullBuffer[0] : pBase + pState->controlDataOffset;
              uint8_t* pVertexBaseAOS = pBase + pState->outputVertexOffset;
  
  #if USE_SIMD16_FRONTEND
-            TransposeSOAtoAOS<SIMD512, KNOB_SIMD16_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed, pVertexBaseAOS, vertexCount, pState->outputVertexSize);
+            TransposeSOAtoAOS<SIMD512, KNOB_SIMD16_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
+                                                          pVertexBaseAOS,
+                                                          vertexCount,
+                                                          pState->outputVertexSize);
  #else
-            TransposeSOAtoAOS<SIMD256, KNOB_SIMD_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed, pVertexBaseAOS, vertexCount, pState->outputVertexSize);
+            TransposeSOAtoAOS<SIMD256, KNOB_SIMD_WIDTH>((uint8_t*)pGsBuffers->pGsTransposed,
+                                                        pVertexBaseAOS,
+                                                        vertexCount,
+                                                        pState->outputVertexSize);
  #endif
  
              uint32_t numAttribs = state.feNumAttributes;
  
              for (uint32_t stream = 0; stream < MAX_SO_STREAMS; ++stream)
              {
-                bool processCutVerts = false;
-                uint8_t* pCutBuffer = pCutBase;
+                bool     processCutVerts = false;
+                uint8_t* pCutBuffer      = pCutBase;
  
                  // assign default stream ID, only relevant when GS is outputting a single stream
                  uint32_t streamID = 0;
                  if (pState->isSingleStream)
                  {
                      processCutVerts = true;
-                    streamID = pState->singleStreamID;
-                    if (streamID != stream) continue;
+                    streamID        = pState->singleStreamID;
+                    if (streamID != stream)
+                        continue;
                  }
                  else
                  {
@@ -955,16 +1016,35 @@ static void GeometryShaderStage(
                      }
  
                      // multi-stream output, need to translate StreamID buffer to a cut buffer
-                    ProcessStreamIdBuffer(stream, pCutBase, numEmittedVerts, (uint8_t*)pGsBuffers->pStreamCutBuffer);
-                    pCutBuffer = (uint8_t*)pGsBuffers->pStreamCutBuffer;
+                    ProcessStreamIdBuffer(
+                        stream, pCutBase, numEmittedVerts, (uint8_t*)pGsBuffers->pStreamCutBuffer);
+                    pCutBuffer      = (uint8_t*)pGsBuffers->pStreamCutBuffer;
                      processCutVerts = false;
                  }
  
  #if USE_SIMD16_FRONTEND
-                PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, reinterpret_cast<simd16mask *>(pCutBuffer), numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim);
+                PA_STATE_CUT gsPa(pDC,
+                                  (uint8_t*)pGsBuffers->pGsTransposed,
+                                  numEmittedVerts,
+                                  pState->outputVertexSize,
+                                  reinterpret_cast<simd16mask*>(pCutBuffer),
+                                  numEmittedVerts,
+                                  numAttribs,
+                                  pState->outputTopology,
+                                  processCutVerts,
+                                  pa.numVertsPerPrim);
  
  #else
-                PA_STATE_CUT gsPa(pDC, (uint8_t*)pGsBuffers->pGsTransposed, numEmittedVerts, pState->outputVertexSize, pCutBuffer, numEmittedVerts, numAttribs, pState->outputTopology, processCutVerts, pa.numVertsPerPrim);
+                PA_STATE_CUT gsPa(pDC,
+                                  (uint8_t*)pGsBuffers->pGsTransposed,
+                                  numEmittedVerts,
+                                  pState->outputVertexSize,
+                                  pCutBuffer,
+                                  numEmittedVerts,
+                                  numAttribs,
+                                  pState->outputTopology,
+                                  processCutVerts,
+                                  pa.numVertsPerPrim);
  
  #endif
                  while (gsPa.GetNextStreamOutput())
@@ -999,18 +1079,19 @@ static void GeometryShaderStage(
  
                                  // Gather data from the SVG if provided.
                                  simd16scalari vViewportIdx = SIMD16::setzero_si();
-                                simd16scalari vRtIdx = SIMD16::setzero_si();
-                                SIMD16::Vec4 svgAttrib[4];
+                                simd16scalari vRtIdx       = SIMD16::setzero_si();
+                                SIMD16::Vec4  svgAttrib[4];
  
-                                if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                if (state.backendState.readViewportArrayIndex ||
+                                    state.backendState.readRenderTargetArrayIndex)
                                  {
                                      gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                                  }
  
-
                                  if (state.backendState.readViewportArrayIndex)
                                  {
-                                    vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                    vViewportIdx =
+                                        SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
                                      gsPa.viewportArrayActive = true;
                                  }
                                  if (state.backendState.readRenderTargetArrayIndex)
@@ -1021,36 +1102,50 @@ static void GeometryShaderStage(
  
                                  {
                                      // OOB VPAI indices => forced to zero.
-                                    vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
-                                    simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                                    simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
+                                    vViewportIdx =
+                                        SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
+                                    simd16scalari vNumViewports =
+                                        SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                    simd16scalari vClearMask =
+                                        SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
                                      vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
  
                                      gsPa.useAlternateOffset = false;
-                                    pfnClipFunc(pDC, gsPa, workerId, attrib_simd16, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
+                                    pfnClipFunc(pDC,
+                                                gsPa,
+                                                workerId,
+                                                attrib_simd16,
+                                                GenMask(gsPa.NumPrims()),
+                                                vPrimId,
+                                                vViewportIdx,
+                                                vRtIdx);
                                  }
  #else
                                  simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);
  
                                  // Gather data from the SVG if provided.
                                  simdscalari vViewportIdx = SIMD::setzero_si();
-                                simdscalari vRtIdx = SIMD::setzero_si();
-                                SIMD::Vec4 svgAttrib[4];
+                                simdscalari vRtIdx       = SIMD::setzero_si();
+                                SIMD::Vec4  svgAttrib[4];
  
-                                if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                if (state.backendState.readViewportArrayIndex ||
+                                    state.backendState.readRenderTargetArrayIndex)
                                  {
                                      gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                                  }
  
-
                                  if (state.backendState.readViewportArrayIndex)
                                  {
-                                    vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                    vViewportIdx =
+                                        SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
  
                                      // OOB VPAI indices => forced to zero.
-                                    vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
-                                    simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                                    simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+                                    vViewportIdx =
+                                        SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
+                                    simdscalari vNumViewports =
+                                        SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                    simdscalari vClearMask =
+                                        SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
                                      vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
                                      gsPa.viewportArrayActive = true;
                                  }
@@ -1060,7 +1155,14 @@ static void GeometryShaderStage(
                                      gsPa.rtArrayActive = true;
                                  }
  
-                                pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
+                                pfnClipFunc(pDC,
+                                            gsPa,
+                                            workerId,
+                                            attrib,
+                                            GenMask(gsPa.NumPrims()),
+                                            vPrimId,
+                                            vViewportIdx,
+                                            vRtIdx);
  #endif
                              }
                          }
@@ -1073,7 +1175,7 @@ static void GeometryShaderStage(
      // update GS pipeline stats
      UPDATE_STAT_FE(GsInvocations, numInputPrims * pState->instanceCount);
      UPDATE_STAT_FE(GsPrimitives, totalPrimsGenerated);
-    AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim*numInputPrims));
+    AR_EVENT(GSPrimInfo(numInputPrims, totalPrimsGenerated, numVertsPerPrim * numInputPrims));
      RDTSC_END(FEGeometryShader, 1);
  }
  
@@ -1083,8 +1185,11 @@ static void GeometryShaderStage(
  /// @param state - API state
  /// @param ppGsOut - pointer to GS output buffer allocation
  /// @param ppCutBuffer - pointer to GS output cut buffer allocation
-template<typename SIMD_T, uint32_t SIMD_WIDTH>
-static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state, uint32_t vertsPerPrim, GsBuffers* pGsBuffers)
+template <typename SIMD_T, uint32_t SIMD_WIDTH>
+static INLINE void AllocateGsBuffers(DRAW_CONTEXT*    pDC,
+                                     const API_STATE& state,
+                                     uint32_t         vertsPerPrim,
+                                     GsBuffers*       pGsBuffers)
  {
      auto pArena = pDC->pArena;
      SWR_ASSERT(pArena != nullptr);
@@ -1094,7 +1199,7 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state,
  
      // Allocate storage for vertex inputs
      uint32_t vertexInBufferSize = gsState.inputVertStride * sizeof(simdvector) * vertsPerPrim;
-    pGsBuffers->pGsIn = (uint8_t*)pArena->AllocAligned(vertexInBufferSize, 32);
+    pGsBuffers->pGsIn           = (uint8_t*)pArena->AllocAligned(vertexInBufferSize, 32);
  
      // Allocate arena space to hold GS output verts
      const uint32_t vertexBufferSize = gsState.instanceCount * gsState.allocationSize;
@@ -1106,7 +1211,8 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state,
  
      // Allocate storage for transposed GS output
      uint32_t numSimdBatches = AlignUp(gsState.maxNumVerts, SIMD_WIDTH) / SIMD_WIDTH;
-    uint32_t transposedBufferSize = numSimdBatches * gsState.outputVertexSize * sizeof(Vec4<SIMD_T>);
+    uint32_t transposedBufferSize =
+        numSimdBatches * gsState.outputVertexSize * sizeof(Vec4<SIMD_T>);
      pGsBuffers->pGsTransposed = (uint8_t*)pArena->AllocAligned(transposedBufferSize, 32);
  
      // Allocate storage to hold temporary stream->cut buffer, if necessary
@@ -1116,7 +1222,8 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state,
      }
      else
      {
-        pGsBuffers->pStreamCutBuffer = (uint8_t*)pArena->AllocAligned(AlignUp(gsState.maxNumVerts * 2, 32), 32);
+        pGsBuffers->pStreamCutBuffer =
+            (uint8_t*)pArena->AllocAligned(AlignUp(gsState.maxNumVerts * 2, 32), 32);
      }
  }
  
@@ -1126,12 +1233,12 @@ static INLINE void AllocateGsBuffers(DRAW_CONTEXT* pDC, const API_STATE& state,
  struct TessellationThreadLocalData
  {
      SWR_HS_CONTEXT hsContext;
-    ScalarPatch patchData[KNOB_SIMD_WIDTH];
-    void* pTxCtx;
-    size_t tsCtxSize;
+    ScalarPatch    patchData[KNOB_SIMD_WIDTH];
+    void*          pTxCtx;
+    size_t         tsCtxSize;
  
      simdscalar* pDSOutput;
-    size_t dsOutputAllocSize;
+    size_t      dsOutputAllocSize;
  };
  
  THREAD TessellationThreadLocalData* gt_pTessellationThreadData = nullptr;
@@ -1144,8 +1251,8 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext)
      /// @TODO - Don't use thread local storage.  Use Worker local storage instead.
      if (gt_pTessellationThreadData == nullptr)
      {
-        gt_pTessellationThreadData = (TessellationThreadLocalData*)
-            AlignedMalloc(sizeof(TessellationThreadLocalData), 64);
+        gt_pTessellationThreadData =
+            (TessellationThreadLocalData*)AlignedMalloc(sizeof(TessellationThreadLocalData), 64);
          memset(gt_pTessellationThreadData, 0, sizeof(*gt_pTessellationThreadData));
      }
  }
@@ -1156,42 +1263,37 @@ static void AllocateTessellationData(SWR_CONTEXT* pContext)
  /// @param workerId - thread's worker id. Even thread has a unique id.
  /// @param pa - The primitive assembly object.
  /// @param pGsOut - output stream for GS
-template <
-    typename HasGeometryShaderT,
-    typename HasStreamOutT,
-    typename HasRastT>
-static void TessellationStages(
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    PA_STATE& pa,
-    GsBuffers* pGsBuffers,
-    uint32_t* pSoPrimData,
+template <typename HasGeometryShaderT, typename HasStreamOutT, typename HasRastT>
+static void TessellationStages(DRAW_CONTEXT* pDC,
+                               uint32_t      workerId,
+                               PA_STATE&     pa,
+                               GsBuffers*    pGsBuffers,
+                               uint32_t*     pSoPrimData,
  #if USE_SIMD16_FRONTEND
-    uint32_t numPrims_simd8,
+                               uint32_t numPrims_simd8,
  #endif
-    simdscalari const &primID)
+                               simdscalari const& primID)
  {
-    const API_STATE& state = GetApiState(pDC);
+    const API_STATE&    state   = GetApiState(pDC);
      const SWR_TS_STATE& tsState = state.tsState;
      void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
      SWR_ASSERT(gt_pTessellationThreadData);
  
-    HANDLE tsCtx = TSInitCtx(
-        tsState.domain,
-        tsState.partitioning,
-        tsState.tsOutputTopology,
-        gt_pTessellationThreadData->pTxCtx,
-        gt_pTessellationThreadData->tsCtxSize);
+    HANDLE tsCtx = TSInitCtx(tsState.domain,
+                             tsState.partitioning,
+                             tsState.tsOutputTopology,
+                             gt_pTessellationThreadData->pTxCtx,
+                             gt_pTessellationThreadData->tsCtxSize);
      if (tsCtx == nullptr)
      {
-        gt_pTessellationThreadData->pTxCtx = AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64);
-        tsCtx = TSInitCtx(
-            tsState.domain,
-            tsState.partitioning,
-            tsState.tsOutputTopology,
-            gt_pTessellationThreadData->pTxCtx,
-            gt_pTessellationThreadData->tsCtxSize);
+        gt_pTessellationThreadData->pTxCtx =
+            AlignedMalloc(gt_pTessellationThreadData->tsCtxSize, 64);
+        tsCtx = TSInitCtx(tsState.domain,
+                          tsState.partitioning,
+                          tsState.tsOutputTopology,
+                          gt_pTessellationThreadData->pTxCtx,
+                          gt_pTessellationThreadData->tsCtxSize);
      }
      SWR_ASSERT(tsCtx);
  
@@ -1201,10 +1303,17 @@ static void TessellationStages(
      {
          switch (tsState.postDSTopology)
          {
-        case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles_simd16; break;
-        case TOP_LINE_LIST:     pfnClipFunc = ClipLines_simd16; break;
-        case TOP_POINT_LIST:    pfnClipFunc = ClipPoints_simd16; break;
-        default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
+        case TOP_TRIANGLE_LIST:
+            pfnClipFunc = ClipTriangles_simd16;
+            break;
+        case TOP_LINE_LIST:
+            pfnClipFunc = ClipLines_simd16;
+            break;
+        case TOP_POINT_LIST:
+            pfnClipFunc = ClipPoints_simd16;
+            break;
+        default:
+            SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
          }
      }
  
@@ -1214,17 +1323,24 @@ static void TessellationStages(
      {
          switch (tsState.postDSTopology)
          {
-        case TOP_TRIANGLE_LIST: pfnClipFunc = ClipTriangles; break;
-        case TOP_LINE_LIST:     pfnClipFunc = ClipLines; break;
-        case TOP_POINT_LIST:    pfnClipFunc = ClipPoints; break;
-        default: SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
+        case TOP_TRIANGLE_LIST:
+            pfnClipFunc = ClipTriangles;
+            break;
+        case TOP_LINE_LIST:
+            pfnClipFunc = ClipLines;
+            break;
+        case TOP_POINT_LIST:
+            pfnClipFunc = ClipPoints;
+            break;
+        default:
+            SWR_INVALID("Unexpected DS output topology: %d", tsState.postDSTopology);
          }
      }
  
  #endif
      SWR_HS_CONTEXT& hsContext = gt_pTessellationThreadData->hsContext;
-    hsContext.pCPout = gt_pTessellationThreadData->patchData;
-    hsContext.PrimitiveID = primID;
+    hsContext.pCPout          = gt_pTessellationThreadData->patchData;
+    hsContext.PrimitiveID     = primID;
  
      uint32_t numVertsPerPrim = NumVertsPerPrim(pa.binTopology, false);
      // Max storage for one attribute for an entire simdprimitive
@@ -1266,7 +1382,7 @@ static void TessellationStages(
      for (uint32_t p = 0; p < numPrims; ++p)
      {
          // Run Tessellator
-        SWR_TS_TESSELLATED_DATA tsData = { 0 };
+        SWR_TS_TESSELLATED_DATA tsData = {0};
          RDTSC_BEGIN(FETessellation, pDC->drawId);
          TSTessellate(tsCtx, hsContext.pCPout[p].tessFactors, tsData);
          AR_EVENT(TessPrimCount(1));
@@ -1279,17 +1395,20 @@ static void TessellationStages(
          SWR_ASSERT(tsData.NumDomainPoints);
  
          // Allocate DS Output memory
-        uint32_t requiredDSVectorInvocations = AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
+        uint32_t requiredDSVectorInvocations =
+            AlignUp(tsData.NumDomainPoints, KNOB_SIMD_WIDTH) / KNOB_SIMD_WIDTH;
  #if USE_SIMD16_FRONTEND
-        size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) * tsState.dsAllocationSize;      // simd8 -> simd16, padding
+        size_t requiredAllocSize = sizeof(simdvector) * RoundUpEven(requiredDSVectorInvocations) *
+                                   tsState.dsAllocationSize; // simd8 -> simd16, padding
  #else
          size_t requiredDSOutputVectors = requiredDSVectorInvocations * tsState.dsAllocationSize;
-        size_t requiredAllocSize = sizeof(simdvector) * requiredDSOutputVectors;
+        size_t requiredAllocSize       = sizeof(simdvector) * requiredDSOutputVectors;
  #endif
          if (requiredAllocSize > gt_pTessellationThreadData->dsOutputAllocSize)
          {
              AlignedFree(gt_pTessellationThreadData->pDSOutput);
-            gt_pTessellationThreadData->pDSOutput = (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
+            gt_pTessellationThreadData->pDSOutput =
+                (simdscalar*)AlignedMalloc(requiredAllocSize, 64);
              gt_pTessellationThreadData->dsOutputAllocSize = requiredAllocSize;
          }
          SWR_ASSERT(gt_pTessellationThreadData->pDSOutput);
@@ -1301,21 +1420,22 @@ static void TessellationStages(
  
          // Run Domain Shader
          SWR_DS_CONTEXT dsContext;
-        dsContext.PrimitiveID = pPrimId[p];
-        dsContext.pCpIn = &hsContext.pCPout[p];
-        dsContext.pDomainU = (simdscalar*)tsData.pDomainPointsU;
-        dsContext.pDomainV = (simdscalar*)tsData.pDomainPointsV;
-        dsContext.pOutputData = gt_pTessellationThreadData->pDSOutput;
+        dsContext.PrimitiveID           = pPrimId[p];
+        dsContext.pCpIn                 = &hsContext.pCPout[p];
+        dsContext.pDomainU              = (simdscalar*)tsData.pDomainPointsU;
+        dsContext.pDomainV              = (simdscalar*)tsData.pDomainPointsV;
+        dsContext.pOutputData           = gt_pTessellationThreadData->pDSOutput;
          dsContext.outVertexAttribOffset = tsState.dsOutVtxAttribOffset;
  #if USE_SIMD16_FRONTEND
-        dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations);      // simd8 -> simd16
+        dsContext.vectorStride = RoundUpEven(requiredDSVectorInvocations); // simd8 -> simd16
  #else
-        dsContext.vectorStride = requiredDSVectorInvocations;
+        dsContext.vectorStride         = requiredDSVectorInvocations;
  #endif
  
          uint32_t dsInvocations = 0;
  
-        for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations; ++dsContext.vectorOffset)
+        for (dsContext.vectorOffset = 0; dsContext.vectorOffset < requiredDSVectorInvocations;
+             ++dsContext.vectorOffset)
          {
              dsContext.mask = GenerateMask(tsData.NumDomainPoints - dsInvocations);
  
@@ -1330,14 +1450,14 @@ static void TessellationStages(
          UPDATE_STAT_FE(DsInvocations, tsData.NumDomainPoints);
  
  #if USE_SIMD16_FRONTEND
-        SWR_ASSERT(IsEven(dsContext.vectorStride));                             // simd8 -> simd16
+        SWR_ASSERT(IsEven(dsContext.vectorStride)); // simd8 -> simd16
  
  #endif
          PA_TESS tessPa(
              pDC,
  #if USE_SIMD16_FRONTEND
-            reinterpret_cast<const simd16scalar *>(dsContext.pOutputData),      // simd8 -> simd16
-            dsContext.vectorStride / 2,                                         // simd8 -> simd16
+            reinterpret_cast<const simd16scalar*>(dsContext.pOutputData), // simd8 -> simd16
+            dsContext.vectorStride / 2,                                   // simd8 -> simd16
  #else
              dsContext.pOutputData,
              dsContext.vectorStride,
@@ -1352,29 +1472,37 @@ static void TessellationStages(
          while (tessPa.HasWork())
          {
  #if USE_SIMD16_FRONTEND
-            const uint32_t numPrims = tessPa.NumPrims();
+            const uint32_t numPrims    = tessPa.NumPrims();
              const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
-            const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
+            const uint32_t numPrims_hi =
+                std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
  
-            const simd16scalari primID = _simd16_set1_epi32(dsContext.PrimitiveID);
-            const simdscalari primID_lo = _simd16_extract_si(primID, 0);
-            const simdscalari primID_hi = _simd16_extract_si(primID, 1);
+            const simd16scalari primID    = _simd16_set1_epi32(dsContext.PrimitiveID);
+            const simdscalari   primID_lo = _simd16_extract_si(primID, 0);
+            const simdscalari   primID_hi = _simd16_extract_si(primID, 1);
  
  #endif
              if (HasGeometryShaderT::value)
              {
  #if USE_SIMD16_FRONTEND
                  tessPa.useAlternateOffset = false;
-                GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+                GeometryShaderStage<HasStreamOutT, HasRastT>(
+                    pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_lo, primID_lo);
  
                  if (numPrims_hi)
                  {
                      tessPa.useAlternateOffset = true;
-                    GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+                    GeometryShaderStage<HasStreamOutT, HasRastT>(
+                        pDC, workerId, tessPa, pGsBuffers, pSoPrimData, numPrims_hi, primID_hi);
                  }
  #else
                  GeometryShaderStage<HasStreamOutT, HasRastT>(
-                    pDC, workerId, tessPa, pGsBuffers, pSoPrimData, _simd_set1_epi32(dsContext.PrimitiveID));
+                    pDC,
+                    workerId,
+                    tessPa,
+                    pGsBuffers,
+                    pSoPrimData,
+                    _simd_set1_epi32(dsContext.PrimitiveID));
  #endif
              }
              else
@@ -1390,9 +1518,9 @@ static void TessellationStages(
                  if (HasRastT::value)
                  {
  #if USE_SIMD16_FRONTEND
-                    simd16vector    prim_simd16[3]; // Only deal with triangles, lines, or points
+                    simd16vector prim_simd16[3]; // Only deal with triangles, lines, or points
  #else
-                    simdvector      prim[3];        // Only deal with triangles, lines, or points
+                    simdvector prim[3]; // Only deal with triangles, lines, or points
  #endif
                      RDTSC_BEGIN(FEPAAssemble, pDC->drawId);
                      bool assemble =
@@ -1408,15 +1536,15 @@ static void TessellationStages(
  #if USE_SIMD16_FRONTEND
                      // Gather data from the SVG if provided.
                      simd16scalari vViewportIdx = SIMD16::setzero_si();
-                    simd16scalari vRtIdx = SIMD16::setzero_si();
-                    SIMD16::Vec4 svgAttrib[4];
+                    simd16scalari vRtIdx       = SIMD16::setzero_si();
+                    SIMD16::Vec4  svgAttrib[4];
  
-                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                    if (state.backendState.readViewportArrayIndex ||
+                        state.backendState.readRenderTargetArrayIndex)
                      {
                          tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                      }
  
-
                      if (state.backendState.readViewportArrayIndex)
                      {
                          vViewportIdx = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
@@ -1432,20 +1560,29 @@ static void TessellationStages(
                      {
                          // OOB VPAI indices => forced to zero.
                          vViewportIdx = SIMD16::max_epi32(vViewportIdx, SIMD16::setzero_si());
-                        simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                        simd16scalari vNumViewports =
+                            SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                          simd16scalari vClearMask = SIMD16::cmplt_epi32(vViewportIdx, vNumViewports);
-                        vViewportIdx = SIMD16::and_si(vClearMask, vViewportIdx);
+                        vViewportIdx             = SIMD16::and_si(vClearMask, vViewportIdx);
  
                          tessPa.useAlternateOffset = false;
-                        pfnClipFunc(pDC, tessPa, workerId, prim_simd16, GenMask(numPrims), primID, vViewportIdx, vRtIdx);
+                        pfnClipFunc(pDC,
+                                    tessPa,
+                                    workerId,
+                                    prim_simd16,
+                                    GenMask(numPrims),
+                                    primID,
+                                    vViewportIdx,
+                                    vRtIdx);
                      }
  #else
                      // Gather data from the SGV if provided.
                      simdscalari vViewportIdx = SIMD::setzero_si();
-                    simdscalari vRtIdx = SIMD::setzero_si();
-                    SIMD::Vec4 svgAttrib[4];
+                    simdscalari vRtIdx       = SIMD::setzero_si();
+                    SIMD::Vec4  svgAttrib[4];
  
-                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                    if (state.backendState.readViewportArrayIndex ||
+                        state.backendState.readRenderTargetArrayIndex)
                      {
                          tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                      }
@@ -1456,18 +1593,24 @@ static void TessellationStages(
  
                          // OOB VPAI indices => forced to zero.
                          vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
-                        simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                        simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
-                        vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
+                        simdscalari vNumViewports  = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                        simdscalari vClearMask     = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+                        vViewportIdx               = SIMD::and_si(vClearMask, vViewportIdx);
                          tessPa.viewportArrayActive = true;
                      }
                      if (state.backendState.readRenderTargetArrayIndex)
                      {
-                        vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                        vRtIdx               = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
                          tessPa.rtArrayActive = true;
                      }
-                    pfnClipFunc(pDC, tessPa, workerId, prim,
-                        GenMask(tessPa.NumPrims()), _simd_set1_epi32(dsContext.PrimitiveID), vViewportIdx, vRtIdx);
+                    pfnClipFunc(pDC,
+                                tessPa,
+                                workerId,
+                                prim,
+                                GenMask(tessPa.NumPrims()),
+                                _simd_set1_epi32(dsContext.PrimitiveID),
+                                vViewportIdx,
+                                vRtIdx);
  #endif
                  }
              }
@@ -1475,7 +1618,7 @@ static void TessellationStages(
              tessPa.NextPrim();
  
          } // while (tessPa.HasWork())
-    } // for (uint32_t p = 0; p < numPrims; ++p)
+    }     // for (uint32_t p = 0; p < numPrims; ++p)
  
  #if USE_SIMD16_FRONTEND
      if (gt_pTessellationThreadData->pDSOutput != nullptr)
@@ -1489,8 +1632,8 @@ static void TessellationStages(
      TSDestroyCtx(tsCtx);
  }
  
-THREAD PA_STATE::SIMDVERTEX *gpVertexStore = nullptr;
-THREAD uint32_t gVertexStoreSize = 0;
+THREAD PA_STATE::SIMDVERTEX* gpVertexStore = nullptr;
+THREAD uint32_t gVertexStoreSize           = 0;
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief FE handler for SwrDraw.
@@ -1503,20 +1646,14 @@ THREAD uint32_t gVertexStoreSize = 0;
  /// @param pDC - pointer to draw context.
  /// @param workerId - thread's worker id.
  /// @param pUserData - Pointer to DRAW_WORK
-template <
-    typename IsIndexedT,
-    typename IsCutIndexEnabledT,
-    typename HasTessellationT,
-    typename HasGeometryShaderT,
-    typename HasStreamOutT,
-    typename HasRastT>
-void ProcessDraw(
-    SWR_CONTEXT *pContext,
-    DRAW_CONTEXT *pDC,
-    uint32_t workerId,
-    void *pUserData)
+template <typename IsIndexedT,
+          typename IsCutIndexEnabledT,
+          typename HasTessellationT,
+          typename HasGeometryShaderT,
+          typename HasStreamOutT,
+          typename HasRastT>
+void ProcessDraw(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData)
  {
-
  #if KNOB_ENABLE_TOSS_POINTS
      if (KNOB_TOSS_QUEUE_FE)
      {
@@ -1528,8 +1665,8 @@ void ProcessDraw(
  
      void* pWorkerData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
-    DRAW_WORK&          work = *(DRAW_WORK*)pUserData;
-    const API_STATE&    state = GetApiState(pDC);
+    DRAW_WORK&       work  = *(DRAW_WORK*)pUserData;
+    const API_STATE& state = GetApiState(pDC);
  
      uint32_t indexSize = 0;
      uint32_t endVertex = work.numVerts;
@@ -1567,9 +1704,11 @@ void ProcessDraw(
      if (HasGeometryShaderT::value)
      {
  #if USE_SIMD16_FRONTEND
-        AllocateGsBuffers<SIMD512, KNOB_SIMD16_WIDTH>(pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
+        AllocateGsBuffers<SIMD512, KNOB_SIMD16_WIDTH>(
+            pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
  #else
-        AllocateGsBuffers<SIMD256, KNOB_SIMD_WIDTH>(pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
+        AllocateGsBuffers<SIMD256, KNOB_SIMD_WIDTH>(
+            pDC, state, NumVertsPerPrim(state.topology, true), &gsBuffers);
  #endif
      }
  
@@ -1599,14 +1738,14 @@ void ProcessDraw(
  #if USE_SIMD16_FRONTEND
      uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simd16vector);
  #else
-    uint32_t simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simdvector);
+    uint32_t          simdVertexSizeBytes = state.frontendState.vsVertexSize * sizeof(simdvector);
  #endif
  
      SWR_ASSERT(vertexCount <= MAX_NUM_VERTS_PER_PRIM);
  
      // Compute storage requirements for vertex store
      // TODO: allocation needs to be rethought for better cut support
-    uint32_t numVerts = vertexCount + 2; // Need extra space for PA state machine
+    uint32_t numVerts        = vertexCount + 2; // Need extra space for PA state machine
      uint32_t vertexStoreSize = numVerts * simdVertexSizeBytes;
  
      // grow the vertex store for the PA as necessary
@@ -1620,30 +1759,36 @@ void ProcessDraw(
  
          SWR_ASSERT(gpVertexStore == nullptr);
  
-        gpVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX *>(AlignedMalloc(vertexStoreSize, 64));
+        gpVertexStore = reinterpret_cast<PA_STATE::SIMDVERTEX*>(AlignedMalloc(vertexStoreSize, 64));
          gVertexStoreSize = vertexStoreSize;
  
          SWR_ASSERT(gpVertexStore != nullptr);
      }
  
      // choose primitive assembler
-    
-    PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC, state.topology, work.numVerts, gpVertexStore, numVerts, state.frontendState.vsVertexSize, GetNumVerts(state.topology, 1));
-    PA_STATE& pa = paFactory.GetPA();
+
+    PA_FACTORY<IsIndexedT, IsCutIndexEnabledT> paFactory(pDC,
+                                                         state.topology,
+                                                         work.numVerts,
+                                                         gpVertexStore,
+                                                         numVerts,
+                                                         state.frontendState.vsVertexSize,
+                                                         GetNumVerts(state.topology, 1));
+    PA_STATE&                                  pa = paFactory.GetPA();
  
  #if USE_SIMD16_FRONTEND
  #if USE_SIMD16_SHADERS
-    simd16vertex        vin;
+    simd16vertex vin;
  #else
-    simdvertex          vin_lo;
-    simdvertex          vin_hi;
+    simdvertex vin_lo;
+    simdvertex vin_hi;
  #endif
-    SWR_VS_CONTEXT      vsContext_lo;
-    SWR_VS_CONTEXT      vsContext_hi;
+    SWR_VS_CONTEXT vsContext_lo;
+    SWR_VS_CONTEXT vsContext_hi;
  
  #if USE_SIMD16_SHADERS
-    vsContext_lo.pVin = reinterpret_cast<simdvertex *>(&vin);
-    vsContext_hi.pVin = reinterpret_cast<simdvertex *>(&vin);
+    vsContext_lo.pVin = reinterpret_cast<simdvertex*>(&vin);
+    vsContext_hi.pVin = reinterpret_cast<simdvertex*>(&vin);
  #else
      vsContext_lo.pVin = &vin_lo;
      vsContext_hi.pVin = &vin_hi;
@@ -1651,11 +1796,11 @@ void ProcessDraw(
      vsContext_lo.AlternateOffset = 0;
      vsContext_hi.AlternateOffset = 1;
  
-    SWR_FETCH_CONTEXT   fetchInfo_lo = { 0 };
+    SWR_FETCH_CONTEXT fetchInfo_lo = {0};
  
-    fetchInfo_lo.pStreams = &state.vertexBuffers[0];
+    fetchInfo_lo.pStreams      = &state.vertexBuffers[0];
      fetchInfo_lo.StartInstance = work.startInstance;
-    fetchInfo_lo.StartVertex = 0;
+    fetchInfo_lo.StartVertex   = 0;
  
      if (IsIndexedT::value)
      {
@@ -1674,27 +1819,30 @@ void ProcessDraw(
          fetchInfo_lo.StartVertex = work.startVertex;
      }
  
-    SWR_FETCH_CONTEXT   fetchInfo_hi = fetchInfo_lo;
+    SWR_FETCH_CONTEXT fetchInfo_hi = fetchInfo_lo;
  
-    const simd16scalari vScale = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    const simd16scalari vScale =
+        _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  
      for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
      {
-        uint32_t  i = 0;
+        uint32_t i = 0;
  
          simd16scalari vIndex;
  
          if (IsIndexedT::value)
          {
              fetchInfo_lo.xpIndices = work.xpIB;
-            fetchInfo_hi.xpIndices = fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize;    // 1/2 of KNOB_SIMD16_WIDTH
+            fetchInfo_hi.xpIndices =
+                fetchInfo_lo.xpIndices + KNOB_SIMD_WIDTH * indexSize; // 1/2 of KNOB_SIMD16_WIDTH
          }
          else
          {
              vIndex = _simd16_add_epi32(_simd16_set1_epi32(work.startVertexID), vScale);
  
              fetchInfo_lo.xpIndices = (gfxptr_t)&vIndex;
-            fetchInfo_hi.xpIndices = (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
+            fetchInfo_hi.xpIndices =
+                (gfxptr_t)&vIndex + KNOB_SIMD_WIDTH * sizeof(int32_t); // 1/2 of KNOB_SIMD16_WIDTH
          }
  
          fetchInfo_lo.CurInstance = instanceNum;
@@ -1705,24 +1853,24 @@ void ProcessDraw(
  
          while (pa.HasWork())
          {
-            // GetNextVsOutput currently has the side effect of updating some PA state machine state.
-            // So we need to keep this outside of (i < endVertex) check.
+            // GetNextVsOutput currently has the side effect of updating some PA state machine
+            // state. So we need to keep this outside of (i < endVertex) check.
  
-            simdmask *pvCutIndices_lo = nullptr;
-            simdmask *pvCutIndices_hi = nullptr;
+            simdmask* pvCutIndices_lo = nullptr;
+            simdmask* pvCutIndices_hi = nullptr;
  
              if (IsIndexedT::value)
              {
                  // simd16mask <=> simdmask[2]
  
-                pvCutIndices_lo = &reinterpret_cast<simdmask *>(&pa.GetNextVsIndices())[0];
-                pvCutIndices_hi = &reinterpret_cast<simdmask *>(&pa.GetNextVsIndices())[1];
+                pvCutIndices_lo = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[0];
+                pvCutIndices_hi = &reinterpret_cast<simdmask*>(&pa.GetNextVsIndices())[1];
              }
  
-            simd16vertex &vout = pa.GetNextVsOutput();
+            simd16vertex& vout = pa.GetNextVsOutput();
  
-            vsContext_lo.pVout = reinterpret_cast<simdvertex *>(&vout);
-            vsContext_hi.pVout = reinterpret_cast<simdvertex *>(&vout);
+            vsContext_lo.pVout = reinterpret_cast<simdvertex*>(&vout);
+            vsContext_hi.pVout = reinterpret_cast<simdvertex*>(&vout);
  
              if (i < endVertex)
              {
@@ -1730,13 +1878,14 @@ void ProcessDraw(
                  {
                      fetchInfo_lo.xpLastIndex = fetchInfo_lo.xpIndices;
                      uint32_t offset;
-                    offset = std::min(endVertex-i, (uint32_t) KNOB_SIMD16_WIDTH);
+                    offset = std::min(endVertex - i, (uint32_t)KNOB_SIMD16_WIDTH);
                      offset *= 4; // convert from index to address
  #if USE_SIMD16_SHADERS
                      fetchInfo_lo.xpLastIndex += offset;
  #else
-                    fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t) KNOB_SIMD_WIDTH);
-                    uint32_t offset2 = std::min(offset, (uint32_t) KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
+                    fetchInfo_lo.xpLastIndex += std::min(offset, (uint32_t)KNOB_SIMD_WIDTH);
+                    uint32_t offset2 =
+                        std::min(offset, (uint32_t)KNOB_SIMD16_WIDTH) - KNOB_SIMD_WIDTH;
                      assert(offset >= 0);
                      fetchInfo_hi.xpLastIndex = fetchInfo_hi.xpIndices;
                      fetchInfo_hi.xpLastIndex += offset2;
@@ -1749,7 +1898,7 @@ void ProcessDraw(
  #else
                  state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_lo, vin_lo);
  
-                if ((i + KNOB_SIMD_WIDTH) < endVertex)  // 1/2 of KNOB_SIMD16_WIDTH
+                if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
                  {
                      state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo_hi, vin_hi);
                  }
@@ -1759,10 +1908,10 @@ void ProcessDraw(
                  // forward fetch generated vertex IDs to the vertex shader
  #if USE_SIMD16_SHADERS
  #if USE_SIMD16_VS
-                vsContext_lo.VertexID16 = _simd16_insert_si(
-                    vsContext_lo.VertexID16, fetchInfo_lo.VertexID, 0);
-                vsContext_lo.VertexID16 = _simd16_insert_si(
-                    vsContext_lo.VertexID16, fetchInfo_lo.VertexID2, 1);
+                vsContext_lo.VertexID16 =
+                    _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID, 0);
+                vsContext_lo.VertexID16 =
+                    _simd16_insert_si(vsContext_lo.VertexID16, fetchInfo_lo.VertexID2, 1);
  #else
                  vsContext_lo.VertexID = fetchInfo_lo.VertexID;
                  vsContext_hi.VertexID = fetchInfo_lo.VertexID2;
@@ -1776,8 +1925,8 @@ void ProcessDraw(
  #if USE_SIMD16_VS
                  vsContext_lo.mask16 = GenerateMask16(endVertex - i);
  #else
-                vsContext_lo.mask = GenerateMask(endVertex - i);
-                vsContext_hi.mask = GenerateMask(endVertex - (i + KNOB_SIMD_WIDTH));
+                vsContext_lo.mask     = GenerateMask(endVertex - i);
+                vsContext_hi.mask     = GenerateMask(endVertex - (i + KNOB_SIMD_WIDTH));
  #endif
  
                  // forward cut mask to the PA
@@ -1806,7 +1955,7 @@ void ProcessDraw(
                      state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_lo);
                      AR_EVENT(VSStats(vsContext_lo.stats.numInstExecuted));
  
-                    if ((i + KNOB_SIMD_WIDTH) < endVertex)  // 1/2 of KNOB_SIMD16_WIDTH
+                    if ((i + KNOB_SIMD_WIDTH) < endVertex) // 1/2 of KNOB_SIMD16_WIDTH
                      {
                          state.pfnVertexFunc(GetPrivateState(pDC), pWorkerData, &vsContext_hi);
                          AR_EVENT(VSStats(vsContext_hi.stats.numInstExecuted));
@@ -1840,33 +1989,61 @@ void ProcessDraw(
                              UPDATE_STAT_FE(IaPrimitives, pa.NumPrims());
  
                              const uint32_t numPrims = pa.NumPrims();
-                            const uint32_t numPrims_lo = std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
-                            const uint32_t numPrims_hi = std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
+                            const uint32_t numPrims_lo =
+                                std::min<uint32_t>(numPrims, KNOB_SIMD_WIDTH);
+                            const uint32_t numPrims_hi =
+                                std::max<uint32_t>(numPrims, KNOB_SIMD_WIDTH) - KNOB_SIMD_WIDTH;
  
-                            const simd16scalari primID = pa.GetPrimID(work.startPrimID);
-                            const simdscalari primID_lo = _simd16_extract_si(primID, 0);
-                            const simdscalari primID_hi = _simd16_extract_si(primID, 1);
+                            const simd16scalari primID    = pa.GetPrimID(work.startPrimID);
+                            const simdscalari   primID_lo = _simd16_extract_si(primID, 0);
+                            const simdscalari   primID_hi = _simd16_extract_si(primID, 1);
  
                              if (HasTessellationT::value)
                              {
                                  pa.useAlternateOffset = false;
-                                TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+                                TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
+                                    pDC,
+                                    workerId,
+                                    pa,
+                                    &gsBuffers,
+                                    pSoPrimData,
+                                    numPrims_lo,
+                                    primID_lo);
  
                                  if (numPrims_hi)
                                  {
                                      pa.useAlternateOffset = true;
-                                    TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+                                    TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
+                                        pDC,
+                                        workerId,
+                                        pa,
+                                        &gsBuffers,
+                                        pSoPrimData,
+                                        numPrims_hi,
+                                        primID_hi);
                                  }
                              }
                              else if (HasGeometryShaderT::value)
                              {
                                  pa.useAlternateOffset = false;
-                                GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_lo, primID_lo);
+                                GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
+                                                                             workerId,
+                                                                             pa,
+                                                                             &gsBuffers,
+                                                                             pSoPrimData,
+                                                                             numPrims_lo,
+                                                                             primID_lo);
  
                                  if (numPrims_hi)
                                  {
                                      pa.useAlternateOffset = true;
-                                    GeometryShaderStage<HasStreamOutT, HasRastT>(pDC, workerId, pa, &gsBuffers, pSoPrimData, numPrims_hi, primID_hi);
+                                    GeometryShaderStage<HasStreamOutT, HasRastT>(pDC,
+                                                                                 workerId,
+                                                                                 pa,
+                                                                                 &gsBuffers,
+                                                                                 pSoPrimData,
+                                                                                 numPrims_hi,
+                                                                                 primID_hi);
                                  }
                              }
                              else
@@ -1884,14 +2061,14 @@ void ProcessDraw(
                                      // Gather data from the SVG if provided.
                                      simd16scalari vpai = SIMD16::setzero_si();
                                      simd16scalari rtai = SIMD16::setzero_si();
-                                    SIMD16::Vec4 svgAttrib[4];
+                                    SIMD16::Vec4  svgAttrib[4];
  
-                                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                    if (state.backendState.readViewportArrayIndex ||
+                                        state.backendState.readRenderTargetArrayIndex)
                                      {
                                          pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                                      }
  
-
                                      if (state.backendState.readViewportArrayIndex)
                                      {
                                          vpai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
@@ -1899,19 +2076,29 @@ void ProcessDraw(
                                      }
                                      if (state.backendState.readRenderTargetArrayIndex)
                                      {
-                                        rtai = SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                        rtai =
+                                            SIMD16::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
                                          pa.rtArrayActive = true;
                                      }
  
                                      {
                                          // OOB VPAI indices => forced to zero.
                                          vpai = SIMD16::max_epi32(vpai, SIMD16::setzero_si());
-                                        simd16scalari vNumViewports = SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                                        simd16scalari vClearMask = SIMD16::cmplt_epi32(vpai, vNumViewports);
+                                        simd16scalari vNumViewports =
+                                            SIMD16::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                        simd16scalari vClearMask =
+                                            SIMD16::cmplt_epi32(vpai, vNumViewports);
                                          vpai = SIMD16::and_si(vClearMask, vpai);
  
                                          pa.useAlternateOffset = false;
-                                        pDC->pState->pfnProcessPrims_simd16(pDC, pa, workerId, prim_simd16, GenMask(numPrims), primID, vpai, rtai);
+                                        pDC->pState->pfnProcessPrims_simd16(pDC,
+                                                                            pa,
+                                                                            workerId,
+                                                                            prim_simd16,
+                                                                            GenMask(numPrims),
+                                                                            primID,
+                                                                            vpai,
+                                                                            rtai);
                                      }
                                  }
                              }
@@ -1937,12 +2124,12 @@ void ProcessDraw(
      }
  
  #else
-    SWR_VS_CONTEXT      vsContext;
-    SWR_FETCH_CONTEXT   fetchInfo = { 0 };
+    SWR_VS_CONTEXT    vsContext;
+    SWR_FETCH_CONTEXT fetchInfo = {0};
  
-    fetchInfo.pStreams = &state.vertexBuffers[0];
+    fetchInfo.pStreams      = &state.vertexBuffers[0];
      fetchInfo.StartInstance = work.startInstance;
-    fetchInfo.StartVertex = 0;
+    fetchInfo.StartVertex   = 0;
  
      if (IsIndexedT::value)
      {
@@ -1950,7 +2137,8 @@ void ProcessDraw(
  
          // if the entire index buffer isn't being consumed, set the last index
          // so that fetches < a SIMD wide will be masked off
-        fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
+        fetchInfo.pLastIndex =
+            (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
          if (xpLastRequestedIndex < fetchInfo.pLastIndex)
          {
              fetchInfo.pLastIndex = xpLastRequestedIndex;
@@ -1961,13 +2149,13 @@ void ProcessDraw(
          fetchInfo.StartVertex = work.startVertex;
      }
  
-    const simdscalari   vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+    const simdscalari vScale = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
  
      /// @todo: temporarily move instance loop in the FE to ensure SO ordering
      for (uint32_t instanceNum = 0; instanceNum < work.numInstances; instanceNum++)
      {
          simdscalari vIndex;
-        uint32_t  i = 0;
+        uint32_t    i = 0;
  
          if (IsIndexedT::value)
          {
@@ -1975,17 +2163,17 @@ void ProcessDraw(
          }
          else
          {
-            vIndex = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale);
+            vIndex             = _simd_add_epi32(_simd_set1_epi32(work.startVertexID), vScale);
              fetchInfo.pIndices = (const int32_t*)&vIndex;
          }
  
          fetchInfo.CurInstance = instanceNum;
-        vsContext.InstanceID = instanceNum;
+        vsContext.InstanceID  = instanceNum;
  
          while (pa.HasWork())
          {
-            // GetNextVsOutput currently has the side effect of updating some PA state machine state.
-            // So we need to keep this outside of (i < endVertex) check.
+            // GetNextVsOutput currently has the side effect of updating some PA state machine
+            // state. So we need to keep this outside of (i < endVertex) check.
              simdmask* pvCutIndices = nullptr;
              if (IsIndexedT::value)
              {
@@ -1993,12 +2181,11 @@ void ProcessDraw(
              }
  
              simdvertex& vout = pa.GetNextVsOutput();
-            vsContext.pVin = &vout;
-            vsContext.pVout = &vout;
+            vsContext.pVin   = &vout;
+            vsContext.pVout  = &vout;
  
              if (i < endVertex)
              {
-
                  // 1. Execute FS/VS for a single SIMD.
                  RDTSC_BEGIN(FEFetchShader, pDC->drawId);
                  state.pfnFetchFunc(GetPrivateState(pDC), pWorkerData, fetchInfo, vout);
@@ -2055,12 +2242,22 @@ void ProcessDraw(
                              if (HasTessellationT::value)
                              {
                                  TessellationStages<HasGeometryShaderT, HasStreamOutT, HasRastT>(
-                                    pDC, workerId, pa, &gsBuffers, pSoPrimData, pa.GetPrimID(work.startPrimID));
+                                    pDC,
+                                    workerId,
+                                    pa,
+                                    &gsBuffers,
+                                    pSoPrimData,
+                                    pa.GetPrimID(work.startPrimID));
                              }
                              else if (HasGeometryShaderT::value)
                              {
                                  GeometryShaderStage<HasStreamOutT, HasRastT>(
-                                    pDC, workerId, pa, &gsBuffers, pSoPrimData, pa.GetPrimID(work.startPrimID));
+                                    pDC,
+                                    workerId,
+                                    pa,
+                                    &gsBuffers,
+                                    pSoPrimData,
+                                    pa.GetPrimID(work.startPrimID));
                              }
                              else
                              {
@@ -2076,33 +2273,45 @@ void ProcessDraw(
  
                                      // Gather data from the SVG if provided.
                                      simdscalari vViewportIdx = SIMD::setzero_si();
-                                    simdscalari vRtIdx = SIMD::setzero_si();
-                                    SIMD::Vec4 svgAttrib[4];
+                                    simdscalari vRtIdx       = SIMD::setzero_si();
+                                    SIMD::Vec4  svgAttrib[4];
  
-                                    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
+                                    if (state.backendState.readViewportArrayIndex ||
+                                        state.backendState.readRenderTargetArrayIndex)
                                      {
                                          pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                                      }
  
                                      if (state.backendState.readViewportArrayIndex)
                                      {
-                                        vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
+                                        vViewportIdx =
+                                            SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
  
                                          // OOB VPAI indices => forced to zero.
-                                        vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
-                                        simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
-                                        simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
+                                        vViewportIdx =
+                                            SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
+                                        simdscalari vNumViewports =
+                                            SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
+                                        simdscalari vClearMask =
+                                            SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
                                          vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
                                          pa.viewportArrayActive = true;
                                      }
                                      if (state.backendState.readRenderTargetArrayIndex)
                                      {
-                                        vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
+                                        vRtIdx =
+                                            SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
                                          pa.rtArrayActive = true;
                                      }
  
-                                    pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
-                                        GenMask(pa.NumPrims()), pa.GetPrimID(work.startPrimID), vViewportIdx, vRtIdx);
+                                    pDC->pState->pfnProcessPrims(pDC,
+                                                                 pa,
+                                                                 workerId,
+                                                                 prim,
+                                                                 GenMask(pa.NumPrims()),
+                                                                 pa.GetPrimID(work.startPrimID),
+                                                                 vViewportIdx,
+                                                                 vRtIdx);
                                  }
                              }
                          }
@@ -2112,7 +2321,8 @@ void ProcessDraw(
  
              if (IsIndexedT::value)
              {
-                fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
+                fetchInfo.pIndices =
+                    (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
              }
              else
              {
@@ -2140,15 +2350,18 @@ struct FEDrawChooser
      }
  };
  
-
  // Selector for correct templated Draw front-end function
-PFN_FE_WORK_FUNC GetProcessDrawFunc(
-    bool IsIndexed,
-    bool IsCutIndexEnabled,
-    bool HasTessellation,
-    bool HasGeometryShader,
-    bool HasStreamOut,
-    bool HasRasterization)
+PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
+                                    bool IsCutIndexEnabled,
+                                    bool HasTessellation,
+                                    bool HasGeometryShader,
+                                    bool HasStreamOut,
+                                    bool HasRasterization)
  {
-    return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed, IsCutIndexEnabled, HasTessellation, HasGeometryShader, HasStreamOut, HasRasterization);
+    return TemplateArgUnroller<FEDrawChooser>::GetFunc(IsIndexed,
+                                                       IsCutIndexEnabled,
+                                                       HasTessellation,
+                                                       HasGeometryShader,
+                                                       HasStreamOut,
+                                                       HasRasterization);
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.h b/src/gallium/drivers/swr/rasterizer/core/frontend.h

index 6a2ec8474f124c2a9ad29ceb305b302931e6ff12..38fe77e240de01fc54d5c409627adbdfedd4763a 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.h
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.h
@@ -1,38 +1,38 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file frontend.h
-*
-* @brief Definitions for Frontend which handles vertex processing,
-*        primitive assembly, clipping, binning, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file frontend.h
+ *
+ * @brief Definitions for Frontend which handles vertex processing,
+ *        primitive assembly, clipping, binning, etc.
+ *
+ ******************************************************************************/
  #pragma once
  #include "context.h"
  #include "common/simdintrin.h"
  #include <type_traits>
  
  // Calculates the A and B coefficients for the 3 edges of the triangle
-// 
+//
  // maths for edge equations:
  //   standard form of a line in 2d
  //   Ax + By + C = 0
@@ -40,14 +40,14 @@
  //   B = x1 - x0
  //   C = x0y1 - x1y0
  INLINE
-void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
+void triangleSetupAB(const __m128 vX, const __m128 vY, __m128& vA, __m128& vB)
  {
      // vYsub = y1 y2 y0 dc
      __m128 vYsub = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
      // vY =    y0 y1 y2 dc
      vA = _mm_sub_ps(vY, vYsub);
  
-    // Result: 
+    // Result:
      // A[0] = y0 - y1
      // A[1] = y1 - y2
      // A[2] = y2 - y0
@@ -57,28 +57,31 @@ void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
      // vX =    x0 x1 x2 dc
      vB = _mm_sub_ps(vXsub, vX);
  
-    // Result: 
+    // Result:
      // B[0] = x1 - x0
      // B[1] = x2 - x1
      // B[2] = x0 - x2
  }
  
  INLINE
-void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i & vA, __m128i & vB)
+void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i& vA, __m128i& vB)
  {
      // generate edge equations
      // A = y0 - y1
      // B = x1 - x0
      // C = x0y1 - x1y0
      __m128i vYsub = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
-    vA = _mm_sub_epi32(vY, vYsub);
+    vA            = _mm_sub_epi32(vY, vYsub);
  
      __m128i vXsub = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
-    vB = _mm_sub_epi32(vXsub, vX);
+    vB            = _mm_sub_epi32(vXsub, vX);
  }
  
  INLINE
-void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3], simdscalari (&vA)[3], simdscalari (&vB)[3])
+void triangleSetupABIntVertical(const simdscalari vX[3],
+                                const simdscalari vY[3],
+                                simdscalari (&vA)[3],
+                                simdscalari (&vB)[3])
  {
      // A = y0 - y1
      // B = x1 - x0
@@ -93,7 +96,10 @@ void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3]
  
  #if ENABLE_AVX512_SIMD16
  INLINE
-void triangleSetupABIntVertical(const simd16scalari vX[3], const simd16scalari vY[3], simd16scalari(&vA)[3], simd16scalari(&vB)[3])
+void triangleSetupABIntVertical(const simd16scalari vX[3],
+                                const simd16scalari vY[3],
+                                simd16scalari (&vA)[3],
+                                simd16scalari (&vB)[3])
  {
      // A = y0 - y1
      // B = x1 - x0
@@ -112,7 +118,7 @@ void triangleSetupABIntVertical(const simd16scalari vX[3], const simd16scalari v
  // Px = x0-x2, Py = y0-y2
  // Qx = x1-x2, Qy = y1-y2
  //       |Px Qx|
-// det = |     | = PxQy - PyQx 
+// det = |     | = PxQy - PyQx
  //       |Py Qy|
  // simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
  //               try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
@@ -127,37 +133,39 @@ float calcDeterminantInt(const __m128i vA, const __m128i vB)
      // vBShuf = [B2, B0, B1, B0]
      __m128i vBShuf = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2));
      // vMul = [A1*B2, B1*A2]
-    __m128i vMul   = _mm_mul_epi32(vAShuf, vBShuf);
+    __m128i vMul = _mm_mul_epi32(vAShuf, vBShuf);
  
      // shuffle upper to lower
      // vMul2 = [B1*A2, B1*A2]
      __m128i vMul2 = _mm_shuffle_epi32(vMul, _MM_SHUFFLE(3, 2, 3, 2));
-    //vMul = [A1*B2 - B1*A2]
+    // vMul = [A1*B2 - B1*A2]
      vMul = _mm_sub_epi64(vMul, vMul2);
  
      int64_t result;
      _mm_store_sd((double*)&result, _mm_castsi128_pd(vMul));
  
      double dResult = (double)result;
-    dResult = dResult * (1.0 / FIXED_POINT16_SCALE);
+    dResult        = dResult * (1.0 / FIXED_POINT16_SCALE);
  
      return (float)dResult;
  }
  
  INLINE
-void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3], simdscalari *pvDet)
+void calcDeterminantIntVertical(const simdscalari vA[3],
+                                const simdscalari vB[3],
+                                simdscalari*      pvDet)
  {
      // refer to calcDeterminantInt comment for calculation explanation
  
      // A1*B2
-    simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]);     // 0 0 1 1 4 4 5 5
-    simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]);     // 2 2 3 3 6 6 7 7
+    simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5
+    simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7
  
      simdscalari vB2Lo = _simd_unpacklo_epi32(vB[2], vB[2]);
      simdscalari vB2Hi = _simd_unpackhi_epi32(vB[2], vB[2]);
  
-    simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo);        // 0 1 4 5
-    simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi);        // 2 3 6 7
+    simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5
+    simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7
  
      // B1*A2
      simdscalari vA2Lo = _simd_unpacklo_epi32(vA[2], vA[2]);
@@ -185,19 +193,22 @@ void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3]
  
  #if ENABLE_AVX512_SIMD16
  INLINE
-void calcDeterminantIntVertical(const simd16scalari vA[3], const simd16scalari vB[3], simd16scalari *pvDet)
+void calcDeterminantIntVertical(const simd16scalari vA[3],
+                                const simd16scalari vB[3],
+                                simd16scalari*      pvDet)
  {
      // refer to calcDeterminantInt comment for calculation explanation
  
      // A1*B2
-    simd16scalari vA1_lo = _simd16_unpacklo_epi32(vA[1], vA[1]);                // X 0 X 1 X 4 X 5 X 8 X 9 X C X D (32b)
-    simd16scalari vA1_hi = _simd16_unpackhi_epi32(vA[1], vA[1]);                // X 2 X 3 X 6 X 7 X A X B X E X F
+    simd16scalari vA1_lo =
+        _simd16_unpacklo_epi32(vA[1], vA[1]); // X 0 X 1 X 4 X 5 X 8 X 9 X C X D (32b)
+    simd16scalari vA1_hi = _simd16_unpackhi_epi32(vA[1], vA[1]); // X 2 X 3 X 6 X 7 X A X B X E X F
  
      simd16scalari vB2_lo = _simd16_unpacklo_epi32(vB[2], vB[2]);
      simd16scalari vB2_hi = _simd16_unpackhi_epi32(vB[2], vB[2]);
  
-    simd16scalari vA1B2_lo = _simd16_mul_epi32(vA1_lo, vB2_lo);                 // 0 1 4 5 8 9 C D (64b)
-    simd16scalari vA1B2_hi = _simd16_mul_epi32(vA1_hi, vB2_hi);                 // 2 3 6 7 A B E F
+    simd16scalari vA1B2_lo = _simd16_mul_epi32(vA1_lo, vB2_lo); // 0 1 4 5 8 9 C D (64b)
+    simd16scalari vA1B2_hi = _simd16_mul_epi32(vA1_hi, vB2_hi); // 2 3 6 7 A B E F
  
      // B1*A2
      simd16scalari vA2_lo = _simd16_unpacklo_epi32(vA[2], vA[2]);
@@ -210,32 +221,31 @@ void calcDeterminantIntVertical(const simd16scalari vA[3], const simd16scalari v
      simd16scalari vA2B1_hi = _simd16_mul_epi32(vA2_hi, vB1_hi);
  
      // A1*B2 - A2*B1
-    simd16scalari difflo = _simd16_sub_epi64(vA1B2_lo, vA2B1_lo);               // 0 1 4 5 8 9 C D (64b)
-    simd16scalari diffhi = _simd16_sub_epi64(vA1B2_hi, vA2B1_hi);               // 2 3 6 7 A B E F
+    simd16scalari difflo = _simd16_sub_epi64(vA1B2_lo, vA2B1_lo); // 0 1 4 5 8 9 C D (64b)
+    simd16scalari diffhi = _simd16_sub_epi64(vA1B2_hi, vA2B1_hi); // 2 3 6 7 A B E F
  
      // (1, 0, 1, 0) = 01 00 01 00 = 0x44, (3, 2, 3, 2) = 11 10 11 10 = 0xEE
-    simd16scalari templo = _simd16_permute2f128_si(difflo, diffhi, 0x44);       // 0 1 4 5 2 3 6 7 (64b)
-    simd16scalari temphi = _simd16_permute2f128_si(difflo, diffhi, 0xEE);       // 8 9 C D A B E F
+    simd16scalari templo = _simd16_permute2f128_si(difflo, diffhi, 0x44); // 0 1 4 5 2 3 6 7 (64b)
+    simd16scalari temphi = _simd16_permute2f128_si(difflo, diffhi, 0xEE); // 8 9 C D A B E F
  
      // (3, 1, 2, 0) = 11 01 10 00 = 0xD8
-    pvDet[0] = _simd16_permute2f128_si(templo, templo, 0xD8);                   // 0 1 2 3 4 5 6 7 (64b)
-    pvDet[1] = _simd16_permute2f128_si(temphi, temphi, 0xD8);                   // 8 9 A B C D E F
+    pvDet[0] = _simd16_permute2f128_si(templo, templo, 0xD8); // 0 1 2 3 4 5 6 7 (64b)
+    pvDet[1] = _simd16_permute2f128_si(temphi, temphi, 0xD8); // 8 9 A B C D E F
  }
  
  #endif
  INLINE
-void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
+void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128& vB, __m128& vC)
  {
      // C = -Ax - By
-    vC  = _mm_mul_ps(vA, vX);
-    __m128 vCy = _mm_mul_ps(vB, vY);    
-    vC  = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
-    vC  = _mm_sub_ps(vC, vCy);
+    vC         = _mm_mul_ps(vA, vX);
+    __m128 vCy = _mm_mul_ps(vB, vY);
+    vC         = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
+    vC         = _mm_sub_ps(vC, vCy);
  }
  
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simdvector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
  {
      simdscalar m00 = _simd_load1_ps(&vpMatrices.m00[0]);
      simdscalar m30 = _simd_load1_ps(&vpMatrices.m30[0]);
@@ -253,9 +263,8 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
  }
  
  #if USE_SIMD16_FRONTEND
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simd16vector* v, const SWR_VIEWPORT_MATRICES& vpMatrices)
  {
      const simd16scalar m00 = _simd16_broadcast_ss(&vpMatrices.m00[0]);
      const simd16scalar m30 = _simd16_broadcast_ss(&vpMatrices.m30[0]);
@@ -273,9 +282,10 @@ void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices
  }
  
  #endif
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari const &vViewportIdx)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simdvector*                  v,
+                              const SWR_VIEWPORT_MATRICES& vpMatrices,
+                              simdscalari const&           vViewportIdx)
  {
      // perform a gather of each matrix element based on the viewport array indexes
      simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
@@ -294,9 +304,10 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices,
  }
  
  #if USE_SIMD16_FRONTEND
-template<uint32_t NumVerts>
-INLINE
-void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simd16scalari const &vViewportIdx)
+template <uint32_t NumVerts>
+INLINE void viewportTransform(simd16vector*                v,
+                              const SWR_VIEWPORT_MATRICES& vpMatrices,
+                              simd16scalari const&         vViewportIdx)
  {
      // perform a gather of each matrix element based on the viewport array indexes
      const simd16scalar m00 = _simd16_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
@@ -316,7 +327,7 @@ void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES & vpMatrices
  
  #endif
  INLINE
-void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
+void calcBoundingBoxInt(const __m128i& vX, const __m128i& vY, SWR_RECT& bbox)
  {
      // Need horizontal fp min here
      __m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
@@ -325,18 +336,17 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
      __m128i vY1 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
      __m128i vY2 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));
  
-
      __m128i vMinX = _mm_min_epi32(vX, vX1);
-            vMinX = _mm_min_epi32(vMinX, vX2);
+    vMinX         = _mm_min_epi32(vMinX, vX2);
  
      __m128i vMaxX = _mm_max_epi32(vX, vX1);
-            vMaxX = _mm_max_epi32(vMaxX, vX2);
+    vMaxX         = _mm_max_epi32(vMaxX, vX2);
  
      __m128i vMinY = _mm_min_epi32(vY, vY1);
-            vMinY = _mm_min_epi32(vMinY, vY2);
+    vMinY         = _mm_min_epi32(vMinY, vY2);
  
      __m128i vMaxY = _mm_max_epi32(vY, vY1);
-            vMaxY = _mm_max_epi32(vMaxY, vY2);
+    vMaxY         = _mm_max_epi32(vMaxY, vY2);
  
      bbox.xmin = _mm_extract_epi32(vMinX, 0);
      bbox.xmax = _mm_extract_epi32(vMaxX, 0);
@@ -345,54 +355,84 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
  }
  
  INLINE
-bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
+bool CanUseSimplePoints(DRAW_CONTEXT* pDC)
  {
      const API_STATE& state = GetApiState(pDC);
  
      return (state.rastState.sampleCount == SWR_MULTISAMPLE_1X &&
-            state.rastState.pointSize == 1.0f &&
-            !state.rastState.pointParam &&
-            !state.rastState.pointSpriteEnable &&
-            !state.backendState.clipDistanceMask);
+            state.rastState.pointSize == 1.0f && !state.rastState.pointParam &&
+            !state.rastState.pointSpriteEnable && !state.backendState.clipDistanceMask);
  }
  
  INLINE
  bool vHasNaN(const __m128& vec)
  {
-    const __m128 result = _mm_cmpunord_ps(vec, vec);
-    const int32_t mask = _mm_movemask_ps(result);
+    const __m128  result = _mm_cmpunord_ps(vec, vec);
+    const int32_t mask   = _mm_movemask_ps(result);
      return (mask != 0);
  }
  
  uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
  uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
  
-
  // ProcessDraw front-end function.  All combinations of parameter values are available
-PFN_FE_WORK_FUNC GetProcessDrawFunc(
-    bool IsIndexed,
-    bool IsCutIndexEnabled,
-    bool HasTessellation,
-    bool HasGeometryShader,
-    bool HasStreamOut,
-    bool HasRasterization);
-
-void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
-void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
+PFN_FE_WORK_FUNC GetProcessDrawFunc(bool IsIndexed,
+                                    bool IsCutIndexEnabled,
+                                    bool HasTessellation,
+                                    bool HasGeometryShader,
+                                    bool HasStreamOut,
+                                    bool HasRasterization);
+
+void ProcessClear(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
+void ProcessStoreTiles(SWR_CONTEXT*  pContext,
+                       DRAW_CONTEXT* pDC,
+                       uint32_t      workerId,
+                       void*         pUserData);
+void ProcessDiscardInvalidateTiles(SWR_CONTEXT*  pContext,
+                                   DRAW_CONTEXT* pDC,
+                                   uint32_t      workerId,
+                                   void*         pUserData);
+void ProcessSync(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
+void ProcessShutdown(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, void* pUserData);
  
  PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
  #if USE_SIMD16_FRONTEND
  PFN_PROCESS_PRIMS_SIMD16 GetBinTrianglesFunc_simd16(bool IsConservative);
  #endif
  
-struct PA_STATE_BASE;  // forward decl
-void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
-void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari const &primID, simdscalari const &viewportIdx, simdscalari const &rtIdx);
+struct PA_STATE_BASE; // forward decl
+void BinPoints(DRAW_CONTEXT*      pDC,
+               PA_STATE&          pa,
+               uint32_t           workerId,
+               simdvector         prims[3],
+               uint32_t           primMask,
+               simdscalari const& primID,
+               simdscalari const& viewportIdx,
+               simdscalari const& rtIdx);
+void BinLines(DRAW_CONTEXT*      pDC,
+              PA_STATE&          pa,
+              uint32_t           workerId,
+              simdvector         prims[3],
+              uint32_t           primMask,
+              simdscalari const& primID,
+              simdscalari const& viewportIdx,
+              simdscalari const& rtIdx);
  #if USE_SIMD16_FRONTEND
-void SIMDCALL BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
-void SIMDCALL BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari const &primID, simd16scalari const &viewportIdx, simd16scalari const &rtIdx);
+void SIMDCALL BinPoints_simd16(DRAW_CONTEXT*        pDC,
+                               PA_STATE&            pa,
+                               uint32_t             workerId,
+                               simd16vector         prims[3],
+                               uint32_t             primMask,
+                               simd16scalari const& primID,
+                               simd16scalari const& viewportIdx,
+                               simd16scalari const& rtIdx);
+void SIMDCALL BinLines_simd16(DRAW_CONTEXT*        pDC,
+                              PA_STATE&            pa,
+                              uint32_t             workerId,
+                              simd16vector         prims[3],
+                              uint32_t             primMask,
+                              simd16scalari const& primID,
+                              simd16scalari const& viewportIdx,
+                              simd16scalari const& rtIdx);
  #endif
  
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h

index d88a3aac97c4fef95634a47eb3307b6c58547ae7..b52accbbab317ab89fa4eb1ecdc92fc68474794b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
@@ -1,48 +1,48 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file knobs.h
-*
-* @brief Static (Compile-Time) Knobs for Core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file knobs.h
+ *
+ * @brief Static (Compile-Time) Knobs for Core.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <stdint.h>
  #include <gen_knobs.h>
  
-#define KNOB_ARCH_AVX    0
-#define KNOB_ARCH_AVX2   1
+#define KNOB_ARCH_AVX 0
+#define KNOB_ARCH_AVX2 1
  #define KNOB_ARCH_AVX512 2
  
  ///////////////////////////////////////////////////////////////////////////////
  // AVX512 Support
  ///////////////////////////////////////////////////////////////////////////////
  
-#define ENABLE_AVX512_SIMD16    1
-#define USE_8x2_TILE_BACKEND    1
-#define USE_SIMD16_FRONTEND     1
-#define USE_SIMD16_SHADERS      1   // requires USE_SIMD16_FRONTEND
-#define USE_SIMD16_VS           1   // requires USE_SIMD16_SHADERS
+#define ENABLE_AVX512_SIMD16 1
+#define USE_8x2_TILE_BACKEND 1
+#define USE_SIMD16_FRONTEND 1
+#define USE_SIMD16_SHADERS 1 // requires USE_SIMD16_FRONTEND
+#define USE_SIMD16_VS 1      // requires USE_SIMD16_SHADERS
  
  ///////////////////////////////////////////////////////////////////////////////
  // Architecture validation
@@ -89,49 +89,49 @@
  // Configuration knobs
  ///////////////////////////////////////////////////////////////////////////////
  // Maximum supported number of active vertex buffer streams
-#define KNOB_NUM_STREAMS                    32
+#define KNOB_NUM_STREAMS 32
  
  // Maximum supported active viewports and scissors
-#define KNOB_NUM_VIEWPORTS_SCISSORS         16
+#define KNOB_NUM_VIEWPORTS_SCISSORS 16
  
  // Guardband range used by the clipper
-#define KNOB_GUARDBAND_WIDTH                32768.0f
-#define KNOB_GUARDBAND_HEIGHT               32768.0f
+#define KNOB_GUARDBAND_WIDTH 32768.0f
+#define KNOB_GUARDBAND_HEIGHT 32768.0f
  
  ///////////////////////////////
  // Macro tile configuration
  ///////////////////////////////
  
  // raster tile dimensions
-#define KNOB_TILE_X_DIM                      8
-#define KNOB_TILE_X_DIM_SHIFT                3
-#define KNOB_TILE_Y_DIM                      8
-#define KNOB_TILE_Y_DIM_SHIFT                3
+#define KNOB_TILE_X_DIM 8
+#define KNOB_TILE_X_DIM_SHIFT 3
+#define KNOB_TILE_Y_DIM 8
+#define KNOB_TILE_Y_DIM_SHIFT 3
  
-// fixed macrotile pixel dimension for now, eventually will be 
+// fixed macrotile pixel dimension for now, eventually will be
  // dynamically set based on tile format and pixel size
-#define KNOB_MACROTILE_X_DIM                32
-#define KNOB_MACROTILE_Y_DIM                32
-#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT    13
-#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT    13
-#define KNOB_MACROTILE_X_DIM_FIXED          (KNOB_MACROTILE_X_DIM << 8)
-#define KNOB_MACROTILE_Y_DIM_FIXED          (KNOB_MACROTILE_Y_DIM << 8)
-#define KNOB_MACROTILE_X_DIM_IN_TILES       (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
-#define KNOB_MACROTILE_Y_DIM_IN_TILES       (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
+#define KNOB_MACROTILE_X_DIM 32
+#define KNOB_MACROTILE_Y_DIM 32
+#define KNOB_MACROTILE_X_DIM_FIXED_SHIFT 13
+#define KNOB_MACROTILE_Y_DIM_FIXED_SHIFT 13
+#define KNOB_MACROTILE_X_DIM_FIXED (KNOB_MACROTILE_X_DIM << 8)
+#define KNOB_MACROTILE_Y_DIM_FIXED (KNOB_MACROTILE_Y_DIM << 8)
+#define KNOB_MACROTILE_X_DIM_IN_TILES (KNOB_MACROTILE_X_DIM >> KNOB_TILE_X_DIM_SHIFT)
+#define KNOB_MACROTILE_Y_DIM_IN_TILES (KNOB_MACROTILE_Y_DIM >> KNOB_TILE_Y_DIM_SHIFT)
  
  // total # of hot tiles available. This should be enough to
  // fully render a 16kx16k 128bpp render target
-#define KNOB_NUM_HOT_TILES_X                 256
-#define KNOB_NUM_HOT_TILES_Y                 256
-#define KNOB_COLOR_HOT_TILE_FORMAT           R32G32B32A32_FLOAT
-#define KNOB_DEPTH_HOT_TILE_FORMAT           R32_FLOAT
-#define KNOB_STENCIL_HOT_TILE_FORMAT         R8_UINT
+#define KNOB_NUM_HOT_TILES_X 256
+#define KNOB_NUM_HOT_TILES_Y 256
+#define KNOB_COLOR_HOT_TILE_FORMAT R32G32B32A32_FLOAT
+#define KNOB_DEPTH_HOT_TILE_FORMAT R32_FLOAT
+#define KNOB_STENCIL_HOT_TILE_FORMAT R8_UINT
  
  // Max scissor rectangle
-#define KNOB_MAX_SCISSOR_X                  KNOB_NUM_HOT_TILES_X * KNOB_MACROTILE_X_DIM
-#define KNOB_MAX_SCISSOR_Y                  KNOB_NUM_HOT_TILES_Y * KNOB_MACROTILE_Y_DIM
+#define KNOB_MAX_SCISSOR_X (KNOB_NUM_HOT_TILES_X * KNOB_MACROTILE_X_DIM) // parenthesized product
+#define KNOB_MAX_SCISSOR_Y (KNOB_NUM_HOT_TILES_Y * KNOB_MACROTILE_Y_DIM) // parenthesized product
  
-#if KNOB_SIMD_WIDTH==8 && KNOB_TILE_X_DIM < 4
+#if KNOB_SIMD_WIDTH == 8 && KNOB_TILE_X_DIM < 4
  #error "incompatible width/tile dimensions"
  #endif
  
@@ -160,14 +160,14 @@
  ///////////////////////////////////////////////////////////////////////////////
  // Optimization knobs
  ///////////////////////////////////////////////////////////////////////////////
-#define KNOB_USE_FAST_SRGB                     TRUE
+#define KNOB_USE_FAST_SRGB TRUE
  
  // enables cut-aware primitive assembler
-#define KNOB_ENABLE_CUT_AWARE_PA               TRUE
+#define KNOB_ENABLE_CUT_AWARE_PA TRUE
  
  // enables early rasterization (useful for small triangles)
  #if !defined(KNOB_ENABLE_EARLY_RAST)
-#define KNOB_ENABLE_EARLY_RAST                 1
+#define KNOB_ENABLE_EARLY_RAST 1
  #endif
  
  #if KNOB_ENABLE_EARLY_RAST
@@ -182,6 +182,5 @@
  
  // Set to 1 to use the dynamic KNOB_TOSS_XXXX knobs.
  #if !defined(KNOB_ENABLE_TOSS_POINTS)
-#define KNOB_ENABLE_TOSS_POINTS                 0
+#define KNOB_ENABLE_TOSS_POINTS 0
  #endif
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs_init.h b/src/gallium/drivers/swr/rasterizer/core/knobs_init.h

index 12c2a3031eafa2d42f0ed00a5b0105b3b5897484..f8797a8f2bc6eb4b485550f6961f0d5c0ef1672f 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs_init.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file knobs_init.h
-*
-* @brief Dynamic Knobs Initialization for Core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file knobs_init.h
+ *
+ * @brief Dynamic Knobs Initialization for Core.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <core/knobs.h>
@@ -37,9 +37,9 @@
  template <typename T>
  static inline void ConvertEnvToKnob(const char* pOverride, T& knobValue)
  {
-    uint32_t value = 0;
-    char* pStopped = nullptr;
-    value = strtoul(pOverride, &pStopped, 0);
+    uint32_t value    = 0;
+    char*    pStopped = nullptr;
+    value             = strtoul(pOverride, &pStopped, 0);
      if (pStopped != pOverride)
      {
          knobValue = static_cast<T>(value);
@@ -65,9 +65,9 @@ static inline void ConvertEnvToKnob(const char* pOverride, bool& knobValue)
      }
  
      // Try converting to a number and casting to bool
-    uint32_t value = 0;
-    char* pStopped = nullptr;
-    value = strtoul(pOverride, &pStopped, 0);
+    uint32_t value    = 0;
+    char*    pStopped = nullptr;
+    value             = strtoul(pOverride, &pStopped, 0);
      if (pStopped != pOverride)
      {
          knobValue = value != 0;
diff --git a/src/gallium/drivers/swr/rasterizer/core/multisample.h b/src/gallium/drivers/swr/rasterizer/core/multisample.h

index 2ca8c1b3e8da8493cc13d364db628370c7e0291a..3b23974a7f4fa3973b8ae19d4648a98c21f03266 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/multisample.h
+++ b/src/gallium/drivers/swr/rasterizer/core/multisample.h
@@ -1,28 +1,28 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file multisample.h
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file multisample.h
+ *
+ ******************************************************************************/
  
  #pragma once
  
@@ -36,225 +36,387 @@ typedef std::integral_constant<int, 1> SingleSampleT;
  INLINE
  SWR_MULTISAMPLE_COUNT GetSampleCount(uint32_t numSamples)
  {
-    switch(numSamples)
+    switch (numSamples)
      {
-    case 1: return SWR_MULTISAMPLE_1X;
-    case 2: return SWR_MULTISAMPLE_2X;
-    case 4: return SWR_MULTISAMPLE_4X;
-    case 8: return SWR_MULTISAMPLE_8X;
-    case 16: return SWR_MULTISAMPLE_16X;
-    default: assert(0); return SWR_MULTISAMPLE_1X;
+    case 1:
+        return SWR_MULTISAMPLE_1X;
+    case 2:
+        return SWR_MULTISAMPLE_2X;
+    case 4:
+        return SWR_MULTISAMPLE_4X;
+    case 8:
+        return SWR_MULTISAMPLE_8X;
+    case 16:
+        return SWR_MULTISAMPLE_16X;
+    default:
+        assert(0);
+        return SWR_MULTISAMPLE_1X;
      }
  }
  
  // hardcoded offsets based on Direct3d standard multisample positions
  // 8 x 8 pixel grid ranging from (0, 0) to (15, 15), with (0, 0) = UL pixel corner
  // coords are 0.8 fixed point offsets from (0, 0)
-template<SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
+template <SWR_MULTISAMPLE_COUNT sampleCount, bool isCenter = false>
  struct MultisampleTraits
  {
-    INLINE static float X(uint32_t sampleNum) = delete;
-    INLINE static float Y(uint32_t sampleNum) = delete;
-    INLINE static simdscalari FullSampleMask() = delete;
+    INLINE static float       X(uint32_t sampleNum) = delete;
+    INLINE static float       Y(uint32_t sampleNum) = delete;
+    INLINE static simdscalari FullSampleMask()      = delete;
  
      static const uint32_t numSamples = 0;
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_1X, false>
  {
-    INLINE static float X(uint32_t sampleNum) {return samplePosX[sampleNum];};
-    INLINE static float Y(uint32_t sampleNum) {return samplePosY[sampleNum];};
-    INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
+    INLINE static float       X(uint32_t sampleNum) { return samplePosX[sampleNum]; };
+    INLINE static float       Y(uint32_t sampleNum) { return samplePosY[sampleNum]; };
+    INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
  
-    static const uint32_t numSamples = 1;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
-    static constexpr uint32_t samplePosXi[1] = { 0x80 };
-    static constexpr uint32_t samplePosYi[1] = { 0x80 };
-    static constexpr float samplePosX[1] = { 0.5f };
-    static constexpr float samplePosY[1] = { 0.5f };
+    static const uint32_t              numSamples         = 1;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_1X;
+    static constexpr uint32_t          samplePosXi[1]     = {0x80};
+    static constexpr uint32_t          samplePosYi[1]     = {0x80};
+    static constexpr float             samplePosX[1]      = {0.5f};
+    static constexpr float             samplePosY[1]      = {0.5f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_1X, true>
  {
-    INLINE static float X(uint32_t sampleNum) {return 0.5f;};
-    INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
-    INLINE static simdscalari FullSampleMask(){return _simd_set1_epi32(0x1);};
-    
-    static const uint32_t numSamples = 1;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_1X;
-    static constexpr uint32_t samplePosXi[1] = { 0x80 };
-    static constexpr uint32_t samplePosYi[1] = { 0x80 };
-    static constexpr float samplePosX[1] = { 0.5f };
-    static constexpr float samplePosY[1] = { 0.5f };
+    INLINE static float       X(uint32_t sampleNum) { return 0.5f; };
+    INLINE static float       Y(uint32_t sampleNum) { return 0.5f; };
+    INLINE static simdscalari FullSampleMask() { return _simd_set1_epi32(0x1); };
+
+    static const uint32_t              numSamples         = 1;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_1X;
+    static constexpr uint32_t          samplePosXi[1]     = {0x80};
+    static constexpr uint32_t          samplePosYi[1]     = {0x80};
+    static constexpr float             samplePosX[1]      = {0.5f};
+    static constexpr float             samplePosY[1]      = {0.5f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_2X, false>
  {
-    INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
-    INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+    INLINE static float X(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosX[sampleNum];
+    };
+    INLINE static float Y(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosY[sampleNum];
+    };
      INLINE static simdscalari FullSampleMask()
      {
-         static const simdscalari mask =_simd_set1_epi32(0x3);
-         return mask;
+        static const simdscalari mask = _simd_set1_epi32(0x3);
+        return mask;
      }
  
-    static const uint32_t numSamples = 2;
-    static const uint32_t numCoverageSamples = 2;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
-    static constexpr uint32_t samplePosXi[2] = { 0xC0, 0x40 };
-    static constexpr uint32_t samplePosYi[2] = { 0xC0, 0x40 };
-    static constexpr float samplePosX[2] = {0.75f, 0.25f};
-    static constexpr float samplePosY[2] = {0.75f, 0.25f};
+    static const uint32_t              numSamples         = 2;
+    static const uint32_t              numCoverageSamples = 2;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_2X;
+    static constexpr uint32_t          samplePosXi[2]     = {0xC0, 0x40};
+    static constexpr uint32_t          samplePosYi[2]     = {0xC0, 0x40};
+    static constexpr float             samplePosX[2]      = {0.75f, 0.25f};
+    static constexpr float             samplePosY[2]      = {0.75f, 0.25f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_2X, true>
  {
-    INLINE static float X(uint32_t sampleNum) {return 0.5f;};
-    INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+    INLINE static float       X(uint32_t sampleNum) { return 0.5f; };
+    INLINE static float       Y(uint32_t sampleNum) { return 0.5f; };
      INLINE static simdscalari FullSampleMask()
      {
-         static const simdscalari mask =_simd_set1_epi32(0x3);
-         return mask;
+        static const simdscalari mask = _simd_set1_epi32(0x3);
+        return mask;
      }
-    static const uint32_t numSamples = 2;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_2X;
-    static constexpr uint32_t samplePosXi[2] = { 0x80 , 0x80 };
-    static constexpr uint32_t samplePosYi[2] = { 0x80 , 0x80 };
-    static constexpr float samplePosX[2] = { 0.5f, 0.5f };
-    static constexpr float samplePosY[2] = { 0.5f, 0.5f };
+    static const uint32_t              numSamples         = 2;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_2X;
+    static constexpr uint32_t          samplePosXi[2]     = {0x80, 0x80};
+    static constexpr uint32_t          samplePosYi[2]     = {0x80, 0x80};
+    static constexpr float             samplePosX[2]      = {0.5f, 0.5f};
+    static constexpr float             samplePosY[2]      = {0.5f, 0.5f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_4X, false>
  {
-    INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
-    INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+    INLINE static float X(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosX[sampleNum];
+    };
+    INLINE static float Y(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosY[sampleNum];
+    };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xF);
          return mask;
      }
  
-    static const uint32_t numSamples = 4;
-    static const uint32_t numCoverageSamples = 4;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
-    static constexpr uint32_t samplePosXi[4] = { 0x60, 0xE0, 0x20, 0xA0 };
-    static constexpr uint32_t samplePosYi[4] = { 0x20, 0x60, 0xA0, 0xE0 };
-    static constexpr float samplePosX[4] = { 0.375f, 0.875f, 0.125f, 0.625f };
-    static constexpr float samplePosY[4] = { 0.125f, 0.375f, 0.625f, 0.875f };
+    static const uint32_t              numSamples         = 4;
+    static const uint32_t              numCoverageSamples = 4;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_4X;
+    static constexpr uint32_t          samplePosXi[4]     = {0x60, 0xE0, 0x20, 0xA0};
+    static constexpr uint32_t          samplePosYi[4]     = {0x20, 0x60, 0xA0, 0xE0};
+    static constexpr float             samplePosX[4]      = {0.375f, 0.875f, 0.125f, 0.625f};
+    static constexpr float             samplePosY[4]      = {0.125f, 0.375f, 0.625f, 0.875f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_4X, true>
  {
-    INLINE static float X(uint32_t sampleNum) {return 0.5f;};
-    INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+    INLINE static float       X(uint32_t sampleNum) { return 0.5f; };
+    INLINE static float       Y(uint32_t sampleNum) { return 0.5f; };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xF);
          return mask;
      }
  
-    static const uint32_t numSamples = 4;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_4X;
-    static constexpr uint32_t samplePosXi[4] = { 0x80, 0x80, 0x80, 0x80 };
-    static constexpr uint32_t samplePosYi[4] = { 0x80, 0x80, 0x80, 0x80 };
-    static constexpr float samplePosX[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
-    static constexpr float samplePosY[4] = { 0.5f, 0.5f, 0.5f, 0.5f };
+    static const uint32_t              numSamples         = 4;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_4X;
+    static constexpr uint32_t          samplePosXi[4]     = {0x80, 0x80, 0x80, 0x80};
+    static constexpr uint32_t          samplePosYi[4]     = {0x80, 0x80, 0x80, 0x80};
+    static constexpr float             samplePosX[4]      = {0.5f, 0.5f, 0.5f, 0.5f};
+    static constexpr float             samplePosY[4]      = {0.5f, 0.5f, 0.5f, 0.5f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_8X, false>
  {
-    INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
-    INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+    INLINE static float X(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosX[sampleNum];
+    };
+    INLINE static float Y(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosY[sampleNum];
+    };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xFF);
          return mask;
      }
  
-    static const uint32_t numSamples = 8;
-    static const uint32_t numCoverageSamples = 8;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
-    static constexpr uint32_t samplePosXi[8] = { 0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0 };
-    static constexpr uint32_t samplePosYi[8] = { 0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10 };
-    static constexpr float samplePosX[8] = { 0.5625f, 0.4375f, 0.8125f, 0.3125f, 0.1875f, 0.0625f, 0.6875f, 0.9375f };
-    static constexpr float samplePosY[8] = { 0.3125f, 0.6875f, 0.5625f, 0.1875f, 0.8125f, 0.4375f, 0.9375f, 0.0625f };
+    static const uint32_t              numSamples         = 8;
+    static const uint32_t              numCoverageSamples = 8;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_8X;
+    static constexpr uint32_t samplePosXi[8] = {0x90, 0x70, 0xD0, 0x50, 0x30, 0x10, 0xB0, 0xF0};
+    static constexpr uint32_t samplePosYi[8] = {0x50, 0xB0, 0x90, 0x30, 0xD0, 0x70, 0xF0, 0x10};
+    static constexpr float    samplePosX[8]  = {
+        0.5625f, 0.4375f, 0.8125f, 0.3125f, 0.1875f, 0.0625f, 0.6875f, 0.9375f};
+    static constexpr float samplePosY[8] = {
+        0.3125f, 0.6875f, 0.5625f, 0.1875f, 0.8125f, 0.4375f, 0.9375f, 0.0625f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_8X, true>
  {
-    INLINE static float X(uint32_t sampleNum) {return 0.5f;};
-    INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+    INLINE static float       X(uint32_t sampleNum) { return 0.5f; };
+    INLINE static float       Y(uint32_t sampleNum) { return 0.5f; };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xFF);
          return mask;
      }
-    static const uint32_t numSamples = 8;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_8X;
-    static constexpr uint32_t samplePosXi[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-    static constexpr uint32_t samplePosYi[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-    static constexpr float samplePosX[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
-    static constexpr float samplePosY[8] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
+    static const uint32_t              numSamples         = 8;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_8X;
+    static constexpr uint32_t samplePosXi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+    static constexpr uint32_t samplePosYi[8] = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80};
+    static constexpr float    samplePosX[8]  = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
+    static constexpr float    samplePosY[8]  = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_16X, false>
  {
-    INLINE static float X(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosX[sampleNum]; };
-    INLINE static float Y(uint32_t sampleNum) { SWR_ASSERT(sampleNum < numSamples); return samplePosY[sampleNum]; };
+    INLINE static float X(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosX[sampleNum];
+    };
+    INLINE static float Y(uint32_t sampleNum)
+    {
+        SWR_ASSERT(sampleNum < numSamples);
+        return samplePosY[sampleNum];
+    };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xFFFF);
          return mask;
      }
  
-    static const uint32_t numSamples = 16;
-    static const uint32_t numCoverageSamples = 16;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
-    static constexpr uint32_t samplePosXi[16] = { 0x90, 0x70, 0x50, 0xC0, 0x30, 0xA0, 0xD0, 0xB0, 0x60, 0x80, 0x40, 0x20, 0x00, 0xF0, 0xE0, 0x10 };
-    static constexpr uint32_t samplePosYi[16] = { 0x90, 0x50, 0xA0, 0x70, 0x60, 0xD0, 0xB0, 0x30, 0xE0, 0x10, 0x20, 0xC0, 0x80, 0x40, 0xF0, 0x00 };
-    static constexpr float samplePosX[16] = { 0.5625f, 0.4375f, 0.3125f, 0.7500f, 0.1875f, 0.6250f, 0.8125f, 0.6875f, 0.3750f, 0.5000f, 0.2500f, 0.1250f, 0.0000f, 0.9375f, 0.8750f, 0.0625f };
-    static constexpr float samplePosY[16] = { 0.5625f, 0.3125f, 0.6250f, 0.4375f, 0.3750f, 0.8125f, 0.6875f, 0.1875f, 0.8750f, 0.0625f, 0.1250f, 0.7500f, 0.5000f, 0.2500f, 0.9375f, 0.0000f };
+    static const uint32_t              numSamples         = 16;
+    static const uint32_t              numCoverageSamples = 16;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_16X;
+    static constexpr uint32_t          samplePosXi[16]    = {0x90,
+                                                 0x70,
+                                                 0x50,
+                                                 0xC0,
+                                                 0x30,
+                                                 0xA0,
+                                                 0xD0,
+                                                 0xB0,
+                                                 0x60,
+                                                 0x80,
+                                                 0x40,
+                                                 0x20,
+                                                 0x00,
+                                                 0xF0,
+                                                 0xE0,
+                                                 0x10};
+    static constexpr uint32_t          samplePosYi[16]    = {0x90,
+                                                 0x50,
+                                                 0xA0,
+                                                 0x70,
+                                                 0x60,
+                                                 0xD0,
+                                                 0xB0,
+                                                 0x30,
+                                                 0xE0,
+                                                 0x10,
+                                                 0x20,
+                                                 0xC0,
+                                                 0x80,
+                                                 0x40,
+                                                 0xF0,
+                                                 0x00};
+    static constexpr float             samplePosX[16]     = {0.5625f,
+                                             0.4375f,
+                                             0.3125f,
+                                             0.7500f,
+                                             0.1875f,
+                                             0.6250f,
+                                             0.8125f,
+                                             0.6875f,
+                                             0.3750f,
+                                             0.5000f,
+                                             0.2500f,
+                                             0.1250f,
+                                             0.0000f,
+                                             0.9375f,
+                                             0.8750f,
+                                             0.0625f};
+    static constexpr float             samplePosY[16]     = {0.5625f,
+                                             0.3125f,
+                                             0.6250f,
+                                             0.4375f,
+                                             0.3750f,
+                                             0.8125f,
+                                             0.6875f,
+                                             0.1875f,
+                                             0.8750f,
+                                             0.0625f,
+                                             0.1250f,
+                                             0.7500f,
+                                             0.5000f,
+                                             0.2500f,
+                                             0.9375f,
+                                             0.0000f};
  };
  
-template<>
+template <>
  struct MultisampleTraits<SWR_MULTISAMPLE_16X, true>
  {
-    INLINE static float X(uint32_t sampleNum) {return 0.5f;};
-    INLINE static float Y(uint32_t sampleNum) {return 0.5f;};
+    INLINE static float       X(uint32_t sampleNum) { return 0.5f; };
+    INLINE static float       Y(uint32_t sampleNum) { return 0.5f; };
      INLINE static simdscalari FullSampleMask()
      {
          static const simdscalari mask = _simd_set1_epi32(0xFFFF);
          return mask;
      }
-    static const uint32_t numSamples = 16;
-    static const uint32_t numCoverageSamples = 1;
-    static const SWR_MULTISAMPLE_COUNT sampleCount = SWR_MULTISAMPLE_16X;
-    static constexpr uint32_t samplePosXi[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-    static constexpr uint32_t samplePosYi[16] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-    static constexpr float samplePosX[16] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
-    static constexpr float samplePosY[16] = { 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f };
+    static const uint32_t              numSamples         = 16;
+    static const uint32_t              numCoverageSamples = 1;
+    static const SWR_MULTISAMPLE_COUNT sampleCount        = SWR_MULTISAMPLE_16X;
+    static constexpr uint32_t          samplePosXi[16]    = {0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80};
+    static constexpr uint32_t          samplePosYi[16]    = {0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80,
+                                                 0x80};
+    static constexpr float             samplePosX[16]     = {0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f};
+    static constexpr float             samplePosY[16]     = {0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f,
+                                             0.5f};
  };
  
  INLINE
-bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MULTISAMPLE_POS& samplePos)
+bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount,
+                          const SWR_MULTISAMPLE_POS&  samplePos)
  {
      // detect if we're using standard or center sample patterns
      const uint32_t *standardPosX, *standardPosY;
-    switch(sampleCount)
+    switch (sampleCount)
      {
      case SWR_MULTISAMPLE_1X:
          standardPosX = MultisampleTraits<SWR_MULTISAMPLE_1X>::samplePosXi;
@@ -281,15 +443,15 @@ bool isNonStandardPattern(const SWR_MULTISAMPLE_COUNT sampleCount, const SWR_MUL
      }
  
      // scan sample pattern for standard or center
-    uint32_t numSamples = GetNumSamples(sampleCount);
-    bool bIsStandard = true;
-    if(numSamples > 1)
+    uint32_t numSamples  = GetNumSamples(sampleCount);
+    bool     bIsStandard = true;
+    if (numSamples > 1)
      {
-        for(uint32_t i = 0; i < numSamples; i++)
+        for (uint32_t i = 0; i < numSamples; i++)
          {
-            bIsStandard = (standardPosX[i] == samplePos.Xi(i)) ||
-                (standardPosY[i] == samplePos.Yi(i));
-            if(!bIsStandard)
+            bIsStandard =
+                (standardPosX[i] == samplePos.Xi(i)) || (standardPosY[i] == samplePos.Yi(i));
+            if (!bIsStandard)
                  break;
          }
      }
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa.h b/src/gallium/drivers/swr/rasterizer/core/pa.h

index ab1d46de9d0095ef657502cb60d6841b72d1915e..e19c8ea4a798017410f2f9e1b3e6b108800a2336 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/pa.h
+++ b/src/gallium/drivers/swr/rasterizer/core/pa.h
@@ -1,33 +1,33 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file pa.h
-*
-* @brief Definitions for primitive assembly.
-*        N primitives are assembled at a time, where N is the SIMD width.
-*        A state machine, that is specific for a given topology, drives the
-*        assembly of vertices into triangles.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file pa.h
+ *
+ * @brief Definitions for primitive assembly.
+ *        N primitives are assembled at a time, where N is the SIMD width.
+ *        A state machine, that is specific for a given topology, drives the
+ *        assembly of vertices into triangles.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "frontend.h"
@@ -42,13 +42,13 @@ struct PA_STATE
          SIMD_WIDTH_LOG2 = 4
      };
  
-    typedef         simd16mask          SIMDMASK;
+    typedef simd16mask SIMDMASK;
  
-    typedef         simd16scalar        SIMDSCALAR;
-    typedef         simd16vector        SIMDVECTOR;
-    typedef         simd16vertex        SIMDVERTEX;
+    typedef simd16scalar SIMDSCALAR;
+    typedef simd16vector SIMDVECTOR;
+    typedef simd16vertex SIMDVERTEX;
  
-    typedef         simd16scalari       SIMDSCALARI;
+    typedef simd16scalari SIMDSCALARI;
  
  #else
      enum
@@ -58,36 +58,45 @@ struct PA_STATE
          SIMD_WIDTH_LOG2 = 3
      };
  
-    typedef         simdmask            SIMDMASK;
+    typedef simdmask SIMDMASK;
  
-    typedef         simdscalar          SIMDSCALAR;
-    typedef         simdvector          SIMDVECTOR;
-    typedef         simdvertex          SIMDVERTEX;
+    typedef simdscalar SIMDSCALAR;
+    typedef simdvector SIMDVECTOR;
+    typedef simdvertex SIMDVERTEX;
  
-    typedef         simdscalari         SIMDSCALARI;
+    typedef simdscalari SIMDSCALARI;
  
  #endif
-    DRAW_CONTEXT *pDC{ nullptr };       // draw context
-    uint8_t* pStreamBase{ nullptr };    // vertex stream
-    uint32_t streamSizeInVerts{ 0 };    // total size of the input stream in verts
-    uint32_t vertexStride{ 0 };         // stride of a vertex in simdvector units
+    DRAW_CONTEXT* pDC{nullptr};         // draw context
+    uint8_t*      pStreamBase{nullptr}; // vertex stream
+    uint32_t      streamSizeInVerts{0}; // total size of the input stream in verts
+    uint32_t      vertexStride{0};      // stride of a vertex in simdvector units
  
-    // The topology the binner will use. In some cases the FE changes the topology from the api state.
-    PRIMITIVE_TOPOLOGY binTopology{ TOP_UNKNOWN };
+    // The topology the binner will use. In some cases the FE changes the topology from the api
+    // state.
+    PRIMITIVE_TOPOLOGY binTopology{TOP_UNKNOWN};
  
  #if ENABLE_AVX512_SIMD16
-    bool useAlternateOffset{ false };
+    bool useAlternateOffset{false};
  #endif
  
-    bool viewportArrayActive{ false };
-    bool rtArrayActive { false };
-    uint32_t numVertsPerPrim{ 0 };
+    bool     viewportArrayActive{false};
+    bool     rtArrayActive{false};
+    uint32_t numVertsPerPrim{0};
  
-    PA_STATE(){}
-    PA_STATE(DRAW_CONTEXT *in_pDC, uint8_t* in_pStreamBase, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, uint32_t in_numVertsPerPrim) :
-        pDC(in_pDC), pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts), vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim) {}
+    PA_STATE() {}
+    PA_STATE(DRAW_CONTEXT* in_pDC,
+             uint8_t*      in_pStreamBase,
+             uint32_t      in_streamSizeInVerts,
+             uint32_t      in_vertexStride,
+             uint32_t      in_numVertsPerPrim) :
+        pDC(in_pDC),
+        pStreamBase(in_pStreamBase), streamSizeInVerts(in_streamSizeInVerts),
+        vertexStride(in_vertexStride), numVertsPerPrim(in_numVertsPerPrim)
+    {
+    }
  
-    virtual bool HasWork() = 0;
+    virtual bool        HasWork()                                    = 0;
      virtual simdvector& GetSimdVector(uint32_t index, uint32_t slot) = 0;
  #if ENABLE_AVX512_SIMD16
      virtual simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot) = 0;
@@ -96,14 +105,14 @@ struct PA_STATE
  #if ENABLE_AVX512_SIMD16
      virtual bool Assemble(uint32_t slot, simd16vector verts[]) = 0;
  #endif
-    virtual void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[]) = 0;
-    virtual bool NextPrim() = 0;
-    virtual SIMDVERTEX& GetNextVsOutput() = 0;
-    virtual bool GetNextStreamOutput() = 0;
-    virtual SIMDMASK& GetNextVsIndices() = 0;
-    virtual uint32_t NumPrims() = 0;
-    virtual void Reset() = 0;
-    virtual SIMDSCALARI GetPrimID(uint32_t startID) = 0;
+    virtual void        AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[]) = 0;
+    virtual bool        NextPrim()                                                             = 0;
+    virtual SIMDVERTEX& GetNextVsOutput()                                                      = 0;
+    virtual bool        GetNextStreamOutput()                                                  = 0;
+    virtual SIMDMASK&   GetNextVsIndices()                                                     = 0;
+    virtual uint32_t    NumPrims()                                                             = 0;
+    virtual void        Reset()                                                                = 0;
+    virtual SIMDSCALARI GetPrimID(uint32_t startID)                                            = 0;
  };
  
  // The Optimized PA is a state machine that assembles triangles from vertex shader simd
@@ -117,69 +126,77 @@ struct PA_STATE
  //                1.    We call this the current and previous simd vertex.
  //                2.    The SSE simd is 4-wide which is not a multiple of 3 needed for triangles. In
  //                    order to assemble the second triangle, for a triangle list, we'll need the
-//                    last vertex from the previous simd and the first 2 vertices from the current simd.
+//                    last vertex from the previous simd and the first 2 vertices from the current
+//                    simd.
  //                3. At times the PA can assemble multiple triangles from the 2 simd vertices.
  //
  // This optimized PA is not cut aware, so only should be used by non-indexed draws or draws without
  // cuts
  struct PA_STATE_OPT : public PA_STATE
  {
-    uint32_t numPrims{ 0 };              // Total number of primitives for draw.
-    uint32_t numPrimsComplete{ 0 };      // Total number of complete primitives.
+    uint32_t numPrims{0};         // Total number of primitives for draw.
+    uint32_t numPrimsComplete{0}; // Total number of complete primitives.
  
-    uint32_t numSimdPrims{ 0 };          // Number of prims in current simd.
+    uint32_t numSimdPrims{0}; // Number of prims in current simd.
  
-    uint32_t cur{ 0 };                   // index to current VS output.
-    uint32_t prev{ 0 };                  // index to prev VS output. Not really needed in the state.
-    const uint32_t first{ 0 };           // index to first VS output. Used for tri fan and line loop.
+    uint32_t       cur{0};   // index to current VS output.
+    uint32_t       prev{0};  // index to prev VS output. Not really needed in the state.
+    const uint32_t first{0}; // index to first VS output. Used for tri fan and line loop.
  
-    uint32_t counter{ 0 };               // state counter
-    bool reset{ false };                 // reset state
+    uint32_t counter{0};   // state counter
+    bool     reset{false}; // reset state
  
-    uint32_t primIDIncr{ 0 };            // how much to increment for each vector (typically vector / {1, 2})
+    uint32_t    primIDIncr{0}; // how much to increment for each vector (typically vector / {1, 2})
      SIMDSCALARI primID;
  
-    typedef bool(*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
+    typedef bool (*PFN_PA_FUNC)(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[]);
  #if ENABLE_AVX512_SIMD16
-    typedef bool(*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
+    typedef bool (*PFN_PA_FUNC_SIMD16)(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[]);
  #endif
-    typedef void(*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[]);
+    typedef void (*PFN_PA_SINGLE_FUNC)(PA_STATE_OPT& pa,
+                                       uint32_t      slot,
+                                       uint32_t      primIndex,
+                                       simd4scalar   verts[]);
  
-    PFN_PA_FUNC        pfnPaFunc{ nullptr };        // PA state machine function for assembling 4 triangles.
+    PFN_PA_FUNC pfnPaFunc{nullptr}; // PA state machine function for assembling 4 triangles.
  #if ENABLE_AVX512_SIMD16
-    PFN_PA_FUNC_SIMD16 pfnPaFunc_simd16{ nullptr };
+    PFN_PA_FUNC_SIMD16 pfnPaFunc_simd16{nullptr};
  #endif
-    PFN_PA_SINGLE_FUNC pfnPaSingleFunc{ nullptr };  // PA state machine function for assembling single triangle.
-    PFN_PA_FUNC        pfnPaFuncReset{ nullptr };   // initial state to set on reset
+    PFN_PA_SINGLE_FUNC pfnPaSingleFunc{
+        nullptr}; // PA state machine function for assembling single triangle.
+    PFN_PA_FUNC pfnPaFuncReset{nullptr}; // initial state to set on reset
  #if ENABLE_AVX512_SIMD16
-    PFN_PA_FUNC_SIMD16 pfnPaFuncReset_simd16{ nullptr };
+    PFN_PA_FUNC_SIMD16 pfnPaFuncReset_simd16{nullptr};
  #endif
  
      // state used to advance the PA when Next is called
-    PFN_PA_FUNC        pfnPaNextFunc{ nullptr };
+    PFN_PA_FUNC pfnPaNextFunc{nullptr};
  #if ENABLE_AVX512_SIMD16
-    PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16{ nullptr };
+    PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16{nullptr};
  #endif
-    uint32_t           nextNumSimdPrims{ 0 };
-    uint32_t           nextNumPrimsIncrement{ 0 };
-    bool               nextReset{ false };
-    bool               isStreaming{ false };
+    uint32_t nextNumSimdPrims{0};
+    uint32_t nextNumPrimsIncrement{0};
+    bool     nextReset{false};
+    bool     isStreaming{false};
  
-    SIMDMASK           junkIndices  { 0 };          // temporary index store for unused virtual function
+    SIMDMASK junkIndices{0}; // temporary index store for unused virtual function
  
      PA_STATE_OPT() {}
-    PA_STATE_OPT(DRAW_CONTEXT* pDC, uint32_t numPrims, uint8_t* pStream, uint32_t streamSizeInVerts,
-        uint32_t vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
+    PA_STATE_OPT(DRAW_CONTEXT*      pDC,
+                 uint32_t           numPrims,
+                 uint8_t*           pStream,
+                 uint32_t           streamSizeInVerts,
+                 uint32_t           vertexStride,
+                 bool               in_isStreaming,
+                 uint32_t           numVertsPerPrim,
+                 PRIMITIVE_TOPOLOGY topo = TOP_UNKNOWN);
  
-    bool HasWork()
-    {
-        return (this->numPrimsComplete < this->numPrims) ? true : false;
-    }
+    bool HasWork() { return (this->numPrimsComplete < this->numPrims) ? true : false; }
  
      simdvector& GetSimdVector(uint32_t index, uint32_t slot)
      {
          SWR_ASSERT(slot < vertexStride);
-        uint32_t offset = index * vertexStride + slot;
+        uint32_t    offset     = index * vertexStride + slot;
          simdvector& vertexSlot = ((simdvector*)pStreamBase)[offset];
          return vertexSlot;
      }
@@ -188,7 +205,7 @@ struct PA_STATE_OPT : public PA_STATE
      simd16vector& GetSimdVector_simd16(uint32_t index, uint32_t slot)
      {
          SWR_ASSERT(slot < vertexStride);
-        uint32_t offset = index * vertexStride + slot;
+        uint32_t      offset     = index * vertexStride + slot;
          simd16vector& vertexSlot = ((simd16vector*)pStreamBase)[offset];
          return vertexSlot;
      }
@@ -196,10 +213,7 @@ struct PA_STATE_OPT : public PA_STATE
  #endif
      // Assembles 4 triangles. Each simdvector is a single vertex from 4
      // triangles (xxxx yyyy zzzz wwww) and there are 3 verts per triangle.
-    bool Assemble(uint32_t slot, simdvector verts[])
-    {
-        return this->pfnPaFunc(*this, slot, verts);
-    }
+    bool Assemble(uint32_t slot, simdvector verts[]) { return this->pfnPaFunc(*this, slot, verts); }
  
  #if ENABLE_AVX512_SIMD16
      bool Assemble(uint32_t slot, simd16vector verts[])
@@ -239,12 +253,12 @@ struct PA_STATE_OPT : public PA_STATE
          else
          {
              this->counter = (this->reset) ? 0 : (this->counter + 1);
-            this->reset = false;
+            this->reset   = false;
          }
  
          if (!HasWork())
          {
-            morePrims = false;    // no more to do
+            morePrims = false; // no more to do
          }
  
          return morePrims;
@@ -259,15 +273,16 @@ struct PA_STATE_OPT : public PA_STATE
          {
              // prev undefined for first state
              prev = cur;
-            cur = counter;
+            cur  = counter;
          }
          else
          {
-            // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in the buffer
+            // swap/recycle last two simd verts for prev and cur, leave other simd verts intact in
+            // the buffer
              uint32_t temp = prev;
  
              prev = cur;
-            cur = temp;
+            cur  = temp;
          }
  
          SWR_ASSERT(cur < numSimdVerts);
@@ -285,44 +300,46 @@ struct PA_STATE_OPT : public PA_STATE
      bool GetNextStreamOutput()
      {
          this->prev = this->cur;
-        this->cur = this->counter;
+        this->cur  = this->counter;
  
          return HasWork();
      }
  
      uint32_t NumPrims()
      {
-        return (this->numPrimsComplete + this->nextNumPrimsIncrement > this->numPrims) ?
-            (SIMD_WIDTH - (this->numPrimsComplete + this->nextNumPrimsIncrement - this->numPrims)) : SIMD_WIDTH;
+        return (this->numPrimsComplete + this->nextNumPrimsIncrement > this->numPrims)
+                   ? (SIMD_WIDTH -
+                      (this->numPrimsComplete + this->nextNumPrimsIncrement - this->numPrims))
+                   : SIMD_WIDTH;
      }
  
-    void SetNextState(PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
-        PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
-        uint32_t numSimdPrims = 0,
-        uint32_t numPrimsIncrement = 0,
-        bool reset = false)
+    void SetNextState(PA_STATE_OPT::PFN_PA_FUNC        pfnPaNextFunc,
+                      PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+                      uint32_t                         numSimdPrims      = 0,
+                      uint32_t                         numPrimsIncrement = 0,
+                      bool                             reset             = false)
      {
-        this->pfnPaNextFunc = pfnPaNextFunc;
-        this->nextNumSimdPrims = numSimdPrims;
+        this->pfnPaNextFunc         = pfnPaNextFunc;
+        this->nextNumSimdPrims      = numSimdPrims;
          this->nextNumPrimsIncrement = numPrimsIncrement;
-        this->nextReset = reset;
+        this->nextReset             = reset;
  
          this->pfnPaSingleFunc = pfnPaNextSingleFunc;
      }
  
  #if ENABLE_AVX512_SIMD16
      void SetNextState_simd16(PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
-        PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
-        PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
-        uint32_t numSimdPrims = 0,
-        uint32_t numPrimsIncrement = 0,
-        bool reset = false)
-    {
-        this->pfnPaNextFunc_simd16 = pfnPaNextFunc_simd16;
-        this->pfnPaNextFunc = pfnPaNextFunc;
-        this->nextNumSimdPrims = numSimdPrims;
+                             PA_STATE_OPT::PFN_PA_FUNC        pfnPaNextFunc,
+                             PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+                             uint32_t                         numSimdPrims      = 0,
+                             uint32_t                         numPrimsIncrement = 0,
+                             bool                             reset             = false)
+    {
+        this->pfnPaNextFunc_simd16  = pfnPaNextFunc_simd16;
+        this->pfnPaNextFunc         = pfnPaNextFunc;
+        this->nextNumSimdPrims      = numSimdPrims;
          this->nextNumPrimsIncrement = numPrimsIncrement;
-        this->nextReset = reset;
+        this->nextReset             = reset;
  
          this->pfnPaSingleFunc = pfnPaNextSingleFunc;
      }
@@ -339,44 +356,54 @@ struct PA_STATE_OPT : public PA_STATE
          this->pfnPaFunc_simd16 = this->pfnPaFuncReset_simd16;
  #endif
          this->numPrimsComplete = 0;
-        this->numSimdPrims = 0;
-        this->cur = 0;
-        this->prev = 0;
-        this->counter = 0;
-        this->reset = false;
+        this->numSimdPrims     = 0;
+        this->cur              = 0;
+        this->prev             = 0;
+        this->counter          = 0;
+        this->reset            = false;
      }
  
      SIMDSCALARI GetPrimID(uint32_t startID)
      {
  #if USE_SIMD16_FRONTEND
-        return _simd16_add_epi32(this->primID,
+        return _simd16_add_epi32(
+            this->primID,
              _simd16_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
  #else
-        return _simd_add_epi32(this->primID,
+        return _simd_add_epi32(
+            this->primID,
              _simd_set1_epi32(startID + this->primIDIncr * (this->numPrimsComplete / SIMD_WIDTH)));
  #endif
      }
  };
  
  // helper C wrappers to avoid having to rewrite all the PA topology state functions
-INLINE void SetNextPaState(PA_STATE_OPT& pa, PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
-    PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
-    uint32_t numSimdPrims = 0,
-    uint32_t numPrimsIncrement = 0,
-    bool reset = false)
+INLINE void SetNextPaState(PA_STATE_OPT&                    pa,
+                           PA_STATE_OPT::PFN_PA_FUNC        pfnPaNextFunc,
+                           PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+                           uint32_t                         numSimdPrims      = 0,
+                           uint32_t                         numPrimsIncrement = 0,
+                           bool                             reset             = false)
  {
-    return pa.SetNextState(pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
+    return pa.SetNextState(
+        pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
  }
  
  #if ENABLE_AVX512_SIMD16
-INLINE void SetNextPaState_simd16(PA_STATE_OPT& pa, PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
-    PA_STATE_OPT::PFN_PA_FUNC pfnPaNextFunc,
-    PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
-    uint32_t numSimdPrims = 0,
-    uint32_t numPrimsIncrement = 0,
-    bool reset = false)
+INLINE void SetNextPaState_simd16(PA_STATE_OPT&                    pa,
+                                  PA_STATE_OPT::PFN_PA_FUNC_SIMD16 pfnPaNextFunc_simd16,
+                                  PA_STATE_OPT::PFN_PA_FUNC        pfnPaNextFunc,
+                                  PA_STATE_OPT::PFN_PA_SINGLE_FUNC pfnPaNextSingleFunc,
+                                  uint32_t                         numSimdPrims      = 0,
+                                  uint32_t                         numPrimsIncrement = 0,
+                                  bool                             reset             = false)
  {
-    return pa.SetNextState_simd16(pfnPaNextFunc_simd16, pfnPaNextFunc, pfnPaNextSingleFunc, numSimdPrims, numPrimsIncrement, reset);
+    return pa.SetNextState_simd16(pfnPaNextFunc_simd16,
+                                  pfnPaNextFunc,
+                                  pfnPaNextSingleFunc,
+                                  numSimdPrims,
+                                  numPrimsIncrement,
+                                  reset);
  }
  
  #endif
@@ -395,59 +422,70 @@ INLINE simd16vector& PaGetSimdVector_simd16(PA_STATE& pa, uint32_t index, uint32
  // Cut-aware primitive assembler.
  struct PA_STATE_CUT : public PA_STATE
  {
-    SIMDMASK* pCutIndices{ nullptr };    // cut indices buffer, 1 bit per vertex
-    uint32_t numVerts{ 0 };              // number of vertices available in buffer store
-    uint32_t numAttribs{ 0 };            // number of attributes
-    int32_t numRemainingVerts{ 0 };      // number of verts remaining to be assembled
-    uint32_t numVertsToAssemble{ 0 };    // total number of verts to assemble for the draw
+    SIMDMASK* pCutIndices{nullptr};  // cut indices buffer, 1 bit per vertex
+    uint32_t  numVerts{0};           // number of vertices available in buffer store
+    uint32_t  numAttribs{0};         // number of attributes
+    int32_t   numRemainingVerts{0};  // number of verts remaining to be assembled
+    uint32_t  numVertsToAssemble{0}; // total number of verts to assemble for the draw
  #if ENABLE_AVX512_SIMD16
-    OSALIGNSIMD16(uint32_t) indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH];    // current index buffer for gather
+    OSALIGNSIMD16(uint32_t)
+    indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
  #else
-    OSALIGNSIMD(uint32_t) indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH];    // current index buffer for gather
+    OSALIGNSIMD(uint32_t)
+    indices[MAX_NUM_VERTS_PER_PRIM][SIMD_WIDTH]; // current index buffer for gather
  #endif
-    SIMDSCALARI vOffsets[MAX_NUM_VERTS_PER_PRIM];           // byte offsets for currently assembling simd
-    uint32_t numPrimsAssembled{ 0 };     // number of primitives that are fully assembled
-    uint32_t headVertex{ 0 };            // current unused vertex slot in vertex buffer store
-    uint32_t tailVertex{ 0 };            // beginning vertex currently assembling
-    uint32_t curVertex{ 0 };             // current unprocessed vertex
-    uint32_t startPrimId{ 0 };           // starting prim id
-    SIMDSCALARI vPrimId;                 // vector of prim ID
-    bool needOffsets{ false };           // need to compute gather offsets for current SIMD
-    uint32_t vertsPerPrim{ 0 };
-    bool processCutVerts{ false };       // vertex indices with cuts should be processed as normal, otherwise they
-                                         // are ignored.  Fetch shader sends invalid verts on cuts that should be ignored
-                                         // while the GS sends valid verts for every index
-
-    simdvector      junkVector;          // junk simdvector for unimplemented API
+    SIMDSCALARI vOffsets[MAX_NUM_VERTS_PER_PRIM]; // byte offsets for currently assembling simd
+    uint32_t    numPrimsAssembled{0};             // number of primitives that are fully assembled
+    uint32_t    headVertex{0};      // current unused vertex slot in vertex buffer store
+    uint32_t    tailVertex{0};      // beginning vertex currently assembling
+    uint32_t    curVertex{0};       // current unprocessed vertex
+    uint32_t    startPrimId{0};     // starting prim id
+    SIMDSCALARI vPrimId;            // vector of prim ID
+    bool        needOffsets{false}; // need to compute gather offsets for current SIMD
+    uint32_t    vertsPerPrim{0};
+    bool        processCutVerts{
+        false}; // vertex indices with cuts should be processed as normal, otherwise they
+                // are ignored.  Fetch shader sends invalid verts on cuts that should be ignored
+                // while the GS sends valid verts for every index
+
+    simdvector junkVector; // junk simdvector for unimplemented API
  #if ENABLE_AVX512_SIMD16
-    simd16vector    junkVector_simd16;   // junk simd16vector for unimplemented API
+    simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
  #endif
  
      // Topology state tracking
      uint32_t vert[MAX_NUM_VERTS_PER_PRIM];
-    uint32_t curIndex{ 0 };
-    bool reverseWinding{ false };        // indicates reverse winding for strips
-    int32_t adjExtraVert{ 0 };           // extra vert uses for tristrip w/ adj
+    uint32_t curIndex{0};
+    bool     reverseWinding{false}; // indicates reverse winding for strips
+    int32_t  adjExtraVert{0};       // extra vert uses for tristrip w/ adj
  
-    typedef void(PA_STATE_CUT::* PFN_PA_FUNC)(uint32_t vert, bool finish);
-    PFN_PA_FUNC pfnPa{ nullptr };        // per-topology function that processes a single vert
+    typedef void (PA_STATE_CUT::*PFN_PA_FUNC)(uint32_t vert, bool finish);
+    PFN_PA_FUNC pfnPa{nullptr}; // per-topology function that processes a single vert
  
      PA_STATE_CUT() {}
-    PA_STATE_CUT(DRAW_CONTEXT* pDC, uint8_t* in_pStream, uint32_t in_streamSizeInVerts, uint32_t in_vertexStride, SIMDMASK* in_pIndices, uint32_t in_numVerts,
-        uint32_t in_numAttribs, PRIMITIVE_TOPOLOGY topo, bool in_processCutVerts, uint32_t in_numVertsPerPrim)
-        : PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
-    {
-        numVerts = in_streamSizeInVerts;
-        numAttribs = in_numAttribs;
-        binTopology = topo;
-        needOffsets = false;
+    PA_STATE_CUT(DRAW_CONTEXT*      pDC,
+                 uint8_t*           in_pStream,
+                 uint32_t           in_streamSizeInVerts,
+                 uint32_t           in_vertexStride,
+                 SIMDMASK*          in_pIndices,
+                 uint32_t           in_numVerts,
+                 uint32_t           in_numAttribs,
+                 PRIMITIVE_TOPOLOGY topo,
+                 bool               in_processCutVerts,
+                 uint32_t           in_numVertsPerPrim) :
+        PA_STATE(pDC, in_pStream, in_streamSizeInVerts, in_vertexStride, in_numVertsPerPrim)
+    {
+        numVerts        = in_streamSizeInVerts;
+        numAttribs      = in_numAttribs;
+        binTopology     = topo;
+        needOffsets     = false;
          processCutVerts = in_processCutVerts;
  
          numVertsToAssemble = numRemainingVerts = in_numVerts;
-        numPrimsAssembled = 0;
+        numPrimsAssembled                      = 0;
          headVertex = tailVertex = curVertex = 0;
  
-        curIndex = 0;
+        curIndex    = 0;
          pCutIndices = in_pIndices;
          memset(indices, 0, sizeof(indices));
  #if USE_SIMD16_FRONTEND
@@ -456,49 +494,72 @@ struct PA_STATE_CUT : public PA_STATE
          vPrimId = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
  #endif
          reverseWinding = false;
-        adjExtraVert = -1;
+        adjExtraVert   = -1;
  
          bool gsEnabled = pDC->pState->state.gsState.gsEnable;
-        vertsPerPrim = NumVertsPerPrim(topo, gsEnabled);
+        vertsPerPrim   = NumVertsPerPrim(topo, gsEnabled);
  
          switch (topo)
          {
-        case TOP_TRIANGLE_LIST:     pfnPa = &PA_STATE_CUT::ProcessVertTriList; break;
-        case TOP_TRI_LIST_ADJ:      pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertTriListAdj : &PA_STATE_CUT::ProcessVertTriListAdjNoGs; break;
-        case TOP_TRIANGLE_STRIP:    pfnPa = &PA_STATE_CUT::ProcessVertTriStrip; break;
-        case TOP_TRI_STRIP_ADJ:     if (gsEnabled)
-                                    {
-                                        pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj < true > ;
-                                    }
-                                    else
-                                    {
-                                        pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj < false > ;
-                                    }
-                                    break;
-
-        case TOP_POINT_LIST:        pfnPa = &PA_STATE_CUT::ProcessVertPointList; break;
-        case TOP_LINE_LIST:         pfnPa = &PA_STATE_CUT::ProcessVertLineList; break;
-        case TOP_LINE_LIST_ADJ:     pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj : &PA_STATE_CUT::ProcessVertLineListAdjNoGs; break;
-        case TOP_LINE_STRIP:        pfnPa = &PA_STATE_CUT::ProcessVertLineStrip; break;
-        case TOP_LISTSTRIP_ADJ:     pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs; break;
-        case TOP_RECT_LIST:         pfnPa = &PA_STATE_CUT::ProcessVertRectList; break;
-        default: assert(0 && "Unimplemented topology");
+        case TOP_TRIANGLE_LIST:
+            pfnPa = &PA_STATE_CUT::ProcessVertTriList;
+            break;
+        case TOP_TRI_LIST_ADJ:
+            pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertTriListAdj
+                              : &PA_STATE_CUT::ProcessVertTriListAdjNoGs;
+            break;
+        case TOP_TRIANGLE_STRIP:
+            pfnPa = &PA_STATE_CUT::ProcessVertTriStrip;
+            break;
+        case TOP_TRI_STRIP_ADJ:
+            if (gsEnabled)
+            {
+                pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<true>;
+            }
+            else
+            {
+                pfnPa = &PA_STATE_CUT::ProcessVertTriStripAdj<false>;
+            }
+            break;
+
+        case TOP_POINT_LIST:
+            pfnPa = &PA_STATE_CUT::ProcessVertPointList;
+            break;
+        case TOP_LINE_LIST:
+            pfnPa = &PA_STATE_CUT::ProcessVertLineList;
+            break;
+        case TOP_LINE_LIST_ADJ:
+            pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineListAdj
+                              : &PA_STATE_CUT::ProcessVertLineListAdjNoGs;
+            break;
+        case TOP_LINE_STRIP:
+            pfnPa = &PA_STATE_CUT::ProcessVertLineStrip;
+            break;
+        case TOP_LISTSTRIP_ADJ:
+            pfnPa = gsEnabled ? &PA_STATE_CUT::ProcessVertLineStripAdj
+                              : &PA_STATE_CUT::ProcessVertLineStripAdjNoGs;
+            break;
+        case TOP_RECT_LIST:
+            pfnPa = &PA_STATE_CUT::ProcessVertRectList;
+            break;
+        default:
+            assert(0 && "Unimplemented topology");
          }
      }
  
      SIMDVERTEX& GetNextVsOutput()
      {
          uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
-        this->headVertex = (this->headVertex + SIMD_WIDTH) % this->numVerts;
-        this->needOffsets = true;
-        SIMDVECTOR* pVertex = &((SIMDVECTOR*)pStreamBase)[vertexIndex * vertexStride];
+        this->headVertex     = (this->headVertex + SIMD_WIDTH) % this->numVerts;
+        this->needOffsets    = true;
+        SIMDVECTOR* pVertex  = &((SIMDVECTOR*)pStreamBase)[vertexIndex * vertexStride];
  
          return *(SIMDVERTEX*)pVertex;
      }
  
      SIMDMASK& GetNextVsIndices()
      {
-        uint32_t vertexIndex = this->headVertex / SIMD_WIDTH;
+        uint32_t  vertexIndex  = this->headVertex / SIMD_WIDTH;
          SIMDMASK* pCurCutIndex = this->pCutIndices + vertexIndex;
          return *pCurCutIndex;
      }
@@ -543,12 +604,12 @@ struct PA_STATE_CUT : public PA_STATE
  #endif
          this->numRemainingVerts = this->numVertsToAssemble;
          this->numPrimsAssembled = 0;
-        this->curIndex = 0;
-        this->curVertex = 0;
-        this->tailVertex = 0;
-        this->headVertex = 0;
-        this->reverseWinding = false;
-        this->adjExtraVert = -1;
+        this->curIndex          = 0;
+        this->curVertex         = 0;
+        this->tailVertex        = 0;
+        this->headVertex        = 0;
+        this->reverseWinding    = false;
+        this->adjExtraVert      = -1;
  #if USE_SIMD16_FRONTEND
          this->vPrimId = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  #else
@@ -556,10 +617,7 @@ struct PA_STATE_CUT : public PA_STATE
  #endif
      }
  
-    bool HasWork()
-    {
-        return this->numRemainingVerts > 0 || this->adjExtraVert != -1;
-    }
+    bool HasWork() { return this->numRemainingVerts > 0 || this->adjExtraVert != -1; }
  
      bool IsVertexStoreFull()
      {
@@ -568,14 +626,14 @@ struct PA_STATE_CUT : public PA_STATE
  
      void RestartTopology()
      {
-        this->curIndex = 0;
+        this->curIndex       = 0;
          this->reverseWinding = false;
-        this->adjExtraVert = -1;
+        this->adjExtraVert   = -1;
      }
  
      bool IsCutIndex(uint32_t vertex)
      {
-        uint32_t vertexIndex = vertex / SIMD_WIDTH;
+        uint32_t vertexIndex  = vertex / SIMD_WIDTH;
          uint32_t vertexOffset = vertex & (SIMD_WIDTH - 1);
          return CheckBit(this->pCutIndices[vertexIndex], vertexOffset);
      }
@@ -584,9 +642,8 @@ struct PA_STATE_CUT : public PA_STATE
      // have assembled SIMD prims
      void ProcessVerts()
      {
-        while (this->numPrimsAssembled != SIMD_WIDTH &&
-            this->numRemainingVerts > 0 &&
-            this->curVertex != this->headVertex)
+        while (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts > 0 &&
+               this->curVertex != this->headVertex)
          {
              // if cut index, restart topology
              if (IsCutIndex(this->curVertex))
@@ -608,14 +665,16 @@ struct PA_STATE_CUT : public PA_STATE
              }
  
              this->curVertex++;
-            if (this->curVertex >= this->numVerts) {
-               this->curVertex = 0;
+            if (this->curVertex >= this->numVerts)
+            {
+                this->curVertex = 0;
              }
              this->numRemainingVerts--;
          }
  
          // special case last primitive for tri strip w/ adj
-        if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts == 0 && this->adjExtraVert != -1)
+        if (this->numPrimsAssembled != SIMD_WIDTH && this->numRemainingVerts == 0 &&
+            this->adjExtraVert != -1)
          {
              (this->*pfnPa)(this->curVertex, true);
          }
@@ -625,7 +684,7 @@ struct PA_STATE_CUT : public PA_STATE
      {
          // done with current batch
          // advance tail to the current unsubmitted vertex
-        this->tailVertex = this->curVertex;
+        this->tailVertex        = this->curVertex;
          this->numPrimsAssembled = 0;
  #if USE_SIMD16_FRONTEND
          this->vPrimId = _simd16_add_epi32(vPrimId, _simd16_set1_epi32(SIMD_WIDTH));
@@ -648,32 +707,38 @@ struct PA_STATE_CUT : public PA_STATE
      {
          for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
          {
-            uint32_t vertexStrideBytes = vertexStride * sizeof(SIMDVECTOR);
-            SIMDSCALARI vIndices = *(SIMDSCALARI*)&this->indices[v][0];
+            uint32_t    vertexStrideBytes = vertexStride * sizeof(SIMDVECTOR);
+            SIMDSCALARI vIndices          = *(SIMDSCALARI*)&this->indices[v][0];
  
              // step to simdvertex batch
              const uint32_t simdShift = SIMD_WIDTH_LOG2;
  #if USE_SIMD16_FRONTEND
              SIMDSCALARI vVertexBatch = _simd16_srai_epi32(vIndices, simdShift);
-            this->vOffsets[v] = _simd16_mullo_epi32(vVertexBatch, _simd16_set1_epi32(vertexStrideBytes));
+            this->vOffsets[v] =
+                _simd16_mullo_epi32(vVertexBatch, _simd16_set1_epi32(vertexStrideBytes));
  #else
              SIMDSCALARI vVertexBatch = _simd_srai_epi32(vIndices, simdShift);
-            this->vOffsets[v] = _simd_mullo_epi32(vVertexBatch, _simd_set1_epi32(vertexStrideBytes));
+            this->vOffsets[v] =
+                _simd_mullo_epi32(vVertexBatch, _simd_set1_epi32(vertexStrideBytes));
  #endif
  
              // step to index
              const uint32_t simdMask = SIMD_WIDTH - 1;
  #if USE_SIMD16_FRONTEND
              SIMDSCALARI vVertexIndex = _simd16_and_si(vIndices, _simd16_set1_epi32(simdMask));
-            this->vOffsets[v] = _simd16_add_epi32(this->vOffsets[v], _simd16_mullo_epi32(vVertexIndex, _simd16_set1_epi32(sizeof(float))));
+            this->vOffsets[v]        = _simd16_add_epi32(
+                this->vOffsets[v],
+                _simd16_mullo_epi32(vVertexIndex, _simd16_set1_epi32(sizeof(float))));
  #else
              SIMDSCALARI vVertexIndex = _simd_and_si(vIndices, _simd_set1_epi32(simdMask));
-            this->vOffsets[v] = _simd_add_epi32(this->vOffsets[v], _simd_mullo_epi32(vVertexIndex, _simd_set1_epi32(sizeof(float))));
+            this->vOffsets[v] =
+                _simd_add_epi32(this->vOffsets[v],
+                                _simd_mullo_epi32(vVertexIndex, _simd_set1_epi32(sizeof(float))));
  #endif
          }
      }
  
-    bool Assemble(uint32_t slot, simdvector *verts)
+    bool Assemble(uint32_t slot, simdvector* verts)
      {
          // process any outstanding verts
          ProcessVerts();
@@ -684,7 +749,8 @@ struct PA_STATE_CUT : public PA_STATE
              return false;
          }
  
-        // cache off gather offsets given the current SIMD set of indices the first time we get an assemble
+        // cache off gather offsets given the current SIMD set of indices the first time we get an
+        // assemble
          if (this->needOffsets)
          {
              ComputeOffsets();
@@ -709,7 +775,8 @@ struct PA_STATE_CUT : public PA_STATE
                  simd16scalar temp = _simd16_i32gather_ps(pBase, offsets, 1);
  
                  // Assigning to a temporary first to avoid an MSVC 2017 compiler bug
-                simdscalar t = useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
+                simdscalar t =
+                    useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
                  verts[v].v[c] = t;
  #else
                  verts[v].v[c] = _simd_i32gather_ps(pBase, offsets, 1);
@@ -728,7 +795,7 @@ struct PA_STATE_CUT : public PA_STATE
                  // v1, v3 = v1 + v2 - v0, v2
                  // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
                  simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
-                temp = _simd16_sub_ps(temp, verts[1].v[c]);
+                temp              = _simd16_sub_ps(temp, verts[1].v[c]);
                  temp = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
                  verts[1].v[c] = _simd16_extract_ps(temp, 0);
              }
@@ -740,7 +807,7 @@ struct PA_STATE_CUT : public PA_STATE
  #if ENABLE_AVX512_SIMD16
      bool Assemble(uint32_t slot, simd16vector verts[])
      {
-        // process any outstanding verts
+       // process any outstanding verts
          ProcessVerts();
  
          // return false if we don't have enough prims assembled
@@ -749,7 +816,8 @@ struct PA_STATE_CUT : public PA_STATE
              return false;
          }
  
-        // cache off gather offsets given the current SIMD set of indices the first time we get an assemble
+        // cache off gather offsets given the current SIMD set of indices the first time we get an
+        // assemble
          if (this->needOffsets)
          {
              ComputeOffsets();
@@ -773,7 +841,8 @@ struct PA_STATE_CUT : public PA_STATE
  #if USE_SIMD16_FRONTEND
                  verts[v].v[c] = _simd16_i32gather_ps(pBase, offsets, 1);
  #else
-                verts[v].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), _simd_i32gather_ps(pBase, offsets, 1), 0);
+                verts[v].v[c] = _simd16_insert_ps(
+                    _simd16_setzero_ps(), _simd_i32gather_ps(pBase, offsets, 1), 0);
  #endif
  
                  // move base to next component
@@ -789,8 +858,9 @@ struct PA_STATE_CUT : public PA_STATE
                  // v1, v3 = v1 + v2 - v0, v2
                  // v1 stored in verts[0], v0 stored in verts[1], v2 stored in verts[2]
                  simd16scalar temp = _simd16_add_ps(verts[0].v[c], verts[2].v[c]);
-                temp = _simd16_sub_ps(temp, verts[1].v[c]);
-                verts[1].v[c] = _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
+                temp              = _simd16_sub_ps(temp, verts[1].v[c]);
+                verts[1].v[c] =
+                    _simd16_blend_ps(verts[1].v[c], temp, 0xAAAA); // 1010 1010 1010 1010
              }
          }
  
@@ -800,12 +870,13 @@ struct PA_STATE_CUT : public PA_STATE
  #endif
      void AssembleSingle(uint32_t slot, uint32_t triIndex, simd4scalar tri[3])
      {
-        // move to slot
+       // move to slot
          for (uint32_t v = 0; v < this->vertsPerPrim; ++v)
          {
              uint32_t* pOffset = (uint32_t*)&this->vOffsets[v];
  #if USE_SIMD16_FRONTEND
-            uint32_t offset = useAlternateOffset ? pOffset[triIndex + SIMD_WIDTH_DIV2] : pOffset[triIndex];
+            uint32_t offset =
+                useAlternateOffset ? pOffset[triIndex + SIMD_WIDTH_DIV2] : pOffset[triIndex];
  #else
              uint32_t offset = pOffset[triIndex];
  #endif
@@ -814,7 +885,7 @@ struct PA_STATE_CUT : public PA_STATE
              for (uint32_t c = 0; c < 4; ++c)
              {
                  float* pComponent = (float*)(this->pStreamBase + offset);
-                pVert[c] = *pComponent;
+                pVert[c]          = *pComponent;
                  offset += SIMD_WIDTH * sizeof(float);
              }
          }
@@ -835,10 +906,7 @@ struct PA_STATE_CUT : public PA_STATE
          }
      }
  
-    uint32_t NumPrims()
-    {
-        return this->numPrimsAssembled;
-    }
+    uint32_t NumPrims() { return this->numPrimsAssembled; }
  
      // Per-topology functions
      void ProcessVertTriStrip(uint32_t index, bool finish)
@@ -864,14 +932,14 @@ struct PA_STATE_CUT : public PA_STATE
              this->numPrimsAssembled++;
  
              // set up next prim state
-            this->vert[0] = this->vert[1];
-            this->vert[1] = this->vert[2];
+            this->vert[0]  = this->vert[1];
+            this->vert[1]  = this->vert[2];
              this->curIndex = 2;
              this->reverseWinding ^= 1;
          }
      }
  
-    template<bool gsEnabled>
+    template <bool gsEnabled>
      void AssembleTriStripAdj()
      {
          if (!gsEnabled)
@@ -898,8 +966,7 @@ struct PA_STATE_CUT : public PA_STATE
          this->numPrimsAssembled++;
      }
  
-
-    template<bool gsEnabled>
+    template <bool gsEnabled>
      void ProcessVertTriStripAdj(uint32_t index, bool finish)
      {
          // handle last primitive of tristrip
@@ -1059,7 +1126,6 @@ struct PA_STATE_CUT : public PA_STATE
          }
      }
  
-
      void ProcessVertLineList(uint32_t index, bool finish)
      {
          this->vert[this->curIndex] = index;
@@ -1088,7 +1154,7 @@ struct PA_STATE_CUT : public PA_STATE
              this->numPrimsAssembled++;
  
              // set up next prim state
-            this->vert[0] = this->vert[1];
+            this->vert[0]  = this->vert[1];
              this->curIndex = 1;
          }
      }
@@ -1109,9 +1175,9 @@ struct PA_STATE_CUT : public PA_STATE
              this->numPrimsAssembled++;
  
              // set up next prim state
-            this->vert[0] = this->vert[1];
-            this->vert[1] = this->vert[2];
-            this->vert[2] = this->vert[3];
+            this->vert[0]  = this->vert[1];
+            this->vert[1]  = this->vert[2];
+            this->vert[2]  = this->vert[3];
              this->curIndex = 3;
          }
      }
@@ -1130,9 +1196,9 @@ struct PA_STATE_CUT : public PA_STATE
              this->numPrimsAssembled++;
  
              // set up next prim state
-            this->vert[0] = this->vert[1];
-            this->vert[1] = this->vert[2];
-            this->vert[2] = this->vert[3];
+            this->vert[0]  = this->vert[1];
+            this->vert[1]  = this->vert[2];
+            this->vert[2]  = this->vert[3];
              this->curIndex = 3;
          }
      }
@@ -1192,9 +1258,9 @@ struct PA_STATE_CUT : public PA_STATE
  
              // second triangle in the rectangle
              // v1, v3 = v1 + v2 - v0, v2
-            this->indices[0][this->numPrimsAssembled+1] = this->vert[1];
-            this->indices[1][this->numPrimsAssembled+1] = this->vert[0];
-            this->indices[2][this->numPrimsAssembled+1] = this->vert[2];
+            this->indices[0][this->numPrimsAssembled + 1] = this->vert[1];
+            this->indices[1][this->numPrimsAssembled + 1] = this->vert[0];
+            this->indices[2][this->numPrimsAssembled + 1] = this->vert[2];
  
              // increment numPrimsAssembled
              this->numPrimsAssembled += 2;
@@ -1208,29 +1274,26 @@ struct PA_STATE_CUT : public PA_STATE
  // Primitive Assembly for data output from the DomainShader.
  struct PA_TESS : PA_STATE
  {
-    PA_TESS(
-        DRAW_CONTEXT *in_pDC,
-        const SIMDSCALAR* in_pVertData,
-        uint32_t in_attributeStrideInVectors,
-        uint32_t in_vertexStride,
-        uint32_t in_numAttributes,
-        uint32_t* (&in_ppIndices)[3],
-        uint32_t in_numPrims,
-        PRIMITIVE_TOPOLOGY in_binTopology,
-        uint32_t numVertsPerPrim) :
+    PA_TESS(DRAW_CONTEXT*     in_pDC,
+            const SIMDSCALAR* in_pVertData,
+            uint32_t          in_attributeStrideInVectors,
+            uint32_t          in_vertexStride,
+            uint32_t          in_numAttributes,
+            uint32_t* (&in_ppIndices)[3],
+            uint32_t           in_numPrims,
+            PRIMITIVE_TOPOLOGY in_binTopology,
+            uint32_t           numVertsPerPrim) :
  
          PA_STATE(in_pDC, nullptr, 0, in_vertexStride, numVertsPerPrim),
-        m_pVertexData(in_pVertData),
-        m_attributeStrideInVectors(in_attributeStrideInVectors),
-        m_numAttributes(in_numAttributes),
-        m_numPrims(in_numPrims)
+        m_pVertexData(in_pVertData), m_attributeStrideInVectors(in_attributeStrideInVectors),
+        m_numAttributes(in_numAttributes), m_numPrims(in_numPrims)
      {
  #if USE_SIMD16_FRONTEND
          m_vPrimId = _simd16_setzero_si();
  #else
          m_vPrimId = _simd_setzero_si();
  #endif
-        binTopology = in_binTopology;
+        binTopology    = in_binTopology;
          m_ppIndices[0] = in_ppIndices[0];
          m_ppIndices[1] = in_ppIndices[1];
          m_ppIndices[2] = in_ppIndices[2];
@@ -1255,10 +1318,7 @@ struct PA_TESS : PA_STATE
          }
      }
  
-    bool HasWork()
-    {
-        return m_numPrims != 0;
-    }
+    bool HasWork() { return m_numPrims != 0; }
  
      simdvector& GetSimdVector(uint32_t index, uint32_t slot)
      {
@@ -1278,19 +1338,14 @@ struct PA_TESS : PA_STATE
      {
          SWR_ASSERT(numPrims <= SIMD_WIDTH);
  #if USE_SIMD16_FRONTEND
-        static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] =
-        {
+        static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] = {
              -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
-        };
+            0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0};
  
          return _simd16_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
  #else
-        static const OSALIGNLINE(int32_t) maskGen[SIMD_WIDTH * 2] =
-        {
-            -1, -1, -1, -1, -1, -1, -1, -1,
-            0,  0,  0,  0,  0,  0,  0,  0
-        };
+        static const OSALIGNLINE(int32_t)
+            maskGen[SIMD_WIDTH * 2] = {-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
  
          return _simd_loadu_si((const SIMDSCALARI*)&maskGen[SIMD_WIDTH - numPrims]);
  #endif
@@ -1308,7 +1363,8 @@ struct PA_TESS : PA_STATE
  
          SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
  
-        const float* pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+        const float* pBaseAttrib =
+            (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
          for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
          {
  #if USE_SIMD16_FRONTEND
@@ -1321,21 +1377,21 @@ struct PA_TESS : PA_STATE
              for (uint32_t c = 0; c < 4; ++c)
              {
  #if USE_SIMD16_FRONTEND
-                simd16scalar temp = _simd16_mask_i32gather_ps(
-                    _simd16_setzero_ps(),
-                    pBase,
-                    indices,
-                    _simd16_castsi_ps(mask),
-                    4 /* gcc doesn't like sizeof(float) */);
-
-                verts[i].v[c] = useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
+                simd16scalar temp =
+                    _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
+                                              pBase,
+                                              indices,
+                                              _simd16_castsi_ps(mask),
+                                              4 /* gcc doesn't like sizeof(float) */);
+
+                verts[i].v[c] =
+                    useAlternateOffset ? _simd16_extract_ps(temp, 1) : _simd16_extract_ps(temp, 0);
  #else
-                verts[i].v[c] = _simd_mask_i32gather_ps(
-                    _simd_setzero_ps(),
-                    pBase,
-                    indices,
-                    _simd_castsi_ps(mask),
-                    4); // gcc doesn't like sizeof(float)
+                verts[i].v[c] = _simd_mask_i32gather_ps(_simd_setzero_ps(),
+                                                        pBase,
+                                                        indices,
+                                                        _simd_castsi_ps(mask),
+                                                        4); // gcc doesn't like sizeof(float)
  #endif
                  pBase += m_attributeStrideInVectors * SIMD_WIDTH;
              }
@@ -1357,7 +1413,8 @@ struct PA_TESS : PA_STATE
  
          SIMDSCALARI mask = GenPrimMask(numPrimsToAssemble);
  
-        const float* pBaseAttrib = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+        const float* pBaseAttrib =
+            (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
          for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
          {
  #if USE_SIMD16_FRONTEND
@@ -1370,20 +1427,18 @@ struct PA_TESS : PA_STATE
              for (uint32_t c = 0; c < 4; ++c)
              {
  #if USE_SIMD16_FRONTEND
-                verts[i].v[c] = _simd16_mask_i32gather_ps(
-                    _simd16_setzero_ps(),
-                    pBase,
-                    indices,
-                    _simd16_castsi_ps(mask),
-                    4 /* gcc doesn't like sizeof(float) */);
+                verts[i].v[c] = _simd16_mask_i32gather_ps(_simd16_setzero_ps(),
+                                                          pBase,
+                                                          indices,
+                                                          _simd16_castsi_ps(mask),
+                                                          4 /* gcc doesn't like sizeof(float) */);
  #else
-                simdscalar temp = _simd_mask_i32gather_ps(
-                    _simd_setzero_ps(),
-                    pBase,
-                    indices,
-                    _simd_castsi_ps(mask),
-                    4 /* gcc doesn't like sizeof(float) */);
-                verts[i].v[c] = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
+                simdscalar temp = _simd_mask_i32gather_ps(_simd_setzero_ps(),
+                                                          pBase,
+                                                          indices,
+                                                          _simd_castsi_ps(mask),
+                                                          4 /* gcc doesn't like sizeof(float) */);
+                verts[i].v[c]   = _simd16_insert_ps(_simd16_setzero_ps(), temp, 0);
  #endif
                  pBase += m_attributeStrideInVectors * SIMD_WIDTH;
              }
@@ -1396,19 +1451,22 @@ struct PA_TESS : PA_STATE
      void AssembleSingle(uint32_t slot, uint32_t primIndex, simd4scalar verts[])
      {
          SWR_ASSERT(slot < m_numAttributes);
-        SWR_ASSERT(primIndex < PA_TESS::NumPrims());
  
  
-        const float* pVertDataBase = (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
+        SWR_ASSERT(primIndex < PA_TESS::NumPrims());
+
+        const float* pVertDataBase =
+            (const float*)&m_pVertexData[slot * m_attributeStrideInVectors * 4];
          for (uint32_t i = 0; i < m_numVertsPerPrim; ++i)
          {
  #if USE_SIMD16_FRONTEND
-            uint32_t index = useAlternateOffset ? m_ppIndices[i][primIndex + SIMD_WIDTH_DIV2] : m_ppIndices[i][primIndex];
+            uint32_t index = useAlternateOffset ? m_ppIndices[i][primIndex + SIMD_WIDTH_DIV2]
+                                                : m_ppIndices[i][primIndex];
  #else
              uint32_t index = m_ppIndices[i][primIndex];
  #endif
              const float* pVertData = pVertDataBase;
-            float* pVert = (float*)&verts[i];
+            float*       pVert     = (float*)&verts[i];
  
              for (uint32_t c = 0; c < 4; ++c)
              {
@@ -1447,15 +1505,9 @@ struct PA_TESS : PA_STATE
          return junkIndices;
      }
  
-    uint32_t NumPrims()
-    {
-        return std::min<uint32_t>(m_numPrims, SIMD_WIDTH);
-    }
+    uint32_t NumPrims() { return std::min<uint32_t>(m_numPrims, SIMD_WIDTH); }
  
-    void Reset()
-    {
-        SWR_NOT_IMPL;
-    }
+    void Reset() { SWR_NOT_IMPL; }
  
      SIMDSCALARI GetPrimID(uint32_t startID)
      {
@@ -1467,57 +1519,77 @@ struct PA_TESS : PA_STATE
      }
  
  private:
-    const SIMDSCALAR*   m_pVertexData = nullptr;
-    uint32_t            m_attributeStrideInVectors = 0;
-    uint32_t            m_numAttributes = 0;
-    uint32_t            m_numPrims = 0;
-    uint32_t*           m_ppIndices[3];
+    const SIMDSCALAR* m_pVertexData              = nullptr;
+    uint32_t          m_attributeStrideInVectors = 0;
+    uint32_t          m_numAttributes            = 0;
+    uint32_t          m_numPrims                 = 0;
+    uint32_t*         m_ppIndices[3];
  
-    uint32_t            m_numVertsPerPrim = 0;
+    uint32_t m_numVertsPerPrim = 0;
  
-    SIMDSCALARI         m_vPrimId;
+    SIMDSCALARI m_vPrimId;
  
-    simdvector          junkVector;         // junk simdvector for unimplemented API
+    simdvector junkVector; // junk simdvector for unimplemented API
  #if ENABLE_AVX512_SIMD16
-    simd16vector        junkVector_simd16;  // junk simd16vector for unimplemented API
+    simd16vector junkVector_simd16; // junk simd16vector for unimplemented API
  #endif
-    SIMDVERTEX          junkVertex;         // junk SIMDVERTEX for unimplemented API
-    SIMDMASK            junkIndices;        // temporary index store for unused virtual function
+    SIMDVERTEX junkVertex;  // junk SIMDVERTEX for unimplemented API
+    SIMDMASK   junkIndices; // temporary index store for unused virtual function
  };
  
-// Primitive Assembler factory class, responsible for creating and initializing the correct assembler
-// based on state.
+// Primitive Assembler factory class, responsible for creating and initializing the correct
+// assembler based on state.
  template <typename IsIndexedT, typename IsCutIndexEnabledT>
  struct PA_FACTORY
  {
-    PA_FACTORY(DRAW_CONTEXT* pDC, PRIMITIVE_TOPOLOGY in_topo, uint32_t numVerts, PA_STATE::SIMDVERTEX *pVertexStore, uint32_t vertexStoreSize, uint32_t vertexStride, uint32_t numVertsPerPrim) : topo(in_topo)
+    PA_FACTORY(DRAW_CONTEXT*         pDC,
+               PRIMITIVE_TOPOLOGY    in_topo,
+               uint32_t              numVerts,
+               PA_STATE::SIMDVERTEX* pVertexStore,
+               uint32_t              vertexStoreSize,
+               uint32_t              vertexStride,
+               uint32_t              numVertsPerPrim) :
+        topo(in_topo)
      {
  #if KNOB_ENABLE_CUT_AWARE_PA == TRUE
          const API_STATE& state = GetApiState(pDC);
-        if ((IsIndexedT::value && IsCutIndexEnabledT::value && (
-            topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST ||
-            topo == TOP_LINE_LIST || topo == TOP_LINE_STRIP ||
-            topo == TOP_TRIANGLE_LIST)) ||
-
-            // non-indexed draws with adjacency topologies must use cut-aware PA until we add support
-            // for them in the optimized PA
-            (topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ || topo == TOP_TRI_STRIP_ADJ))
+        if ((IsIndexedT::value && IsCutIndexEnabledT::value &&
+             (topo == TOP_TRIANGLE_STRIP || topo == TOP_POINT_LIST || topo == TOP_LINE_LIST ||
+              topo == TOP_LINE_STRIP || topo == TOP_TRIANGLE_LIST)) ||
+
+            // non-indexed draws with adjacency topologies must use cut-aware PA until we add
+            // support for them in the optimized PA
+            (topo == TOP_LINE_LIST_ADJ || topo == TOP_LISTSTRIP_ADJ || topo == TOP_TRI_LIST_ADJ ||
+             topo == TOP_TRI_STRIP_ADJ))
          {
              memset(&indexStore, 0, sizeof(indexStore));
              uint32_t numAttribs = state.feNumAttributes;
  
-            new (&this->paCut) PA_STATE_CUT(pDC, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH,
-                vertexStride, &this->indexStore[0], numVerts, numAttribs, state.topology, false, numVertsPerPrim);
+            new (&this->paCut) PA_STATE_CUT(pDC,
+                                            reinterpret_cast<uint8_t*>(pVertexStore),
+                                            vertexStoreSize * PA_STATE::SIMD_WIDTH,
+                                            vertexStride,
+                                            &this->indexStore[0],
+                                            numVerts,
+                                            numAttribs,
+                                            state.topology,
+                                            false,
+                                            numVertsPerPrim);
              cutPA = true;
          }
          else
  #endif
          {
              uint32_t numPrims = GetNumPrims(in_topo, numVerts);
-            new (&this->paOpt) PA_STATE_OPT(pDC, numPrims, reinterpret_cast<uint8_t *>(pVertexStore), vertexStoreSize * PA_STATE::SIMD_WIDTH, vertexStride, false, numVertsPerPrim);
+            new (&this->paOpt) PA_STATE_OPT(pDC,
+                                            numPrims,
+                                            reinterpret_cast<uint8_t*>(pVertexStore),
+                                            vertexStoreSize * PA_STATE::SIMD_WIDTH,
+                                            vertexStride,
+                                            false,
+                                            numVertsPerPrim);
              cutPA = false;
          }
-
      }
  
      PA_STATE& GetPA()
@@ -1537,9 +1609,9 @@ struct PA_FACTORY
      PA_STATE_OPT paOpt;
      PA_STATE_CUT paCut;
  
-    bool cutPA{ false };
+    bool cutPA{false};
  
-    PRIMITIVE_TOPOLOGY topo{ TOP_UNKNOWN };
+    PRIMITIVE_TOPOLOGY topo{TOP_UNKNOWN};
  
-    PA_STATE::SIMDMASK      indexStore[MAX_NUM_VERTS_PER_PRIM];
+    PA_STATE::SIMDMASK indexStore[MAX_NUM_VERTS_PER_PRIM];
  };
diff --git a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp

index 4f89e0c1799b653fa2001c2c61b899c839a26df7..25d7156ac63981b931dee23d19027c195ba73a44 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp
@@ -1,136 +1,160 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file pa_avx.cpp
-*
-* @brief AVX implementation for primitive assembly.
-*        N primitives are assembled at a time, where N is the SIMD width.
-*        A state machine, that is specific for a given topology, drives the
-*        assembly of vertices into triangles.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file pa_avx.cpp
+ *
+ * @brief AVX implementation for primitive assembly.
+ *        N primitives are assembled at a time, where N is the SIMD width.
+ *        A state machine, that is specific for a given topology, drives the
+ *        assembly of vertices into triangles.
+ *
+ ******************************************************************************/
  #include "context.h"
  #include "pa.h"
  #include "frontend.h"
  
  #if (KNOB_SIMD_WIDTH == 8)
  
-INLINE simd4scalar swizzleLane0(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane0(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
      simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 0);
  }
  
-INLINE simd4scalar swizzleLane1(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane1(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
      simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 0);
  }
  
-INLINE simd4scalar swizzleLane2(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane2(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
      simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 0);
  }
  
-INLINE simd4scalar swizzleLane3(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane3(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
      simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 0);
  }
  
-INLINE simd4scalar swizzleLane4(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane4(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
      simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 1);
  }
  
-INLINE simd4scalar swizzleLane5(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane5(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpacklo_ps(x, z);
      simdscalar tmp1 = _mm256_unpacklo_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 1);
  }
  
-INLINE simd4scalar swizzleLane6(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane6(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
      simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpacklo_ps(tmp0, tmp1), 1);
  }
  
-INLINE simd4scalar swizzleLane7(const simdscalar &x, const simdscalar &y, const simdscalar &z, const simdscalar &w)
+INLINE simd4scalar swizzleLane7(const simdscalar& x,
+                                const simdscalar& y,
+                                const simdscalar& z,
+                                const simdscalar& w)
  {
      simdscalar tmp0 = _mm256_unpackhi_ps(x, z);
      simdscalar tmp1 = _mm256_unpackhi_ps(y, w);
      return _mm256_extractf128_ps(_mm256_unpackhi_ps(tmp0, tmp1), 1);
  }
  
-INLINE simd4scalar swizzleLane0(const simdvector &v)
+INLINE simd4scalar swizzleLane0(const simdvector& v)
  {
      return swizzleLane0(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane1(const simdvector &v)
+INLINE simd4scalar swizzleLane1(const simdvector& v)
  {
      return swizzleLane1(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane2(const simdvector &v)
+INLINE simd4scalar swizzleLane2(const simdvector& v)
  {
      return swizzleLane2(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane3(const simdvector &v)
+INLINE simd4scalar swizzleLane3(const simdvector& v)
  {
      return swizzleLane3(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane4(const simdvector &v)
+INLINE simd4scalar swizzleLane4(const simdvector& v)
  {
      return swizzleLane4(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane5(const simdvector &v)
+INLINE simd4scalar swizzleLane5(const simdvector& v)
  {
      return swizzleLane5(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane6(const simdvector &v)
+INLINE simd4scalar swizzleLane6(const simdvector& v)
  {
      return swizzleLane6(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLane7(const simdvector &v)
+INLINE simd4scalar swizzleLane7(const simdvector& v)
  {
      return swizzleLane7(v.x, v.y, v.z, v.w);
  }
  
-INLINE simd4scalar swizzleLaneN(const simdvector &v, int lane)
+INLINE simd4scalar swizzleLaneN(const simdvector& v, int lane)
  {
      switch (lane)
      {
@@ -156,87 +180,135 @@ INLINE simd4scalar swizzleLaneN(const simdvector &v, int lane)
  }
  
  #if ENABLE_AVX512_SIMD16
-INLINE simd4scalar swizzleLane0(const simd16vector &v)
+INLINE simd4scalar swizzleLane0(const simd16vector& v)
  {
-    return swizzleLane0(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane0(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane1(const simd16vector &v)
+INLINE simd4scalar swizzleLane1(const simd16vector& v)
  {
-    return swizzleLane1(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane1(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane2(const simd16vector &v)
+INLINE simd4scalar swizzleLane2(const simd16vector& v)
  {
-    return swizzleLane2(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane2(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane3(const simd16vector &v)
+INLINE simd4scalar swizzleLane3(const simd16vector& v)
  {
-    return swizzleLane3(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane3(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane4(const simd16vector &v)
+INLINE simd4scalar swizzleLane4(const simd16vector& v)
  {
-    return swizzleLane4(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane4(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane5(const simd16vector &v)
+INLINE simd4scalar swizzleLane5(const simd16vector& v)
  {
-    return swizzleLane5(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane5(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane6(const simd16vector &v)
+INLINE simd4scalar swizzleLane6(const simd16vector& v)
  {
-    return swizzleLane6(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane6(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane7(const simd16vector &v)
+INLINE simd4scalar swizzleLane7(const simd16vector& v)
  {
-    return swizzleLane7(_simd16_extract_ps(v.x, 0), _simd16_extract_ps(v.y, 0), _simd16_extract_ps(v.z, 0), _simd16_extract_ps(v.w, 0));
+    return swizzleLane7(_simd16_extract_ps(v.x, 0),
+                        _simd16_extract_ps(v.y, 0),
+                        _simd16_extract_ps(v.z, 0),
+                        _simd16_extract_ps(v.w, 0));
  }
  
-INLINE simd4scalar swizzleLane8(const simd16vector &v)
+INLINE simd4scalar swizzleLane8(const simd16vector& v)
  {
-    return swizzleLane0(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane0(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLane9(const simd16vector &v)
+INLINE simd4scalar swizzleLane9(const simd16vector& v)
  {
-    return swizzleLane1(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane1(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneA(const simd16vector &v)
+INLINE simd4scalar swizzleLaneA(const simd16vector& v)
  {
-    return swizzleLane2(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane2(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneB(const simd16vector &v)
+INLINE simd4scalar swizzleLaneB(const simd16vector& v)
  {
-    return swizzleLane3(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane3(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneC(const simd16vector &v)
+INLINE simd4scalar swizzleLaneC(const simd16vector& v)
  {
-    return swizzleLane4(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane4(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneD(const simd16vector &v)
+INLINE simd4scalar swizzleLaneD(const simd16vector& v)
  {
-    return swizzleLane5(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane5(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneE(const simd16vector &v)
+INLINE simd4scalar swizzleLaneE(const simd16vector& v)
  {
-    return swizzleLane6(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane6(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneF(const simd16vector &v)
+INLINE simd4scalar swizzleLaneF(const simd16vector& v)
  {
-    return swizzleLane7(_simd16_extract_ps(v.x, 1), _simd16_extract_ps(v.y, 1), _simd16_extract_ps(v.z, 1), _simd16_extract_ps(v.w, 1));
+    return swizzleLane7(_simd16_extract_ps(v.x, 1),
+                        _simd16_extract_ps(v.y, 1),
+                        _simd16_extract_ps(v.z, 1),
+                        _simd16_extract_ps(v.w, 1));
  }
  
-INLINE simd4scalar swizzleLaneN(const simd16vector &v, int lane)
+INLINE simd4scalar swizzleLaneN(const simd16vector& v, int lane)
  {
      switch (lane)
      {
@@ -374,11 +446,11 @@ void PaPatchListSingle(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
      {
          uint32_t input_cp = primIndex * TotalControlPoints + cp;
  #if USE_SIMD16_FRONTEND
-        uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+        uint32_t input_vec  = input_cp / KNOB_SIMD16_WIDTH;
          uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
  
  #else
-        uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
+        uint32_t input_vec  = input_cp / KNOB_SIMD_WIDTH;
          uint32_t input_lane = input_cp % KNOB_SIMD_WIDTH;
  
  #endif
@@ -386,7 +458,8 @@ void PaPatchListSingle(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
          for (uint32_t i = 0; i < 4; ++i)
          {
  #if USE_SIMD16_FRONTEND
-            const float* pInputVec = (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
+            const float* pInputVec =
+                (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
  #else
              const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
  #endif
@@ -395,18 +468,17 @@ void PaPatchListSingle(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
      }
  }
  
-template<uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
+template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
  static bool PaPatchList(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
-    SetNextPaState(
-        pa,
-        PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
-        PaPatchListSingle<TotalControlPoints>);
+    SetNextPaState(pa,
+                   PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
+                   PaPatchListSingle<TotalControlPoints>);
  
      return false;
  }
  
-template<uint32_t TotalControlPoints>
+template <uint32_t TotalControlPoints>
  static bool PaPatchListTerm(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      // We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
@@ -433,14 +505,15 @@ static bool PaPatchListTerm(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
              for (uint32_t lane = 0; lane < KNOB_SIMD_WIDTH; ++lane)
              {
  #if USE_SIMD16_FRONTEND
-                uint32_t input_cp = (lane + lane_offset) * TotalControlPoints + cp;
-                uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+                uint32_t input_cp   = (lane + lane_offset) * TotalControlPoints + cp;
+                uint32_t input_vec  = input_cp / KNOB_SIMD16_WIDTH;
                  uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
  
-                const float* pInputVec = (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
+                const float* pInputVec =
+                    (const float*)(&PaGetSimdVector_simd16(pa, input_vec, slot)[i]);
  #else
-                uint32_t input_cp = lane * TotalControlPoints + cp;
-                uint32_t input_vec = input_cp / KNOB_SIMD_WIDTH;
+                uint32_t input_cp   = lane * TotalControlPoints + cp;
+                uint32_t input_vec  = input_cp / KNOB_SIMD_WIDTH;
                  uint32_t input_lane = input_cp % KNOB_SIMD_WIDTH;
  
                  const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
@@ -451,31 +524,29 @@ static bool PaPatchListTerm(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
          }
      }
  
-    SetNextPaState(
-        pa,
-        PaPatchList<TotalControlPoints>,
-        PaPatchListSingle<TotalControlPoints>,
-        0,
-        PA_STATE_OPT::SIMD_WIDTH,
-        true);
+    SetNextPaState(pa,
+                   PaPatchList<TotalControlPoints>,
+                   PaPatchListSingle<TotalControlPoints>,
+                   0,
+                   PA_STATE_OPT::SIMD_WIDTH,
+                   true);
  
      return true;
  }
  
  #if ENABLE_AVX512_SIMD16
-template<uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
+template <uint32_t TotalControlPoints, uint32_t CurrentControlPoints = 1>
  static bool PaPatchList_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    SetNextPaState_simd16(
-        pa,
-        PaPatchList_simd16<TotalControlPoints, CurrentControlPoints + 1>,
-        PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
-        PaPatchListSingle<TotalControlPoints>);
+    SetNextPaState_simd16(pa,
+                          PaPatchList_simd16<TotalControlPoints, CurrentControlPoints + 1>,
+                          PaPatchList<TotalControlPoints, CurrentControlPoints + 1>,
+                          PaPatchListSingle<TotalControlPoints>);
  
      return false;
  }
  
-template<uint32_t TotalControlPoints>
+template <uint32_t TotalControlPoints>
  static bool PaPatchListTerm_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      // We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
@@ -492,33 +563,35 @@ static bool PaPatchListTerm_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector
              float vec[KNOB_SIMD16_WIDTH];
              for (uint32_t lane = 0; lane < KNOB_SIMD16_WIDTH; ++lane)
              {
-                uint32_t input_cp = lane * TotalControlPoints + cp;
-                uint32_t input_vec = input_cp / KNOB_SIMD16_WIDTH;
+                uint32_t input_cp   = lane * TotalControlPoints + cp;
+                uint32_t input_vec  = input_cp / KNOB_SIMD16_WIDTH;
                  uint32_t input_lane = input_cp % KNOB_SIMD16_WIDTH;
  
                  const float* pInputVec = (const float*)(&PaGetSimdVector(pa, input_vec, slot)[i]);
-                vec[lane] = pInputVec[input_lane];
+                vec[lane]              = pInputVec[input_lane];
              }
              verts[cp][i] = _simd16_loadu_ps(vec);
          }
      }
  
-    SetNextPaState_simd16(
-        pa,
-        PaPatchList_simd16<TotalControlPoints>,
-        PaPatchList<TotalControlPoints>,
-        PaPatchListSingle<TotalControlPoints>,
-        0,
-        PA_STATE_OPT::SIMD_WIDTH,
-        true);
+    SetNextPaState_simd16(pa,
+                          PaPatchList_simd16<TotalControlPoints>,
+                          PaPatchList<TotalControlPoints>,
+                          PaPatchListSingle<TotalControlPoints>,
+                          0,
+                          PA_STATE_OPT::SIMD_WIDTH,
+                          true);
  
      return true;
  }
  
  #endif
-#define PA_PATCH_LIST_TERMINATOR(N) \
-    template<> bool PaPatchList<N, N>(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])\
-                           { return PaPatchListTerm<N>(pa, slot, verts); }
+#define PA_PATCH_LIST_TERMINATOR(N)                                              \
+    template <>                                                                  \
+    bool PaPatchList<N, N>(PA_STATE_OPT & pa, uint32_t slot, simdvector verts[]) \
+    {                                                                            \
+        return PaPatchListTerm<N>(pa, slot, verts);                              \
+    }
  PA_PATCH_LIST_TERMINATOR(1)
  PA_PATCH_LIST_TERMINATOR(2)
  PA_PATCH_LIST_TERMINATOR(3)
@@ -554,9 +627,12 @@ PA_PATCH_LIST_TERMINATOR(32)
  #undef PA_PATCH_LIST_TERMINATOR
  
  #if ENABLE_AVX512_SIMD16
-#define PA_PATCH_LIST_TERMINATOR_SIMD16(N) \
-    template<> bool PaPatchList_simd16<N, N>(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])\
-                           { return PaPatchListTerm_simd16<N>(pa, slot, verts); }
+#define PA_PATCH_LIST_TERMINATOR_SIMD16(N)                                                \
+    template <>                                                                           \
+    bool PaPatchList_simd16<N, N>(PA_STATE_OPT & pa, uint32_t slot, simd16vector verts[]) \
+    {                                                                                     \
+        return PaPatchListTerm_simd16<N>(pa, slot, verts);                                \
+    }
  PA_PATCH_LIST_TERMINATOR_SIMD16(1)
  PA_PATCH_LIST_TERMINATOR_SIMD16(2)
  PA_PATCH_LIST_TERMINATOR_SIMD16(3)
@@ -595,13 +671,13 @@ PA_PATCH_LIST_TERMINATOR_SIMD16(32)
  bool PaTriList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaTriList1, PaTriListSingle0);
-    return false;    // Not enough vertices to assemble 4 or 8 triangles.
+    return false; // Not enough vertices to assemble 4 or 8 triangles.
  }
  
  bool PaTriList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaTriList2, PaTriListSingle0);
-    return false;    // Not enough vertices to assemble 8 triangles.
+    return false; // Not enough vertices to assemble 8 triangles.
  }
  
  bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -614,8 +690,8 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -626,8 +702,8 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-        const simd16vector &c_16 = PaGetSimdVector_simd16(pa, 2, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -638,9 +714,9 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, 0, slot);
-    simdvector &b = PaGetSimdVector(pa, 1, slot);
-    simdvector &c = PaGetSimdVector(pa, 2, slot);
+    simdvector& a = PaGetSimdVector(pa, 0, slot);
+    simdvector& b = PaGetSimdVector(pa, 1, slot);
+    simdvector& c = PaGetSimdVector(pa, 2, slot);
  
  #endif
      simdscalar s;
@@ -653,25 +729,25 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      for (int i = 0; i < 4; ++i)
      {
          simdvector& v0 = verts[0];
-        v0[i] = _simd_blend_ps(a[i], b[i], 0x92);
-        v0[i] = _simd_blend_ps(v0[i], c[i], 0x24);
-        v0[i] = _simd_permute_ps_i(v0[i], 0x6C);
-        s = _simd_permute2f128_ps(v0[i], v0[i], 0x21);
-        v0[i] = _simd_blend_ps(v0[i], s, 0x44);
+        v0[i]          = _simd_blend_ps(a[i], b[i], 0x92);
+        v0[i]          = _simd_blend_ps(v0[i], c[i], 0x24);
+        v0[i]          = _simd_permute_ps_i(v0[i], 0x6C);
+        s              = _simd_permute2f128_ps(v0[i], v0[i], 0x21);
+        v0[i]          = _simd_blend_ps(v0[i], s, 0x44);
  
          simdvector& v1 = verts[1];
-        v1[i] = _simd_blend_ps(a[i], b[i], 0x24);
-        v1[i] = _simd_blend_ps(v1[i], c[i], 0x49);
-        v1[i] = _simd_permute_ps_i(v1[i], 0xB1);
-        s = _simd_permute2f128_ps(v1[i], v1[i], 0x21);
-        v1[i] = _simd_blend_ps(v1[i], s, 0x66);
+        v1[i]          = _simd_blend_ps(a[i], b[i], 0x24);
+        v1[i]          = _simd_blend_ps(v1[i], c[i], 0x49);
+        v1[i]          = _simd_permute_ps_i(v1[i], 0xB1);
+        s              = _simd_permute2f128_ps(v1[i], v1[i], 0x21);
+        v1[i]          = _simd_blend_ps(v1[i], s, 0x66);
  
          simdvector& v2 = verts[2];
-        v2[i] = _simd_blend_ps(a[i], b[i], 0x49);
-        v2[i] = _simd_blend_ps(v2[i], c[i], 0x92);
-        v2[i] = _simd_permute_ps_i(v2[i], 0xC6);
-        s = _simd_permute2f128_ps(v2[i], v2[i], 0x21);
-        v2[i] = _simd_blend_ps(v2[i], s, 0x22);
+        v2[i]          = _simd_blend_ps(a[i], b[i], 0x49);
+        v2[i]          = _simd_blend_ps(v2[i], c[i], 0x92);
+        v2[i]          = _simd_permute_ps_i(v2[i], 0xC6);
+        s              = _simd_permute2f128_ps(v2[i], v2[i], 0x21);
+        v2[i]          = _simd_blend_ps(v2[i], s, 0x22);
      }
  
  #elif KNOB_ARCH >= KNOB_ARCH_AVX2
@@ -686,8 +762,8 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -698,8 +774,8 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
-        const simd16vector &c_16 = PaGetSimdVector_simd16(pa, 2, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& c_16 = PaGetSimdVector_simd16(pa, 2, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -710,18 +786,18 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    const simdvector &a = PaGetSimdVector(pa, 0, slot);
-    const simdvector &b = PaGetSimdVector(pa, 1, slot);
-    const simdvector &c = PaGetSimdVector(pa, 2, slot);
+    const simdvector& a = PaGetSimdVector(pa, 0, slot);
+    const simdvector& b = PaGetSimdVector(pa, 1, slot);
+    const simdvector& c = PaGetSimdVector(pa, 2, slot);
  
  #endif
      //  v0 -> a0 a3 a6 b1 b4 b7 c2 c5
      //  v1 -> a1 a4 a7 b2 b5 c0 c3 c6
      //  v2 -> a2 a5 b0 b3 b6 c1 c4 c7
  
-    simdvector &v0 = verts[0];
-    simdvector &v1 = verts[1];
-    simdvector &v2 = verts[2];
+    simdvector& v0 = verts[0];
+    simdvector& v1 = verts[1];
+    simdvector& v2 = verts[2];
  
      // for simd x, y, z, and w
      for (int i = 0; i < 4; ++i)
@@ -744,30 +820,32 @@ bool PaTriList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  bool PaTriList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaTriList1_simd16, PaTriList1, PaTriListSingle0);
-    return false;    // Not enough vertices to assemble 16 triangles
+    return false; // Not enough vertices to assemble 16 triangles
  }
  
  bool PaTriList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaTriList2_simd16, PaTriList2, PaTriListSingle0);
-    return false;    // Not enough vertices to assemble 16 triangles
+    return false; // Not enough vertices to assemble 16 triangles
  }
  
  bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
+    // clang-format off
+
  #if KNOB_ARCH >= KNOB_ARCH_AVX2
      const simd16scalari perm0 = _simd16_set_epi32(13, 10, 7, 4, 1, 14, 11,  8, 5, 2, 15, 12,  9, 6, 3, 0);
      const simd16scalari perm1 = _simd16_set_epi32(14, 11, 8, 5, 2, 15, 12,  9, 6, 3,  0, 13, 10, 7, 4, 1);
      const simd16scalari perm2 = _simd16_set_epi32(15, 12, 9, 6, 3,  0, 13, 10, 7, 4,  1, 14, 11, 8, 5, 2);
-#else   // KNOB_ARCH == KNOB_ARCH_AVX
+#else // KNOB_ARCH == KNOB_ARCH_AVX
      simd16scalar perm0 = _simd16_setzero_ps();
      simd16scalar perm1 = _simd16_setzero_ps();
      simd16scalar perm2 = _simd16_setzero_ps();
  #endif
  
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
-    const simd16vector &c = PaGetSimdVector_simd16(pa, 2, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
+    const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
  
      const simd16mask mask0 = 0x4924;
      const simd16mask mask1 = 0x2492;
@@ -777,16 +855,16 @@ bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
      //  v1 -> a1 a4 a7 aA aD b0 b3 b6 b9 bC bF c2 c5 c8 cB cE
      //  v2 -> a2 a5 a8 aB aE b1 b4 b7 bA bD c0 c3 c6 c9 cC cF
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
-    simd16vector &v2 = verts[2];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
+    simd16vector& v2 = verts[2];
  
      // for simd16 x, y, z, and w
      for (int i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
-        simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float *>(&c[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
+        simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
  
          simd16scalar temp0 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask0), tempc, mask1);
          simd16scalar temp1 = _simd16_blend_ps(_simd16_blend_ps(tempa, tempb, mask2), tempc, mask0);
@@ -796,41 +874,43 @@ bool PaTriList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
          v0[i] = _simd16_permute_ps(temp0, perm0);
          v1[i] = _simd16_permute_ps(temp1, perm1);
          v2[i] = _simd16_permute_ps(temp2, perm2);
-#else   // #if KNOB_ARCH == KNOB_ARCH_AVX
-        
+#else // #if KNOB_ARCH == KNOB_ARCH_AVX
+
          // the general permutes (above) are prohibitively slow to emulate on AVX (its scalar code)
  
-        temp0 = _simd16_permute_ps_i(temp0, 0x6C);          // (0, 3, 2, 1) => 00 11 01 10 => 0x6C
-        perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
-        temp0 = _simd16_blend_ps(temp0, perm0, 0x4444);     // 0010 0010 0010 0010
-        perm0 = _simd16_permute2f128_ps(temp0, temp0, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
-        v0[i] = _simd16_blend_ps(temp0, perm0, 0x3838);     // 0001 1100 0001 1100
+        temp0 = _simd16_permute_ps_i(temp0, 0x6C);           // (0, 3, 2, 1) => 00 11 10 01 => 0x6C
+        perm0 = _simd16_permute2f128_ps(temp0, temp0, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+        temp0 = _simd16_blend_ps(temp0, perm0, 0x4444);      // 0010 0010 0010 0010
+        perm0 = _simd16_permute2f128_ps(temp0, temp0, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+        v0[i] = _simd16_blend_ps(temp0, perm0, 0x3838);      // 0001 1100 0001 1100
  
-        temp1 = _simd16_permute_ps_i(temp1, 0xB1);          // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
-        perm1 = _simd16_permute2f128_ps(temp1, temp1, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
-        temp1 = _simd16_blend_ps(temp1, perm1, 0x6666);     // 0010 0010 0010 0010
-        perm1 = _simd16_permute2f128_ps(temp1, temp1, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
-        v1[i] = _simd16_blend_ps(temp1, perm1, 0x1818);     // 0001 1000 0001 1000
+        temp1 = _simd16_permute_ps_i(temp1, 0xB1);           // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+        perm1 = _simd16_permute2f128_ps(temp1, temp1, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+        temp1 = _simd16_blend_ps(temp1, perm1, 0x6666);      // 0110 0110 0110 0110
+        perm1 = _simd16_permute2f128_ps(temp1, temp1, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+        v1[i] = _simd16_blend_ps(temp1, perm1, 0x1818);      // 0001 1000 0001 1000
  
-        temp2 = _simd16_permute_ps_i(temp2, 0xC6);          // (2, 1, 0, 3) => 01 10 00 11 => 0xC6
-        perm2 = _simd16_permute2f128_ps(temp2, temp2, 0xB1);// (1, 0, 3, 2) => 01 00 11 10 => 0xB1
-        temp2 = _simd16_blend_ps(temp2, perm2, 0x2222);     // 0100 0100 0100 0100
-        perm2 = _simd16_permute2f128_ps(temp2, temp2, 0x4E);// (2, 3, 0, 1) => 10 11 00 01 => 0x4E
-        v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C);     // 0011 1000 0011 1000
+        temp2 = _simd16_permute_ps_i(temp2, 0xC6);           // (2, 1, 0, 3) => 10 01 00 11 => 0xC6
+        perm2 = _simd16_permute2f128_ps(temp2, temp2, 0xB1); // (1, 0, 3, 2) => 01 00 11 10 => 0xB1
+        temp2 = _simd16_blend_ps(temp2, perm2, 0x2222);      // 0100 0100 0100 0100
+        perm2 = _simd16_permute2f128_ps(temp2, temp2, 0x4E); // (2, 3, 0, 1) => 10 11 00 01 => 0x4E
+        v2[i] = _simd16_blend_ps(temp2, perm2, 0x1C1C);      // 0011 1000 0011 1000
  #endif
      }
  
      SetNextPaState_simd16(pa, PaTriList0_simd16, PaTriList0, PaTriListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
-    const simd16vector &c = PaGetSimdVector_simd16(pa, 2, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
+    const simd16vector& c = PaGetSimdVector_simd16(pa, 2, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -929,9 +1009,9 @@ void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4
      // hold at least 8 triangles worth of data. We want to assemble a single
      // triangle with data in horizontal form.
  
-    const simdvector &a = PaGetSimdVector(pa, 0, slot);
-    const simdvector &b = PaGetSimdVector(pa, 1, slot);
-    const simdvector &c = PaGetSimdVector(pa, 2, slot);
+    const simdvector& a = PaGetSimdVector(pa, 0, slot);
+    const simdvector& b = PaGetSimdVector(pa, 1, slot);
+    const simdvector& c = PaGetSimdVector(pa, 2, slot);
  
      // Convert from vertical to horizontal.
      // Tri Pattern - provoking vertex is always v0
@@ -988,7 +1068,7 @@ void PaTriListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4
  bool PaTriStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaTriStrip1, PaTriStripSingle0);
-    return false;    // Not enough vertices to assemble 8 triangles.
+    return false; // Not enough vertices to assemble 8 triangles.
  }
  
  bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -999,7 +1079,7 @@ bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1009,7 +1089,7 @@ bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1019,13 +1099,13 @@ bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
-    simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+    simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+    simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
  
  #endif
      simdscalar s;
  
-    for(int i = 0; i < 4; ++i)
+    for (int i = 0; i < 4; ++i)
      {
          simdscalar a0 = a[i];
          simdscalar b0 = b[i];
@@ -1035,9 +1115,9 @@ bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
          //  v1 -> 13355779
          //  v2 -> 22446688
          simdvector& v0 = verts[0];
-        v0[i] = a0;
+        v0[i]          = a0;
  
-        //  s -> 4567891011 
+        //  s -> 4567891011
          s = _simd_permute2f128_ps(a0, b0, 0x21);
          //  s -> 23456789
          s = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
@@ -1055,17 +1135,19 @@ bool PaTriStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      return true;
  }
  
-#if  ENABLE_AVX512_SIMD16
+#if ENABLE_AVX512_SIMD16
  bool PaTriStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0);
-    return false;    // Not enough vertices to assemble 16 triangles.
+    return false; // Not enough vertices to assemble 16 triangles.
  }
  
  bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    // clang-format off
+
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      const simd16mask mask0 = 0xF000;
  
@@ -1073,37 +1155,39 @@ bool PaTriStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
      //  v1 -> a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
      //  v2 -> a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
-    simd16vector &v2 = verts[2];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
+    simd16vector& v2 = verts[2];
  
      // for simd16 x, y, z, and w
      for (int i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
  
-        simd16scalar perm0 = _simd16_permute2f128_ps(tempa, tempa, 0x39);// (0 3 2 1) = 00 11 10 01 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF a0 a1 a2 a3
-        simd16scalar perm1 = _simd16_permute2f128_ps(tempb, tempb, 0x39);// (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
+        simd16scalar perm0 = _simd16_permute2f128_ps(tempa, tempa, 0x39); // (0 3 2 1) = 00 11 10 01 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF a0 a1 a2 a3
+        simd16scalar perm1 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
  
-        simd16scalar blend = _simd16_blend_ps(perm0, perm1, mask0);                                 // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1 b2 b3
-        simd16scalar shuff = _simd16_shuffle_ps(tempa, blend, _MM_SHUFFLE(1, 0, 3, 2));             // a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1
+        simd16scalar blend = _simd16_blend_ps(perm0, perm1, mask0);                                  // a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1 b2 b3
+        simd16scalar shuff = _simd16_shuffle_ps(tempa, blend, _MM_SHUFFLE(1, 0, 3, 2));              // a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0 b1
  
-        v0[i] = tempa;                                                                              // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
-        v1[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(3, 1, 3, 1));                          // a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
-        v2[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(2, 2, 2, 2));                          // a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
+        v0[i] = tempa;                                                                               // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+        v1[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(3, 1, 3, 1));                           // a1 a3 a3 a5 a5 a7 a7 a9 a9 aB aB aD aD aF aF b1
+        v2[i] = _simd16_shuffle_ps(tempa, shuff, _MM_SHUFFLE(2, 2, 2, 2));                           // a2 a2 a4 a4 a6 a6 a8 a8 aA aA aC aC aE aE b0 b0
      }
  
      SetNextPaState_simd16(pa, PaTriStrip1_simd16, PaTriStrip1, PaTriStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -1198,8 +1282,8 @@ void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
          break;
      };
  #else
-    const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
-    const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+    const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+    const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
  
      // Convert from vertical to horizontal.
      // Tri Pattern - provoking vertex is always v0
@@ -1256,7 +1340,7 @@ void PaTriStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
  bool PaTriFan0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaTriFan1, PaTriFanSingle0);
-    return false;    // Not enough vertices to assemble 8 triangles.
+    return false; // Not enough vertices to assemble 8 triangles.
  }
  
  bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -1266,11 +1350,11 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      simdvector a;
      simdvector b;
  
-    const simd16vector &leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
+    const simd16vector& leadvert_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1282,7 +1366,7 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1294,15 +1378,15 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    const simdvector &leadVert = PaGetSimdVector(pa, pa.first, slot);
-    const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
-    const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+    const simdvector& leadVert = PaGetSimdVector(pa, pa.first, slot);
+    const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+    const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
  
  #endif
      simdscalar s;
  
      // need to fill vectors 1/2 with new verts, and v0 with anchor vert.
-    for(int i = 0; i < 4; ++i)
+    for (int i = 0; i < 4; ++i)
      {
          simdscalar a0 = a[i];
          simdscalar b0 = b[i];
@@ -1310,15 +1394,15 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
          simdscalar comp = leadVert[i];
  
          simdvector& v0 = verts[0];
-        v0[i] = _simd_shuffle_ps(comp, comp, _MM_SHUFFLE(0, 0, 0, 0));
-        v0[i] = _simd_permute2f128_ps(v0[i], comp, 0x00);
+        v0[i]          = _simd_shuffle_ps(comp, comp, _MM_SHUFFLE(0, 0, 0, 0));
+        v0[i]          = _simd_permute2f128_ps(v0[i], comp, 0x00);
  
          simdvector& v2 = verts[2];
-        s = _simd_permute2f128_ps(a0, b0, 0x21);
-        v2[i] = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
+        s              = _simd_permute2f128_ps(a0, b0, 0x21);
+        v2[i]          = _simd_shuffle_ps(a0, s, _MM_SHUFFLE(1, 0, 3, 2));
  
          simdvector& v1 = verts[1];
-        v1[i] = _simd_shuffle_ps(a0, v2[i], _MM_SHUFFLE(2, 1, 2, 1));
+        v1[i]          = _simd_shuffle_ps(a0, v2[i], _MM_SHUFFLE(2, 1, 2, 1));
      }
  
      SetNextPaState(pa, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
@@ -1329,14 +1413,16 @@ bool PaTriFan1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  bool PaTriFan0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0);
-    return false;    // Not enough vertices to assemble 16 triangles.
+    return false; // Not enough vertices to assemble 16 triangles.
  }
  
  bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    // clang-format off
+
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      const simd16mask mask0 = 0xF000;
  
@@ -1344,49 +1430,45 @@ bool PaTriFan1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
      //  v1 -> b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
      //  v2 -> b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
-    simd16vector &v2 = verts[2];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
+    simd16vector& v2 = verts[2];
  
      // for simd16 x, y, z, and w
      for (uint32_t i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
-        simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float *>(&c[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
+        simd16scalar tempc = _simd16_loadu_ps(reinterpret_cast<const float*>(&c[i]));
  
-        simd16scalar shuff = _simd16_shuffle_ps(tempa, tempa, _MM_SHUFFLE(0, 0, 0, 0));             // a0 a0 a0 a0 a4 a4 a4 a4 a0 a0 a0 a0 a4 a4 a4 a4
+        simd16scalar shuff = _simd16_shuffle_ps(tempa, tempa, _MM_SHUFFLE(0, 0, 0, 0));              // a0 a0 a0 a0 a4 a4 a4 a4 a8 a8 a8 a8 aC aC aC aC
  
-        v0[i] = _simd16_permute2f128_ps(shuff, shuff, 0x00);                                        // a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
+        v0[i] = _simd16_permute2f128_ps(shuff, shuff, 0x00);                                         // a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0 a0
  
-        simd16scalar temp0 = _simd16_permute2f128_ps(tempb, tempb, 0x39);// (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
-        simd16scalar temp1 = _simd16_permute2f128_ps(tempc, tempc, 0x39);// (0 3 2 1) = 00 11 10 01 // c4 c5 c6 c7 c8 c9 cA cB cC cD cE cF c0 c1 c2 c3
+        simd16scalar temp0 = _simd16_permute2f128_ps(tempb, tempb, 0x39); // (0 3 2 1) = 00 11 10 01 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF b0 b1 b2 b3
+        simd16scalar temp1 = _simd16_permute2f128_ps(tempc, tempc, 0x39); // (0 3 2 1) = 00 11 10 01 // c4 c5 c6 c7 c8 c9 cA cB cC cD cE cF c0 c1 c2 c3
  
-        simd16scalar blend = _simd16_blend_ps(temp0, temp1, mask0);                                 // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1 c2 c3
-#if 0
+        simd16scalar blend = _simd16_blend_ps(temp0, temp1, mask0);                                  // b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1 c2 c3
  
-        v2[i] = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2));                          // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-        v1[i] = _simd16_shuffle_ps(tempb, v2[i], _MM_SHUFFLE(2, 1, 2, 1));                          // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
-#else
-
-        simd16scalar temp2 = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2));             // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
+        simd16scalar temp2 = _simd16_shuffle_ps(tempb, blend, _MM_SHUFFLE(1, 0, 3, 2));              // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
  
-        v1[i] = _simd16_shuffle_ps(tempb, temp2, _MM_SHUFFLE(2, 1, 2, 1));                          // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
-        v2[i] = temp2;                                                                              // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
-#endif
+        v1[i] = _simd16_shuffle_ps(tempb, temp2, _MM_SHUFFLE(2, 1, 2, 1));                           // b1 b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0
+        v2[i] = temp2;                                                                               // b2 b3 b4 b5 b6 b7 b8 b9 bA bB bC bD bE bF c0 c1
      }
  
      SetNextPaState_simd16(pa, PaTriFan1_simd16, PaTriFan1, PaTriFanSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.first, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &c = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.first, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& c = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -1420,9 +1502,9 @@ void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4s
          verts[2] = swizzleLaneN(c, primIndex - 14);
      }
  #else
-    const simdvector &a = PaGetSimdVector(pa, pa.first, slot);
-    const simdvector &b = PaGetSimdVector(pa, pa.prev, slot);
-    const simdvector &c = PaGetSimdVector(pa, pa.cur, slot);
+    const simdvector& a = PaGetSimdVector(pa, pa.first, slot);
+    const simdvector& b = PaGetSimdVector(pa, pa.prev, slot);
+    const simdvector& c = PaGetSimdVector(pa, pa.cur, slot);
  
      // vert 0 from leading vertex
      verts[0] = swizzleLane0(a);
@@ -1452,7 +1534,7 @@ void PaTriFanSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4s
  bool PaQuadList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaQuadList1, PaQuadListSingle0);
-    return false;    // Not enough vertices to assemble 8 triangles.
+    return false; // Not enough vertices to assemble 8 triangles.
  }
  
  bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -1463,7 +1545,7 @@ bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1473,7 +1555,7 @@ bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1483,13 +1565,13 @@ bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, 0, slot);
-    simdvector &b = PaGetSimdVector(pa, 1, slot);
+    simdvector& a = PaGetSimdVector(pa, 0, slot);
+    simdvector& b = PaGetSimdVector(pa, 1, slot);
  
  #endif
      simdscalar s1, s2;
  
-    for(int i = 0; i < 4; ++i)
+    for (int i = 0; i < 4; ++i)
      {
          simdscalar a0 = a[i];
          simdscalar b0 = b[i];
@@ -1498,13 +1580,13 @@ bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
          s2 = _mm256_permute2f128_ps(a0, b0, 0x31);
  
          simdvector& v0 = verts[0];
-        v0[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(0, 0, 0, 0));
+        v0[i]          = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(0, 0, 0, 0));
  
          simdvector& v1 = verts[1];
-        v1[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(2, 1, 2, 1));
+        v1[i]          = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(2, 1, 2, 1));
  
          simdvector& v2 = verts[2];
-        v2[i] = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(3, 2, 3, 2));
+        v2[i]          = _simd_shuffle_ps(s1, s2, _MM_SHUFFLE(3, 2, 3, 2));
      }
  
      SetNextPaState(pa, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
@@ -1515,46 +1597,50 @@ bool PaQuadList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  bool PaQuadList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaQuadList1_simd16, PaQuadList1, PaQuadListSingle0);
-    return false;    // Not enough vertices to assemble 16 triangles.
+    return false; // Not enough vertices to assemble 16 triangles.
  }
  
  bool PaQuadList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+    // clang-format off
+
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
  
      //  v0 -> a0 a0 a4 a4 a8 a8 aC aC b0 b0 b0 b0 b0 b0 bC bC
      //  v1 -> a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
      //  v2 -> a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
-    simd16vector &v2 = verts[2];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
+    simd16vector& v2 = verts[2];
  
      // for simd16 x, y, z, and w
      for (uint32_t i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
  
-        simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88);// (2 0 2 0) = 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
-        simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD);// (3 1 3 1) = 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
+        simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) = 10 00 10 00 // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
+        simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) = 11 01 11 01 // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
  
-        v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(0, 0, 0, 0));                          // a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
-        v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 1, 2, 1));                          // a1 a2 a5 a6 a9 aA aD aE b1 b2 b6 b6 b9 bA bD bE
-        v2[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 2, 3, 2));                          // a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
+        v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(0, 0, 0, 0));                           // a0 a0 a4 a4 a8 a8 aC aC b0 b0 b4 b4 b8 b8 bC bC
+        v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 1, 2, 1));                           // a1 a2 a5 a6 a9 aA aD aE b1 b2 b5 b6 b9 bA bD bE
+        v2[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 2, 3, 2));                           // a2 a3 a6 a7 aA aB aE aF b2 b3 b6 b7 bA bB bE bF
      }
  
      SetNextPaState_simd16(pa, PaQuadList0_simd16, PaQuadList0, PaQuadListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaQuadListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -1661,8 +1747,8 @@ void PaQuadListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
          break;
      }
  #else
-    const simdvector &a = PaGetSimdVector(pa, 0, slot);
-    const simdvector &b = PaGetSimdVector(pa, 1, slot);
+    const simdvector& a = PaGetSimdVector(pa, 0, slot);
+    const simdvector& b = PaGetSimdVector(pa, 1, slot);
  
      switch (primIndex)
      {
@@ -1736,7 +1822,7 @@ bool PaLineLoop1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  #if USE_SIMD16_FRONTEND
          simdvector first;
  
-        const simd16vector &first_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
+        const simd16vector& first_16 = PaGetSimdVector_simd16(pa, pa.first, slot);
  
          if (!pa.useAlternateOffset)
          {
@@ -1754,14 +1840,14 @@ bool PaLineLoop1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
          }
  
  #else
-        simdvector &first = PaGetSimdVector(pa, pa.first, slot);
+        simdvector& first = PaGetSimdVector(pa, pa.first, slot);
  
  #endif
          for (int i = 0; i < 4; i++)
          {
-            float *firstVtx = (float *)&(first[i]);
-            float *targetVtx = (float *)&(verts[1][i]);
-            targetVtx[lane] = firstVtx[0];
+            float* firstVtx  = (float*)&(first[i]);
+            float* targetVtx = (float*)&(verts[1][i]);
+            targetVtx[lane]  = firstVtx[0];
          }
      }
  
@@ -1785,17 +1871,18 @@ bool PaLineLoop1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
          // loop reconnect now
          const int lane = pa.numPrims - pa.numPrimsComplete - 1;
  
-        const simd16vector &first = PaGetSimdVector_simd16(pa, pa.first, slot);
+        const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
  
          for (int i = 0; i < 4; i++)
          {
-            float *firstVtx = (float *)&(first[i]);
-            float *targetVtx = (float *)&(verts[1][i]);
-            targetVtx[lane] = firstVtx[0];
+            float* firstVtx  = (float*)&(first[i]);
+            float* targetVtx = (float*)&(verts[1][i]);
+            targetVtx[lane]  = firstVtx[0];
          }
      }
  
-    SetNextPaState_simd16(pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
+    SetNextPaState_simd16(
+        pa, PaLineLoop1_simd16, PaLineLoop1, PaLineLoopSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
      return true;
  }
  
@@ -1807,11 +1894,11 @@ void PaLineLoopSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
      if (pa.numPrimsComplete + primIndex == pa.numPrims - 1)
      {
  #if USE_SIMD16_FRONTEND
-        const simd16vector &first = PaGetSimdVector_simd16(pa, pa.first, slot);
+        const simd16vector& first = PaGetSimdVector_simd16(pa, pa.first, slot);
  
          verts[1] = swizzleLane0(first);
  #else
-        const simdvector &first = PaGetSimdVector(pa, pa.first, slot);
+        const simdvector& first = PaGetSimdVector(pa, pa.first, slot);
  
          verts[1] = swizzleLane0(first);
  #endif
@@ -1821,7 +1908,7 @@ void PaLineLoopSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
  bool PaLineList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaLineList1, PaLineListSingle0);
-    return false;    // Not enough vertices to assemble 8 lines
+    return false; // Not enough vertices to assemble 8 lines
  }
  
  bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -1832,7 +1919,7 @@ bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1842,7 +1929,7 @@ bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -1852,8 +1939,8 @@ bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, 0, slot);
-    simdvector &b = PaGetSimdVector(pa, 1, slot);
+    simdvector& a = PaGetSimdVector(pa, 0, slot);
+    simdvector& b = PaGetSimdVector(pa, 1, slot);
  
  #endif
      /// @todo: verify provoking vertex is correct
@@ -1885,43 +1972,47 @@ bool PaLineList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  bool PaLineList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaLineList1_simd16, PaLineList1, PaLineListSingle0);
-    return false;    // Not enough vertices to assemble 16 lines
+    return false; // Not enough vertices to assemble 16 lines
  }
  
  bool PaLineList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+    // clang-format off
+
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
  
      // v0 -> a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
      // v1 -> a1 a3 a5 a7 a9 aB aD aF b1 b3 b4 b7 b9 bB bD bF
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
  
      // for simd16 x, y, z, and w
      for (int i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
  
-        simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88);// (2 0 2 0) 10 00 10 00   // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b9 b9 bA bB
-        simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD);// (3 1 3 1) 11 01 11 01   // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
+        simd16scalar temp0 = _simd16_permute2f128_ps(tempa, tempb, 0x88); // (2 0 2 0) 10 00 10 00   // a0 a1 a2 a3 a8 a9 aA aB b0 b1 b2 b3 b8 b9 bA bB
+        simd16scalar temp1 = _simd16_permute2f128_ps(tempa, tempb, 0xDD); // (3 1 3 1) 11 01 11 01   // a4 a5 a6 a7 aC aD aE aF b4 b5 b6 b7 bC bD bE bF
  
-        v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 0, 2, 0));                          // a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
-        v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 1, 3, 1));                          // a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
+        v0[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(2, 0, 2, 0));                           // a0 a2 a4 a6 a8 aA aC aE b0 b2 b4 b6 b8 bA bC bE
+        v1[i] = _simd16_shuffle_ps(temp0, temp1, _MM_SHUFFLE(3, 1, 3, 1));                           // a1 a3 a5 a7 a9 aB aD aF b1 b3 b5 b7 b9 bB bD bF
      }
  
      SetNextPaState_simd16(pa, PaLineList0_simd16, PaLineList0, PaLineListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, 1, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, 1, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -1996,8 +2087,8 @@ void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
          break;
      }
  #else
-    const simdvector &a = PaGetSimdVector(pa, 0, slot);
-    const simdvector &b = PaGetSimdVector(pa, 1, slot);
+    const simdvector& a = PaGetSimdVector(pa, 0, slot);
+    const simdvector& b = PaGetSimdVector(pa, 1, slot);
  
      switch (primIndex)
      {
@@ -2040,7 +2131,7 @@ void PaLineListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd
  bool PaLineStrip0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SetNextPaState(pa, PaLineStrip1, PaLineStripSingle0);
-    return false;    // Not enough vertices to assemble 8 lines
+    return false; // Not enough vertices to assemble 8 lines
  }
  
  bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
@@ -2051,7 +2142,7 @@ bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, pa.prev, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2061,7 +2152,7 @@ bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2071,8 +2162,8 @@ bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
-    simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+    simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+    simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
  
  #endif
      /// @todo: verify provoking vertex is correct
@@ -2085,7 +2176,7 @@ bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  
      verts[0] = a;
  
-    for(uint32_t i = 0; i < 4; ++i)
+    for (uint32_t i = 0; i < 4; ++i)
      {
          // 1 2 3 x 5 6 7 x
          __m256 vPermA = _mm256_permute_ps(a.v[i], 0x39); // indices hi->low 00 11 10 01 (0 3 2 1)
@@ -2106,47 +2197,51 @@ bool PaLineStrip1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  bool PaLineStrip0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0);
-    return false;    // Not enough vertices to assemble 16 lines
+    return false; // Not enough vertices to assemble 16 lines
  }
  
  bool PaLineStrip1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
+    // clang-format off
+
      const simd16scalari perm = _simd16_set_epi32(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
  
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      const simd16mask mask0 = 0x0001;
  
      // v0 -> a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
      // v1 -> a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
  
-    simd16vector &v0 = verts[0];
-    simd16vector &v1 = verts[1];
+    simd16vector& v0 = verts[0];
+    simd16vector& v1 = verts[1];
  
-    v0 = a;                                                                                         // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+    v0 = a; // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
  
      // for simd16 x, y, z, and w
      for (int i = 0; i < 4; i += 1)
      {
-        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float *>(&a[i]));
-        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float *>(&b[i]));
+        simd16scalar tempa = _simd16_loadu_ps(reinterpret_cast<const float*>(&a[i]));
+        simd16scalar tempb = _simd16_loadu_ps(reinterpret_cast<const float*>(&b[i]));
  
-        simd16scalar temp = _simd16_blend_ps(tempa, tempb, mask0);                                  // b0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
+        simd16scalar temp = _simd16_blend_ps(tempa, tempb, mask0); // b0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF
  
-        v1[i] = _simd16_permute_ps(temp, perm);                                                     // a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
+        v1[i] = _simd16_permute_ps(temp, perm);                    // a1 a2 a3 a4 a5 a6 a7 a8 a9 aA aB aC aD aE aF b0
      }
  
      SetNextPaState_simd16(pa, PaLineStrip1_simd16, PaLineStrip1, PaLineStripSingle0, 0, PA_STATE_OPT::SIMD_WIDTH);
      return true;
+
+    // clang-format on
  }
  
  #endif
  void PaLineStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, pa.prev, slot);
-    const simd16vector &b = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, pa.prev, slot);
+    const simd16vector& b = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -2221,8 +2316,8 @@ void PaLineStripSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, sim
          break;
      }
  #else
-    const simdvector &a = PaGetSimdVector(pa, pa.prev, slot);
-    const simdvector &b = PaGetSimdVector(pa, pa.cur, slot);
+    const simdvector& a = PaGetSimdVector(pa, pa.prev, slot);
+    const simdvector& b = PaGetSimdVector(pa, pa.cur, slot);
  
      switch (primIndex)
      {
@@ -2267,7 +2362,7 @@ bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  #if USE_SIMD16_FRONTEND
      simdvector a;
  
-    const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
  
      if (!pa.useAlternateOffset)
      {
@@ -2285,10 +2380,10 @@ bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, 0, slot);
+    simdvector& a = PaGetSimdVector(pa, 0, slot);
  
  #endif
-    verts[0] = a;  // points only have 1 vertex.
+    verts[0] = a; // points only have 1 vertex.
  
      SetNextPaState(pa, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
@@ -2297,11 +2392,12 @@ bool PaPoints0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  #if ENABLE_AVX512_SIMD16
  bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
-    simd16vector &a = PaGetSimdVector_simd16(pa, pa.cur, slot);
+    simd16vector& a = PaGetSimdVector_simd16(pa, pa.cur, slot);
  
-    verts[0] = a;  // points only have 1 vertex.
+    verts[0] = a; // points only have 1 vertex.
  
-    SetNextPaState_simd16(pa, PaPoints0_simd16, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
+    SetNextPaState_simd16(
+        pa, PaPoints0_simd16, PaPoints0, PaPointsSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
  }
  
@@ -2309,7 +2405,7 @@ bool PaPoints0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
  #if USE_SIMD16_FRONTEND
-    const simd16vector &a = PaGetSimdVector_simd16(pa, 0, slot);
+    const simd16vector& a = PaGetSimdVector_simd16(pa, 0, slot);
  
      if (pa.useAlternateOffset)
      {
@@ -2318,7 +2414,7 @@ void PaPointsSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4s
  
      verts[0] = swizzleLaneN(a, primIndex);
  #else
-    const simdvector &a = PaGetSimdVector(pa, 0, slot);
+    const simdvector& a = PaGetSimdVector(pa, 0, slot);
  
      verts[0] = swizzleLaneN(a, primIndex);
  #endif
@@ -2332,7 +2428,7 @@ bool PaRectList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
      SetNextPaState(pa, PaRectList1, PaRectListSingle0);
      return false;
  }
- 
+
  //////////////////////////////////////////////////////////////////////////
  /// @brief State 1 for RECT_LIST topology.
  ///   Rect lists has the following format.
@@ -2341,16 +2437,16 @@ bool PaRectList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  ///         | \ |      | \ |      | \ |       | \ |
  ///      v1 o---o   v4 o---o   v7 o---o   v10 o---o
  ///            v0         v3         v6          v9
-/// 
+///
  ///   Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-/// 
+///
  ///   tri0 = { v0, v1, v2 }  tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
  ///   tri2 = { v3, v4, v5 }  tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
  ///   etc.
-/// 
+///
  ///   PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
  ///   where v0 contains all the first vertices for 8 triangles.
-/// 
+///
  ///     Result:
  ///      verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
  ///      verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
@@ -2358,20 +2454,18 @@ bool PaRectList0(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  ///
  /// @param pa - State for PA state machine.
  /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList1(
-    PA_STATE_OPT& pa,
-    uint32_t slot,
-    simdvector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList1(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
-    // SIMD vectors a and b are the last two vertical outputs from the vertex shader.
+    // SIMD vectors a and b are the last two vertical outputs from the vertex shader.
  #if USE_SIMD16_FRONTEND
      simdvector a;
      simdvector b;
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2381,54 +2475,60 @@ bool PaRectList1(
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
              a[i] = _simd16_extract_ps(b_16[i], 0);
-            b[i] = _simd16_extract_ps(b_16[i], 1);;
+            b[i] = _simd16_extract_ps(b_16[i], 1);
+            ;
          }
      }
  
  #else
-    simdvector &a = PaGetSimdVector(pa, 0, slot);           // a[] = { v0, v1,  v2,  v3,  v4,  v5,  v6,  v7 }
-    simdvector &b = PaGetSimdVector(pa, 1, slot);           // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
+    simdvector& a = PaGetSimdVector(pa, 0, slot); // a[] = { v0, v1,  v2,  v3,  v4,  v5,  v6,  v7 }
+    simdvector& b = PaGetSimdVector(pa, 1, slot); // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
  
  #endif
      __m256 tmp0, tmp1, tmp2;
  
      // Loop over each component in the simdvector.
-    for(int i = 0; i < 4; ++i)
+    for (int i = 0; i < 4; ++i)
      {
-        simdvector& v0 = verts[0];                          // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
-        tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01);    // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
-        v0[i] = _mm256_blend_ps(a[i], tmp0, 0x20);          //   v0 = {  v0,   *,   *,  v3,  *, v9,  v6,  * } where * is don't care.
-        tmp1  = _mm256_permute_ps(v0[i], 0xF0);             // tmp1 = {  v0,  v0,  v3,  v3,  *,  *,  *,  * }
-        v0[i] = _mm256_permute_ps(v0[i], 0x5A);             //   v0 = {   *,   *,   *,   *,  v6, v6, v9, v9 }
-        v0[i] = _mm256_blend_ps(tmp1, v0[i], 0xF0);         //   v0 = {  v0,  v0,  v3,  v3,  v6, v6, v9, v9 }
+        simdvector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
+        tmp0           = _mm256_permute2f128_ps(
+            b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
+        v0[i] = _mm256_blend_ps(
+            a[i],
+            tmp0,
+            0x20); //   v0 = {  v0,   *,   *,  v3,  *, v9,  v6, * } where * is don't care.
+        tmp1  = _mm256_permute_ps(v0[i], 0xF0); // tmp1 = {  v0,  v0,  v3,  v3,  *,  *,  *, * }
+        v0[i] = _mm256_permute_ps(v0[i], 0x5A); //   v0 = {   *,   *,   *,   *,  v6, v6, v9, v9 }
+        v0[i] =
+            _mm256_blend_ps(tmp1, v0[i], 0xF0); //   v0 = {  v0,  v0,  v3,  v3,  v6, v6, v9, v9 }
  
          /// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
          ///      AVX2 should make this much cheaper.
-        simdvector& v1 = verts[1];                          // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
-        v1[i] = _mm256_permute_ps(a[i], 0x09);              //   v1 = { v1, v2,  *,  *,  *, *,  *, * }
-        tmp1  = _mm256_permute_ps(a[i], 0x43);              // tmp1 = {  *,  *,  *,  *, v7, *, v4, v5 }
-        tmp2  = _mm256_blend_ps(v1[i], tmp1, 0xF0);         // tmp2 = { v1, v2,  *,  *, v7, *, v4, v5 }
-        tmp1  = _mm256_permute2f128_ps(tmp2, tmp2, 0x1);    // tmp1 = { v7,  *, v4,  v5, *  *,  *,  * }
-        v1[i] = _mm256_permute_ps(tmp0, 0xE0);              //   v1 = {  *,  *,  *,  *,  *, v8, v10, v11 }
-        v1[i] = _mm256_blend_ps(tmp2, v1[i], 0xE0);         //   v1 = { v1, v2,  *,  *, v7, v8, v10, v11 }
-        v1[i] = _mm256_blend_ps(v1[i], tmp1, 0x0C);         //   v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
+        simdvector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+        v1[i]          = _mm256_permute_ps(a[i], 0x09);  //   v1 = { v1, v2,  *,  *,  *, *,  *, * }
+        tmp1           = _mm256_permute_ps(a[i], 0x43);  // tmp1 = {  *,  *,  *,  *, v7, *, v4, v5 }
+        tmp2  = _mm256_blend_ps(v1[i], tmp1, 0xF0);      // tmp2 = { v1, v2,  *,  *, v7, *, v4, v5 }
+        tmp1  = _mm256_permute2f128_ps(tmp2, tmp2, 0x1); // tmp1 = { v7,  *, v4,  v5, *, *,  *,  * }
+        v1[i] = _mm256_permute_ps(tmp0, 0xE0);      //   v1 = {  *,  *,  *,  *,  *, v8, v10, v11 }
+        v1[i] = _mm256_blend_ps(tmp2, v1[i], 0xE0); //   v1 = { v1, v2,  *,  *, v7, v8, v10, v11 }
+        v1[i] = _mm256_blend_ps(v1[i], tmp1, 0x0C); //   v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
  
          // verts[2] = { v2,  w, v5,  x, v8,  y, v11, z }
-        simdvector& v2 = verts[2];                          // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11, z }
-        v2[i] = _mm256_permute_ps(tmp0, 0x30);              //   v2 = { *, *, *, *, v8, *, v11, * }
-        tmp1  = _mm256_permute_ps(tmp2, 0x31);              // tmp1 = { v2, *, v5, *, *, *, *, * }
-        v2[i] = _mm256_blend_ps(tmp1, v2[i], 0xF0);
+        simdvector& v2 = verts[2]; // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11, z }
+        v2[i]          = _mm256_permute_ps(tmp0, 0x30); //   v2 = { *, *, *, *, v8, *, v11, * }
+        tmp1           = _mm256_permute_ps(tmp2, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
+        v2[i]          = _mm256_blend_ps(tmp1, v2[i], 0xF0);
  
          // Need to compute 4th implied vertex for the rectangle.
          tmp2  = _mm256_sub_ps(v0[i], v1[i]);
-        tmp2  = _mm256_add_ps(tmp2, v2[i]);                 // tmp2 = {  w,  *,  x, *, y,  *,  z,  * }
-        tmp2  = _mm256_permute_ps(tmp2, 0xA0);              // tmp2 = {  *,  w,  *, x, *,   y,  *,  z }
-        v2[i] = _mm256_blend_ps(v2[i], tmp2, 0xAA);         //   v2 = { v2,  w, v5, x, v8,  y, v11, z }
+        tmp2  = _mm256_add_ps(tmp2, v2[i]);         // tmp2 = {  w,  *,  x, *, y,  *,  z,  * }
+        tmp2  = _mm256_permute_ps(tmp2, 0xA0);      // tmp2 = {  *,  w,  *, x, *,   y,  *,  z }
+        v2[i] = _mm256_blend_ps(v2[i], tmp2, 0xAA); //   v2 = { v2,  w, v5, x, v8,  y, v11, z }
      }
  
      SetNextPaState(pa, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
@@ -2440,11 +2540,9 @@ bool PaRectList1(
  ///        Not implemented unless there is a use case for more then 8 rects.
  /// @param pa - State for PA state machine.
  /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList2(
-    PA_STATE_OPT& pa,
-    uint32_t slot,
-    simdvector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList2(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])
  {
      SWR_INVALID("Is rect list used for anything other then clears?");
      SetNextPaState(pa, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
@@ -2469,16 +2567,16 @@ bool PaRectList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  ///         | \ |      | \ |      | \ |       | \ |
  ///      v1 o---o   v4 o---o   v7 o---o   v10 o---o
  ///            v0         v3         v6          v9
-/// 
+///
  ///   Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
-/// 
+///
  ///   tri0 = { v0, v1, v2 }  tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
  ///   tri2 = { v3, v4, v5 }  tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
  ///   etc.
-/// 
+///
  ///   PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
  ///   where v0 contains all the first vertices for 8 triangles.
-/// 
+///
  ///     Result:
  ///      verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
  ///      verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
@@ -2486,18 +2584,19 @@ bool PaRectList0_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  ///
  /// @param pa - State for PA state machine.
  /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList1_simd16(
-    PA_STATE_OPT& pa,
-    uint32_t slot,
-    simd16vector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList1_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
+    // clang-format off
+
      simdvector a;
      simdvector b;
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot); // a[] = { v0, v1,  v2,  v3,  v4,  v5,  v6,  v7, v8, v9, v10, v11, v12, v13, v14, v15 }
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot); // a[] = { v0, v1,  v2,  v3,  v4,  v5,  v6,  v7,
+                                                                        //         v8, v9, v10, v11, v12, v13, v14, v15 }
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2507,7 +2606,7 @@ bool PaRectList1_simd16(
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot); // b[] = { v16...but not used by this implementation.. }
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot); // b[] = { v16...but not used by this implementation.. }
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2516,45 +2615,45 @@ bool PaRectList1_simd16(
          }
      }
  
-    simd16vector &v0 = verts[0];                            // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
-    simd16vector &v1 = verts[1];                            // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
-    simd16vector &v2 = verts[2];                            // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11, z }
+    simd16vector& v0 = verts[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6,  v9,  v9 }
+    simd16vector& v1 = verts[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+    simd16vector& v2 = verts[2]; // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11,   z }
  
      // Loop over each component in the simdvector.
      for (int i = 0; i < 4; i += 1)
      {
-        simdscalar v0_lo;                                   // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
-        simdscalar v1_lo;                                   // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
-        simdscalar v2_lo;                                   // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11, z }
+        simdscalar v0_lo; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
+        simdscalar v1_lo; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
+        simdscalar v2_lo; // verts[2] needs to be { v2,  w, v5,  x, v8,  y, v11, z }
  
          __m256 tmp0, tmp1, tmp2;
  
-        tmp0 = _mm256_permute2f128_ps(b[i], b[i], 0x01);    // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
-        v0_lo = _mm256_blend_ps(a[i], tmp0, 0x20);          //   v0 = {  v0,   *,   *,  v3,  *, v9,  v6,  * } where * is don't care.
-        tmp1 = _mm256_permute_ps(v0_lo, 0xF0);              // tmp1 = {  v0,  v0,  v3,  v3,  *,  *,  *,  * }
-        v0_lo = _mm256_permute_ps(v0_lo, 0x5A);             //   v0 = {   *,   *,   *,   *,  v6, v6, v9, v9 }
-        v0_lo = _mm256_blend_ps(tmp1, v0_lo, 0xF0);         //   v0 = {  v0,  v0,  v3,  v3,  v6, v6, v9, v9 }
+        tmp0  = _mm256_permute2f128_ps(b[i], b[i], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
+        v0_lo = _mm256_blend_ps(a[i], tmp0, 0x20);        //   v0 = {  v0,   *,   *,  v3,  *, v9,  v6,   * } where * is don't care.
+        tmp1  = _mm256_permute_ps(v0_lo, 0xF0);           // tmp1 = {  v0,  v0,  v3,  v3,  *,  *,   *,   * }
+        v0_lo = _mm256_permute_ps(v0_lo, 0x5A);           //   v0 = {   *,   *,   *,   *,  v6, v6, v9,  v9 }
+        v0_lo = _mm256_blend_ps(tmp1, v0_lo, 0xF0);       //   v0 = {  v0,  v0,  v3,  v3,  v6, v6, v9,  v9 }
  
          /// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
          ///      AVX2 should make this much cheaper.
-        v1_lo = _mm256_permute_ps(a[i], 0x09);              //   v1 = { v1, v2,  *,  *,  *, *,  *, * }
-        tmp1 = _mm256_permute_ps(a[i], 0x43);               // tmp1 = {  *,  *,  *,  *, v7, *, v4, v5 }
-        tmp2 = _mm256_blend_ps(v1_lo, tmp1, 0xF0);          // tmp2 = { v1, v2,  *,  *, v7, *, v4, v5 }
-        tmp1 = _mm256_permute2f128_ps(tmp2, tmp2, 0x1);     // tmp1 = { v7,  *, v4,  v5, *  *,  *,  * }
-        v1_lo = _mm256_permute_ps(tmp0, 0xE0);              //   v1 = {  *,  *,  *,  *,  *, v8, v10, v11 }
-        v1_lo = _mm256_blend_ps(tmp2, v1_lo, 0xE0);         //   v1 = { v1, v2,  *,  *, v7, v8, v10, v11 }
-        v1_lo = _mm256_blend_ps(v1_lo, tmp1, 0x0C);         //   v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
+        v1_lo = _mm256_permute_ps(a[i], 0x09);            //   v1 = { v1, v2,  *,  *,  *,  *,   *,   * }
+        tmp1  = _mm256_permute_ps(a[i], 0x43);            // tmp1 = {  *,  *,  *,  *, v7,  *,  v4,  v5 }
+        tmp2  = _mm256_blend_ps(v1_lo, tmp1, 0xF0);       // tmp2 = { v1, v2,  *,  *, v7,  *,  v4,  v5 }
+        tmp1  = _mm256_permute2f128_ps(tmp2, tmp2, 0x1);  // tmp1 = { v7,  *, v4,  v5, *,  *,   *,   * }
+        v1_lo = _mm256_permute_ps(tmp0, 0xE0);            //   v1 = {  *,  *,  *,  *,  *, v8, v10, v11 }
+        v1_lo = _mm256_blend_ps(tmp2, v1_lo, 0xE0);       //   v1 = { v1, v2,  *,  *, v7, v8, v10, v11 }
+        v1_lo = _mm256_blend_ps(v1_lo, tmp1, 0x0C);       //   v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
  
          // verts[2] = { v2,  w, v5,  x, v8,  y, v11, z }
-        v2_lo = _mm256_permute_ps(tmp0, 0x30);              //   v2 = { *, *, *, *, v8, *, v11, * }
-        tmp1 = _mm256_permute_ps(tmp2, 0x31);               // tmp1 = { v2, *, v5, *, *, *, *, * }
+        v2_lo = _mm256_permute_ps(tmp0, 0x30);            //   v2 = { *,  *,  *, *, v8, *, v11, * }
+        tmp1  = _mm256_permute_ps(tmp2, 0x31);            // tmp1 = { v2, *, v5, *,  *, *,   *, * }
          v2_lo = _mm256_blend_ps(tmp1, v2_lo, 0xF0);
  
          // Need to compute 4th implied vertex for the rectangle.
-        tmp2 = _mm256_sub_ps(v0_lo, v1_lo);
-        tmp2 = _mm256_add_ps(tmp2, v2_lo);                  // tmp2 = {  w,  *,  x, *, y,  *,  z,  * }
-        tmp2 = _mm256_permute_ps(tmp2, 0xA0);               // tmp2 = {  *,  w,  *, x, *,   y,  *,  z }
-        v2_lo = _mm256_blend_ps(v2_lo, tmp2, 0xAA);         //   v2 = { v2,  w, v5, x, v8,  y, v11, z }
+        tmp2  = _mm256_sub_ps(v0_lo, v1_lo);
+        tmp2  = _mm256_add_ps(tmp2, v2_lo);               // tmp2 = {  w,  *,  x, *, y,  *,  z,  * }
+        tmp2  = _mm256_permute_ps(tmp2, 0xA0);            // tmp2 = {  *,  w,  *, x, *,  y,  *,  z }
+        v2_lo = _mm256_blend_ps(v2_lo, tmp2, 0xAA);       //   v2 = { v2,  w, v5, x, v8, y, v11, z }
  
          v0[i] = _simd16_insert_ps(_simd16_setzero_ps(), v0_lo, 0);
          v1[i] = _simd16_insert_ps(_simd16_setzero_ps(), v1_lo, 0);
@@ -2563,6 +2662,8 @@ bool PaRectList1_simd16(
  
      SetNextPaState_simd16(pa, PaRectList1_simd16, PaRectList1, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
+
+    // clang-format on
  }
  
  //////////////////////////////////////////////////////////////////////////
@@ -2570,14 +2671,13 @@ bool PaRectList1_simd16(
  ///        Not implemented unless there is a use case for more then 8 rects.
  /// @param pa - State for PA state machine.
  /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-bool PaRectList2_simd16(
-    PA_STATE_OPT& pa,
-    uint32_t slot,
-    simd16vector verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+bool PaRectList2_simd16(PA_STATE_OPT& pa, uint32_t slot, simd16vector verts[])
  {
      SWR_INVALID("Is rect list used for anything other then clears?");
-    SetNextPaState_simd16(pa, PaRectList0_simd16, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
+    SetNextPaState_simd16(
+        pa, PaRectList0_simd16, PaRectList0, PaRectListSingle0, 0, PA_STATE_OPT::SIMD_WIDTH, true);
      return true;
  }
  
@@ -2591,23 +2691,20 @@ bool PaRectList2_simd16(
  /// @param pa - State for PA state machine.
  /// @param slot - Index into VS output for a given attribute.
  /// @param primIndex - Binner processes each triangle individually.
-/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
-void PaRectListSingle0(
-    PA_STATE_OPT& pa,
-    uint32_t slot,
-    uint32_t primIndex,
-    simd4scalar verts[])
+/// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1,
+/// etc.
+void PaRectListSingle0(PA_STATE_OPT& pa, uint32_t slot, uint32_t primIndex, simd4scalar verts[])
  {
-    // We have 12 simdscalars contained within 3 simdvectors which
-    // hold at least 8 triangles worth of data. We want to assemble a single
-    // triangle with data in horizontal form.
+    // We have 12 simdscalars contained within 3 simdvectors which
+    // hold at least 8 triangles worth of data. We want to assemble a single
+    // triangle with data in horizontal form.
  #if USE_SIMD16_FRONTEND
      simdvector a;
      simdvector b;
  
      if (!pa.useAlternateOffset)
      {
-        const simd16vector &a_16 = PaGetSimdVector_simd16(pa, 0, slot);
+        const simd16vector& a_16 = PaGetSimdVector_simd16(pa, 0, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
@@ -2617,12 +2714,13 @@ void PaRectListSingle0(
      }
      else
      {
-        const simd16vector &b_16 = PaGetSimdVector_simd16(pa, 1, slot);
+        const simd16vector& b_16 = PaGetSimdVector_simd16(pa, 1, slot);
  
          for (uint32_t i = 0; i < 4; i += 1)
          {
              a[i] = _simd16_extract_ps(b_16[i], 0);
-            b[i] = _simd16_extract_ps(b_16[i], 1);;
+            b[i] = _simd16_extract_ps(b_16[i], 1);
+            ;
          }
      }
  
@@ -2631,7 +2729,7 @@ void PaRectListSingle0(
  
  #endif
      // Convert from vertical to horizontal.
-    switch(primIndex)
+    switch (primIndex)
      {
      case 0:
          verts[0] = swizzleLane0(a);
@@ -2654,10 +2752,17 @@ void PaRectListSingle0(
      };
  }
  
-PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT *in_pDC, uint32_t in_numPrims, uint8_t* pStream, uint32_t in_streamSizeInVerts, 
-    uint32_t in_vertexStride, bool in_isStreaming, uint32_t numVertsPerPrim, PRIMITIVE_TOPOLOGY topo) : 
-    PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim), numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0), 
-    cur(0), prev(0), first(0), counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
+PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT*      in_pDC,
+                           uint32_t           in_numPrims,
+                           uint8_t*           pStream,
+                           uint32_t           in_streamSizeInVerts,
+                           uint32_t           in_vertexStride,
+                           bool               in_isStreaming,
+                           uint32_t           numVertsPerPrim,
+                           PRIMITIVE_TOPOLOGY topo) :
+    PA_STATE(in_pDC, pStream, in_streamSizeInVerts, in_vertexStride, numVertsPerPrim),
+    numPrims(in_numPrims), numPrimsComplete(0), numSimdPrims(0), cur(0), prev(0), first(0),
+    counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming)
  {
      const API_STATE& state = GetApiState(pDC);
  
@@ -2669,271 +2774,271 @@ PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT *in_pDC, uint32_t in_numPrims, uint8_t*
  #endif
      switch (this->binTopology)
      {
-        case TOP_TRIANGLE_LIST:
-            this->pfnPaFunc = PaTriList0;
+    case TOP_TRIANGLE_LIST:
+        this->pfnPaFunc = PaTriList0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaTriList0_simd16;
+        this->pfnPaFunc_simd16 = PaTriList0_simd16;
  #endif
-            break;
-        case TOP_TRIANGLE_STRIP:
-            this->pfnPaFunc = PaTriStrip0;
+        break;
+    case TOP_TRIANGLE_STRIP:
+        this->pfnPaFunc = PaTriStrip0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
+        this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
  #endif
-            break;
-        case TOP_TRIANGLE_FAN:
-            this->pfnPaFunc = PaTriFan0;
+        break;
+    case TOP_TRIANGLE_FAN:
+        this->pfnPaFunc = PaTriFan0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaTriFan0_simd16;
+        this->pfnPaFunc_simd16 = PaTriFan0_simd16;
  #endif
-            break;
-        case TOP_QUAD_LIST:
-            this->pfnPaFunc = PaQuadList0;
+        break;
+    case TOP_QUAD_LIST:
+        this->pfnPaFunc = PaQuadList0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaQuadList0_simd16;
+        this->pfnPaFunc_simd16 = PaQuadList0_simd16;
  #endif
-            this->numPrims = in_numPrims * 2;    // Convert quad primitives into triangles
-            break;
-        case TOP_QUAD_STRIP:
-            // quad strip pattern when decomposed into triangles is the same as verts strips
-            this->pfnPaFunc = PaTriStrip0;
+        this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
+        break;
+    case TOP_QUAD_STRIP:
+        // quad strip pattern when decomposed into triangles is the same as verts strips
+        this->pfnPaFunc = PaTriStrip0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
+        this->pfnPaFunc_simd16 = PaTriStrip0_simd16;
  #endif
-            this->numPrims = in_numPrims * 2;    // Convert quad primitives into triangles
-            break;
-        case TOP_LINE_LIST:
-            this->pfnPaFunc = PaLineList0;
+        this->numPrims = in_numPrims * 2; // Convert quad primitives into triangles
+        break;
+    case TOP_LINE_LIST:
+        this->pfnPaFunc = PaLineList0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaLineList0_simd16;
+        this->pfnPaFunc_simd16 = PaLineList0_simd16;
  #endif
-            this->numPrims = in_numPrims;
-            break;
-        case TOP_LINE_STRIP:
-            this->pfnPaFunc = PaLineStrip0;
+        this->numPrims = in_numPrims;
+        break;
+    case TOP_LINE_STRIP:
+        this->pfnPaFunc = PaLineStrip0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaLineStrip0_simd16;
+        this->pfnPaFunc_simd16 = PaLineStrip0_simd16;
  #endif
-            this->numPrims = in_numPrims;
-            break;
-        case TOP_LINE_LOOP:
-            this->pfnPaFunc = PaLineLoop0;
+        this->numPrims = in_numPrims;
+        break;
+    case TOP_LINE_LOOP:
+        this->pfnPaFunc = PaLineLoop0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaLineLoop0_simd16;
+        this->pfnPaFunc_simd16 = PaLineLoop0_simd16;
  #endif
-            this->numPrims = in_numPrims;
-            break;
-        case TOP_POINT_LIST:
-            this->pfnPaFunc = PaPoints0;
+        this->numPrims = in_numPrims;
+        break;
+    case TOP_POINT_LIST:
+        this->pfnPaFunc = PaPoints0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPoints0_simd16;
+        this->pfnPaFunc_simd16 = PaPoints0_simd16;
  #endif
-            this->numPrims = in_numPrims;
-            break;
-        case TOP_RECT_LIST:
-            this->pfnPaFunc = PaRectList0;
+        this->numPrims = in_numPrims;
+        break;
+    case TOP_RECT_LIST:
+        this->pfnPaFunc = PaRectList0;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaRectList0_simd16;
+        this->pfnPaFunc_simd16 = PaRectList0_simd16;
  #endif
-            this->numPrims = in_numPrims * 2;
-            break;
+        this->numPrims = in_numPrims * 2;
+        break;
  
-        case TOP_PATCHLIST_1:
-            this->pfnPaFunc = PaPatchList<1>;
+    case TOP_PATCHLIST_1:
+        this->pfnPaFunc = PaPatchList<1>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<1>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<1>;
  #endif
-            break;
-        case TOP_PATCHLIST_2:
-            this->pfnPaFunc = PaPatchList<2>;
+        break;
+    case TOP_PATCHLIST_2:
+        this->pfnPaFunc = PaPatchList<2>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<2>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<2>;
  #endif
-            break;
-        case TOP_PATCHLIST_3:
-            this->pfnPaFunc = PaPatchList<3>;
+        break;
+    case TOP_PATCHLIST_3:
+        this->pfnPaFunc = PaPatchList<3>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<3>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<3>;
  #endif
-            break;
-        case TOP_PATCHLIST_4:
-            this->pfnPaFunc = PaPatchList<4>;
+        break;
+    case TOP_PATCHLIST_4:
+        this->pfnPaFunc = PaPatchList<4>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<4>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<4>;
  #endif
-            break;
-        case TOP_PATCHLIST_5:
-            this->pfnPaFunc = PaPatchList<5>;
+        break;
+    case TOP_PATCHLIST_5:
+        this->pfnPaFunc = PaPatchList<5>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<5>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<5>;
  #endif
-            break;
-        case TOP_PATCHLIST_6:
-            this->pfnPaFunc = PaPatchList<6>;
+        break;
+    case TOP_PATCHLIST_6:
+        this->pfnPaFunc = PaPatchList<6>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<6>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<6>;
  #endif
-            break;
-        case TOP_PATCHLIST_7:
-            this->pfnPaFunc = PaPatchList<7>;
+        break;
+    case TOP_PATCHLIST_7:
+        this->pfnPaFunc = PaPatchList<7>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<7>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<7>;
  #endif
-            break;
-        case TOP_PATCHLIST_8:
-            this->pfnPaFunc = PaPatchList<8>;
+        break;
+    case TOP_PATCHLIST_8:
+        this->pfnPaFunc = PaPatchList<8>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<8>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<8>;
  #endif
-            break;
-        case TOP_PATCHLIST_9:
-            this->pfnPaFunc = PaPatchList<9>;
+        break;
+    case TOP_PATCHLIST_9:
+        this->pfnPaFunc = PaPatchList<9>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<9>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<9>;
  #endif
-            break;
-        case TOP_PATCHLIST_10:
-            this->pfnPaFunc = PaPatchList<10>;
+        break;
+    case TOP_PATCHLIST_10:
+        this->pfnPaFunc = PaPatchList<10>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<10>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<10>;
  #endif
-            break;
-        case TOP_PATCHLIST_11:
-            this->pfnPaFunc = PaPatchList<11>;
+        break;
+    case TOP_PATCHLIST_11:
+        this->pfnPaFunc = PaPatchList<11>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<11>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<11>;
  #endif
-            break;
-        case TOP_PATCHLIST_12:
-            this->pfnPaFunc = PaPatchList<12>;
+        break;
+    case TOP_PATCHLIST_12:
+        this->pfnPaFunc = PaPatchList<12>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<12>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<12>;
  #endif
-            break;
-        case TOP_PATCHLIST_13:
-            this->pfnPaFunc = PaPatchList<13>;
+        break;
+    case TOP_PATCHLIST_13:
+        this->pfnPaFunc = PaPatchList<13>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<13>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<13>;
  #endif
-            break;
-        case TOP_PATCHLIST_14:
-            this->pfnPaFunc = PaPatchList<14>;
+        break;
+    case TOP_PATCHLIST_14:
+        this->pfnPaFunc = PaPatchList<14>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<14>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<14>;
  #endif
-            break;
-        case TOP_PATCHLIST_15:
-            this->pfnPaFunc = PaPatchList<15>;
+        break;
+    case TOP_PATCHLIST_15:
+        this->pfnPaFunc = PaPatchList<15>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<15>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<15>;
  #endif
-            break;
-        case TOP_PATCHLIST_16:
-            this->pfnPaFunc = PaPatchList<16>;
+        break;
+    case TOP_PATCHLIST_16:
+        this->pfnPaFunc = PaPatchList<16>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<16>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<16>;
  #endif
-            break;
-        case TOP_PATCHLIST_17:
-            this->pfnPaFunc = PaPatchList<17>;
+        break;
+    case TOP_PATCHLIST_17:
+        this->pfnPaFunc = PaPatchList<17>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<17>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<17>;
  #endif
-            break;
-        case TOP_PATCHLIST_18:
-            this->pfnPaFunc = PaPatchList<18>;
+        break;
+    case TOP_PATCHLIST_18:
+        this->pfnPaFunc = PaPatchList<18>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<18>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<18>;
  #endif
-            break;
-        case TOP_PATCHLIST_19:
-            this->pfnPaFunc = PaPatchList<19>;
+        break;
+    case TOP_PATCHLIST_19:
+        this->pfnPaFunc = PaPatchList<19>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<19>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<19>;
  #endif
-            break;
-        case TOP_PATCHLIST_20:
-            this->pfnPaFunc = PaPatchList<20>;
+        break;
+    case TOP_PATCHLIST_20:
+        this->pfnPaFunc = PaPatchList<20>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<20>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<20>;
  #endif
-            break;
-        case TOP_PATCHLIST_21:
-            this->pfnPaFunc = PaPatchList<21>;
+        break;
+    case TOP_PATCHLIST_21:
+        this->pfnPaFunc = PaPatchList<21>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<21>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<21>;
  #endif
-            break;
-        case TOP_PATCHLIST_22:
-            this->pfnPaFunc = PaPatchList<22>;
+        break;
+    case TOP_PATCHLIST_22:
+        this->pfnPaFunc = PaPatchList<22>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<22>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<22>;
  #endif
-            break;
-        case TOP_PATCHLIST_23:
-            this->pfnPaFunc = PaPatchList<23>;
+        break;
+    case TOP_PATCHLIST_23:
+        this->pfnPaFunc = PaPatchList<23>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<23>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<23>;
  #endif
-            break;
-        case TOP_PATCHLIST_24:
-            this->pfnPaFunc = PaPatchList<24>;
+        break;
+    case TOP_PATCHLIST_24:
+        this->pfnPaFunc = PaPatchList<24>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<24>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<24>;
  #endif
-            break;
-        case TOP_PATCHLIST_25:
-            this->pfnPaFunc = PaPatchList<25>;
+        break;
+    case TOP_PATCHLIST_25:
+        this->pfnPaFunc = PaPatchList<25>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<25>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<25>;
  #endif
-            break;
-        case TOP_PATCHLIST_26:
-            this->pfnPaFunc = PaPatchList<26>;
+        break;
+    case TOP_PATCHLIST_26:
+        this->pfnPaFunc = PaPatchList<26>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<26>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<26>;
  #endif
-            break;
-        case TOP_PATCHLIST_27:
-            this->pfnPaFunc = PaPatchList<27>;
+        break;
+    case TOP_PATCHLIST_27:
+        this->pfnPaFunc = PaPatchList<27>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<27>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<27>;
  #endif
-            break;
-        case TOP_PATCHLIST_28:
-            this->pfnPaFunc = PaPatchList<28>;
+        break;
+    case TOP_PATCHLIST_28:
+        this->pfnPaFunc = PaPatchList<28>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<28>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<28>;
  #endif
-            break;
-        case TOP_PATCHLIST_29:
-            this->pfnPaFunc = PaPatchList<29>;
+        break;
+    case TOP_PATCHLIST_29:
+        this->pfnPaFunc = PaPatchList<29>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<29>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<29>;
  #endif
-            break;
-        case TOP_PATCHLIST_30:
-            this->pfnPaFunc = PaPatchList<30>;
+        break;
+    case TOP_PATCHLIST_30:
+        this->pfnPaFunc = PaPatchList<30>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<30>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<30>;
  #endif
-            break;
-        case TOP_PATCHLIST_31:
-            this->pfnPaFunc = PaPatchList<31>;
+        break;
+    case TOP_PATCHLIST_31:
+        this->pfnPaFunc = PaPatchList<31>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<31>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<31>;
  #endif
-            break;
-        case TOP_PATCHLIST_32:
-            this->pfnPaFunc = PaPatchList<32>;
+        break;
+    case TOP_PATCHLIST_32:
+        this->pfnPaFunc = PaPatchList<32>;
  #if ENABLE_AVX512_SIMD16
-            this->pfnPaFunc_simd16 = PaPatchList_simd16<32>;
+        this->pfnPaFunc_simd16 = PaPatchList_simd16<32>;
  #endif
-            break;
+        break;
  
-        default:
-            SWR_INVALID("Invalid topology: %d", this->binTopology);
-            break;
+    default:
+        SWR_INVALID("Invalid topology: %d", this->binTopology);
+        break;
      };
  
      this->pfnPaFuncReset = this->pfnPaFunc;
@@ -2943,95 +3048,94 @@ PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT *in_pDC, uint32_t in_numPrims, uint8_t*
  
  #if USE_SIMD16_FRONTEND
      simd16scalari id16 = _simd16_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-    simd16scalari id82 = _simd16_set_epi32( 7,  7,  6,  6,  5,  5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
+    simd16scalari id82 = _simd16_set_epi32(7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0);
  
  #else
      simdscalari id8 = _simd_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
      simdscalari id4 = _simd_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
  
  #endif
-    switch(this->binTopology)
+    switch (this->binTopology)
      {
-        case TOP_TRIANGLE_LIST:
-        case TOP_TRIANGLE_STRIP:
-        case TOP_TRIANGLE_FAN:
-        case TOP_LINE_STRIP:
-        case TOP_LINE_LIST:
-        case TOP_LINE_LOOP:
+    case TOP_TRIANGLE_LIST:
+    case TOP_TRIANGLE_STRIP:
+    case TOP_TRIANGLE_FAN:
+    case TOP_LINE_STRIP:
+    case TOP_LINE_LIST:
+    case TOP_LINE_LOOP:
  #if USE_SIMD16_FRONTEND
-            this->primIDIncr = 16;
-            this->primID = id16;
+        this->primIDIncr = 16;
+        this->primID     = id16;
  #else
-            this->primIDIncr = 8;
-            this->primID = id8;
+        this->primIDIncr = 8;
+        this->primID = id8;
  #endif
-            break;
-        case TOP_QUAD_LIST:
-        case TOP_QUAD_STRIP:
-        case TOP_RECT_LIST:
+        break;
+    case TOP_QUAD_LIST:
+    case TOP_QUAD_STRIP:
+    case TOP_RECT_LIST:
  #if USE_SIMD16_FRONTEND
-            this->primIDIncr = 8;
-            this->primID = id82;
+        this->primIDIncr = 8;
+        this->primID     = id82;
  #else
-            this->primIDIncr = 4;
-            this->primID = id4;
+        this->primIDIncr = 4;
+        this->primID = id4;
  #endif
-            break;
-        case TOP_POINT_LIST:
+        break;
+    case TOP_POINT_LIST:
  #if USE_SIMD16_FRONTEND
-            this->primIDIncr = 16;
-            this->primID = id16;
+        this->primIDIncr = 16;
+        this->primID     = id16;
  #else
-            this->primIDIncr = 8;
-            this->primID = id8;
-#endif
-            break;
-        case TOP_PATCHLIST_1:
-        case TOP_PATCHLIST_2:
-        case TOP_PATCHLIST_3:
-        case TOP_PATCHLIST_4:
-        case TOP_PATCHLIST_5:
-        case TOP_PATCHLIST_6:
-        case TOP_PATCHLIST_7:
-        case TOP_PATCHLIST_8:
-        case TOP_PATCHLIST_9:
-        case TOP_PATCHLIST_10:
-        case TOP_PATCHLIST_11:
-        case TOP_PATCHLIST_12:
-        case TOP_PATCHLIST_13:
-        case TOP_PATCHLIST_14:
-        case TOP_PATCHLIST_15:
-        case TOP_PATCHLIST_16:
-        case TOP_PATCHLIST_17:
-        case TOP_PATCHLIST_18:
-        case TOP_PATCHLIST_19:
-        case TOP_PATCHLIST_20:
-        case TOP_PATCHLIST_21:
-        case TOP_PATCHLIST_22:
-        case TOP_PATCHLIST_23:
-        case TOP_PATCHLIST_24:
-        case TOP_PATCHLIST_25:
-        case TOP_PATCHLIST_26:
-        case TOP_PATCHLIST_27:
-        case TOP_PATCHLIST_28:
-        case TOP_PATCHLIST_29:
-        case TOP_PATCHLIST_30:
-        case TOP_PATCHLIST_31:
-        case TOP_PATCHLIST_32:
-            // Always run KNOB_SIMD_WIDTH number of patches at a time.
+        this->primIDIncr = 8;
+        this->primID = id8;
+#endif
+        break;
+    case TOP_PATCHLIST_1:
+    case TOP_PATCHLIST_2:
+    case TOP_PATCHLIST_3:
+    case TOP_PATCHLIST_4:
+    case TOP_PATCHLIST_5:
+    case TOP_PATCHLIST_6:
+    case TOP_PATCHLIST_7:
+    case TOP_PATCHLIST_8:
+    case TOP_PATCHLIST_9:
+    case TOP_PATCHLIST_10:
+    case TOP_PATCHLIST_11:
+    case TOP_PATCHLIST_12:
+    case TOP_PATCHLIST_13:
+    case TOP_PATCHLIST_14:
+    case TOP_PATCHLIST_15:
+    case TOP_PATCHLIST_16:
+    case TOP_PATCHLIST_17:
+    case TOP_PATCHLIST_18:
+    case TOP_PATCHLIST_19:
+    case TOP_PATCHLIST_20:
+    case TOP_PATCHLIST_21:
+    case TOP_PATCHLIST_22:
+    case TOP_PATCHLIST_23:
+    case TOP_PATCHLIST_24:
+    case TOP_PATCHLIST_25:
+    case TOP_PATCHLIST_26:
+    case TOP_PATCHLIST_27:
+    case TOP_PATCHLIST_28:
+    case TOP_PATCHLIST_29:
+    case TOP_PATCHLIST_30:
+    case TOP_PATCHLIST_31:
+    case TOP_PATCHLIST_32:
+        // Always run KNOB_SIMD_WIDTH number of patches at a time.
  #if USE_SIMD16_FRONTEND
-            this->primIDIncr = 16;
-            this->primID = id16;
+        this->primIDIncr = 16;
+        this->primID     = id16;
  #else
-            this->primIDIncr = 8;
-            this->primID = id8;
+        this->primIDIncr = 8;
+        this->primID = id8;
  #endif
-            break;
+        break;
  
-        default:
-            SWR_INVALID("Invalid topology: %d", this->binTopology);
-            break;
+    default:
+        SWR_INVALID("Invalid topology: %d", this->binTopology);
+        break;
      };
-
  }
  #endif
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp

index 67c28ad97c43202c6a9f45d11f129bc08018f4b8..a392035700dd07ad89314441ffae1a7cf7f1661c 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.cpp
-*
-* @brief Implementation for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.cpp
+ *
+ * @brief Implementation for the rasterizer.
+ *
+ ******************************************************************************/
  
  #include <vector>
  #include <algorithm>
@@ -39,11 +39,12 @@
  #include "memory/tilingtraits.h"
  #include "rasterizer_impl.h"
  
-PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
+PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
+                              [STATE_VALID_TRI_EDGE_COUNT][2];
  
-void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
+void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
  {
-    const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
+    const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pData);
  #if KNOB_ENABLE_TOSS_POINTS
      if (KNOB_TOSS_BIN_TRIS)
      {
@@ -54,23 +55,24 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      // bloat line to two tris and call the triangle rasterizer twice
      RDTSC_BEGIN(BERasterizeLine, pDC->drawId);
  
-    const API_STATE &state = GetApiState(pDC);
-    const SWR_RASTSTATE &rastState = state.rastState;
+    const API_STATE&     state     = GetApiState(pDC);
+    const SWR_RASTSTATE& rastState = state.rastState;
  
      // macrotile dimensioning
      uint32_t macroX, macroY;
      MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
-    int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
-    int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
-    int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
+    int32_t macroBoxLeft   = macroX * KNOB_MACROTILE_X_DIM_FIXED;
+    int32_t macroBoxRight  = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
+    int32_t macroBoxTop    = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
      int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
  
-    const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+    const SWR_RECT& scissorInFixedPoint =
+        state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
  
      // create a copy of the triangle buffer to write our adjusted vertices to
      OSALIGNSIMD(float) newTriBuffer[4 * 4];
      TRIANGLE_WORK_DESC newWorkDesc = workDesc;
-    newWorkDesc.pTriBuffer = &newTriBuffer[0];
+    newWorkDesc.pTriBuffer         = &newTriBuffer[0];
  
      // create a copy of the attrib buffer to write our adjusted attribs to
      OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
@@ -81,20 +83,20 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
  
      __m128 vX, vY, vZ, vRecipW;
  
-    vX = _mm_load_ps(workDesc.pTriBuffer);
-    vY = _mm_load_ps(workDesc.pTriBuffer + 4);
-    vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
+    vX      = _mm_load_ps(workDesc.pTriBuffer);
+    vY      = _mm_load_ps(workDesc.pTriBuffer + 4);
+    vZ      = _mm_load_ps(workDesc.pTriBuffer + 8);
      vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
  
      // triangle 0
      // v0,v1 -> v0,v0,v1
-    __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
-    __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
-    __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
+    __m128 vXa      = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
+    __m128 vYa      = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
+    __m128 vZa      = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
      __m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0));
  
      __m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth);
-    __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
+    __m128 vAdjust    = _mm_mul_ps(vLineWidth, vBloat0);
      if (workDesc.triFlags.yMajor)
      {
          vXa = _mm_add_ps(vAdjust, vXa);
@@ -123,7 +125,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      }
  
      // Store user clip distances for triangle 0
-    float newClipBuffer[3 * 8];
+    float    newClipBuffer[3 * 8];
      uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
      if (numClipDist)
      {
@@ -151,8 +153,12 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      // setup triangle rasterizer function
      PFN_WORK_FUNC pfnTriRast;
      // conservative rast not supported for points/lines
-    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
-        SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
+    pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
+                                   rastState.bIsCenterPattern,
+                                   false,
+                                   SWR_INPUT_COVERAGE_NONE,
+                                   EdgeValToEdgeState(ALL_EDGES_VALID),
+                                   (pDC->pState->state.scissorsTileAligned == false));
  
      // make sure this macrotile intersects the triangle
      __m128i vXai = fpToFixedPoint(vXa);
@@ -160,23 +166,20 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      OSALIGNSIMD(SWR_RECT) bboxA;
      calcBoundingBoxInt(vXai, vYai, bboxA);
  
-    if (!(bboxA.xmin > macroBoxRight ||
-        bboxA.xmin > scissorInFixedPoint.xmax ||
-        bboxA.xmax - 1 < macroBoxLeft ||
-        bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
-        bboxA.ymin > macroBoxBottom ||
-        bboxA.ymin > scissorInFixedPoint.ymax ||
-        bboxA.ymax - 1 < macroBoxTop ||
-        bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
+    if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
+          bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
+          bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
+          bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
+    {
          // rasterize triangle
          pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
      }
  
      // triangle 1
      // v0,v1 -> v1,v1,v0
-    vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
-    vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
-    vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
+    vXa      = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
+    vYa      = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
+    vZa      = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
      vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1));
  
      vAdjust = _mm_mul_ps(vLineWidth, vBloat1);
@@ -233,14 +236,11 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      vYai = fpToFixedPoint(vYa);
      calcBoundingBoxInt(vXai, vYai, bboxA);
  
-    if (!(bboxA.xmin > macroBoxRight ||
-        bboxA.xmin > scissorInFixedPoint.xmax ||
-        bboxA.xmax - 1 < macroBoxLeft ||
-        bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
-        bboxA.ymin > macroBoxBottom ||
-        bboxA.ymin > scissorInFixedPoint.ymax ||
-        bboxA.ymax - 1 < macroBoxTop ||
-        bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
+    if (!(bboxA.xmin > macroBoxRight || bboxA.xmin > scissorInFixedPoint.xmax ||
+          bboxA.xmax - 1 < macroBoxLeft || bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
+          bboxA.ymin > macroBoxBottom || bboxA.ymin > scissorInFixedPoint.ymax ||
+          bboxA.ymax - 1 < macroBoxTop || bboxA.ymax - 1 < scissorInFixedPoint.ymin))
+    {
          // rasterize triangle
          pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
      }
@@ -248,7 +248,7 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
      RDTSC_BEGIN(BERasterizeLine, 1);
  }
  
-void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
+void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
  {
  #if KNOB_ENABLE_TOSS_POINTS
      if (KNOB_TOSS_BIN_TRIS)
@@ -257,21 +257,19 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
      }
  #endif
  
-    const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
-    const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
+    const TRIANGLE_WORK_DESC& workDesc     = *(const TRIANGLE_WORK_DESC*)pData;
+    const BACKEND_FUNCS&      backendFuncs = pDC->pState->backendFuncs;
  
-    // map x,y relative offsets from start of raster tile to bit position in 
+    // map x,y relative offsets from start of raster tile to bit position in
      // coverage mask for the point
-    static const uint32_t coverageMap[8][8] = {
-        { 0, 1, 4, 5, 8, 9, 12, 13 },
-        { 2, 3, 6, 7, 10, 11, 14, 15 },
-        { 16, 17, 20, 21, 24, 25, 28, 29 },
-        { 18, 19, 22, 23, 26, 27, 30, 31 },
-        { 32, 33, 36, 37, 40, 41, 44, 45 },
-        { 34, 35, 38, 39, 42, 43, 46, 47 },
-        { 48, 49, 52, 53, 56, 57, 60, 61 },
-        { 50, 51, 54, 55, 58, 59, 62, 63 }
-    };
+    static const uint32_t coverageMap[8][8] = {{0, 1, 4, 5, 8, 9, 12, 13},
+                                               {2, 3, 6, 7, 10, 11, 14, 15},
+                                               {16, 17, 20, 21, 24, 25, 28, 29},
+                                               {18, 19, 22, 23, 26, 27, 30, 31},
+                                               {32, 33, 36, 37, 40, 41, 44, 45},
+                                               {34, 35, 38, 39, 42, 43, 46, 47},
+                                               {48, 49, 52, 53, 56, 57, 60, 61},
+                                               {50, 51, 54, 55, 58, 59, 62, 63}};
  
      OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
  
@@ -279,7 +277,7 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
      // @todo use structs for readability
      uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer;
      uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1);
-    float z = *(workDesc.pTriBuffer + 2);
+    float    z            = *(workDesc.pTriBuffer + 2);
  
      // construct triangle descriptor for point
      // no interpolation, set up i,j for constant interpolation of z and attribs
@@ -294,27 +292,32 @@ void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTi
  
      // no persp divide needed for points
      triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
-    triDesc.triFlags = workDesc.triFlags;
-    triDesc.recipDet = 1.0f;
+    triDesc.triFlags                         = workDesc.triFlags;
+    triDesc.recipDet                         = 1.0f;
      triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f;
      triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f;
      triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f;
      triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
  
      RenderOutputBuffers renderBuffers;
-    GetRenderHotTiles(pDC, workerId, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, 
-        renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
+    GetRenderHotTiles(pDC,
+                      workerId,
+                      macroTile,
+                      tileAlignedX >> KNOB_TILE_X_DIM_SHIFT,
+                      tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
+                      renderBuffers,
+                      triDesc.triFlags.renderTargetArrayIndex);
  
      RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
      backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
      RDTSC_END(BEPixelBackend, 0);
  }
  
-void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
+void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
  {
-    const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
-    const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
-    const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
+    const TRIANGLE_WORK_DESC& workDesc     = *(const TRIANGLE_WORK_DESC*)pData;
+    const SWR_RASTSTATE&      rastState    = pDC->pState->state.rastState;
+    const SWR_BACKEND_STATE&  backendState = pDC->pState->state.backendState;
  
      bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0;
  
@@ -326,28 +329,28 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
      // create a copy of the triangle buffer to write our adjusted vertices to
      OSALIGNSIMD(float) newTriBuffer[4 * 4];
      TRIANGLE_WORK_DESC newWorkDesc = workDesc;
-    newWorkDesc.pTriBuffer = &newTriBuffer[0];
+    newWorkDesc.pTriBuffer         = &newTriBuffer[0];
  
      // create a copy of the attrib buffer to write our adjusted attribs to
      OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
      newWorkDesc.pAttribs = &newAttribBuffer[0];
  
      newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
-    newWorkDesc.numAttribs = workDesc.numAttribs;
-    newWorkDesc.triFlags = workDesc.triFlags;
+    newWorkDesc.numAttribs      = workDesc.numAttribs;
+    newWorkDesc.triFlags        = workDesc.triFlags;
  
      // construct two tris by bloating point by point size
      float halfPointSize = workDesc.triFlags.pointSize * 0.5f;
-    float lowerX = x - halfPointSize;
-    float upperX = x + halfPointSize;
-    float lowerY = y - halfPointSize;
-    float upperY = y + halfPointSize;
+    float lowerX        = x - halfPointSize;
+    float upperX        = x + halfPointSize;
+    float lowerY        = y - halfPointSize;
+    float upperY        = y + halfPointSize;
  
      // tri 0
-    float *pBuf = &newTriBuffer[0];
-    *pBuf++ = lowerX;
-    *pBuf++ = lowerX;
-    *pBuf++ = upperX;
+    float* pBuf = &newTriBuffer[0];
+    *pBuf++     = lowerX;
+    *pBuf++     = lowerX;
+    *pBuf++     = upperX;
      pBuf++;
      *pBuf++ = lowerY;
      *pBuf++ = upperY;
@@ -359,8 +362,12 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
      // setup triangle rasterizer function
      PFN_WORK_FUNC pfnTriRast;
      // conservative rast not supported for points/lines
-    pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
-        SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
+    pfnTriRast = GetRasterizerFunc(rastState.sampleCount,
+                                   rastState.bIsCenterPattern,
+                                   false,
+                                   SWR_INPUT_COVERAGE_NONE,
+                                   EdgeValToEdgeState(ALL_EDGES_VALID),
+                                   (pDC->pState->state.scissorsTileAligned == false));
  
      // overwrite texcoords for point sprites
      if (isPointSpriteTexCoordEnabled)
@@ -370,8 +377,8 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
          newWorkDesc.pAttribs = &newAttribBuffer[0];
  
          // overwrite texcoord for point sprites
-        uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
-        DWORD texCoordAttrib = 0;
+        uint32_t texCoordMask   = backendState.pointSpriteTexCoordMask;
+        DWORD    texCoordAttrib = 0;
  
          while (_BitScanForward(&texCoordAttrib, texCoordMask))
          {
@@ -400,7 +407,7 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
      pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
  
      // tri 1
-    pBuf = &newTriBuffer[0];
+    pBuf    = &newTriBuffer[0];
      *pBuf++ = lowerX;
      *pBuf++ = upperX;
      *pBuf++ = upperX;
@@ -412,8 +419,8 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
  
      if (isPointSpriteTexCoordEnabled)
      {
-        uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
-        DWORD texCoordAttrib = 0;
+        uint32_t texCoordMask   = backendState.pointSpriteTexCoordMask;
+        DWORD    texCoordAttrib = 0;
  
          while (_BitScanForward(&texCoordAttrib, texCoordMask))
          {
@@ -424,7 +431,6 @@ void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile,
                  pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
                  pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1);
                  pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
-
              }
              else
              {
@@ -444,20 +450,19 @@ void InitRasterizerFunctions()
  }
  
  // Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(
-    SWR_MULTISAMPLE_COUNT numSamples,
-    bool IsCenter,
-    bool IsConservative,
-    SWR_INPUT_COVERAGE InputCoverage,
-    uint32_t EdgeEnable,
-    bool RasterizeScissorEdges
-)
+PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
+                                bool                  IsCenter,
+                                bool                  IsConservative,
+                                SWR_INPUT_COVERAGE    InputCoverage,
+                                uint32_t              EdgeEnable,
+                                bool                  RasterizeScissorEdges)
  {
      SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT);
      SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT);
      SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT);
  
-    PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges];
+    PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage]
+                                         [EdgeEnable][RasterizeScissorEdges];
      SWR_ASSERT(func);
  
      return func;
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h

index 414d0f078196333a4eed9e32e16ffdcdf8247b60..f15cc1931295cc256de1375d0aedf3e72c17c3e7 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.h
-*
-* @brief Definitions for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.h
+ *
+ * @brief Definitions for the rasterizer.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "context.h"
@@ -32,9 +32,9 @@
  #include "conservativeRast.h"
  #include "multisample.h"
  
-void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
-void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData);
+void RasterizeLine(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void RasterizeSimplePoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
+void RasterizeTriPoint(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData);
  void InitRasterizerFunctions();
  
  INLINE
@@ -56,43 +56,43 @@ enum TriEdgesStates
  
  enum TriEdgesValues
  {
-    NO_VALID_EDGES = 0,
-    E0_E1_VALID = 0x3,
-    E0_E2_VALID = 0x5,
-    E1_E2_VALID = 0x6,
+    NO_VALID_EDGES  = 0,
+    E0_E1_VALID     = 0x3,
+    E0_E2_VALID     = 0x5,
+    E1_E2_VALID     = 0x6,
      ALL_EDGES_VALID = 0x7,
      VALID_TRI_EDGE_COUNT,
  };
  
  // Selector for correct templated RasterizeTriangle function
-PFN_WORK_FUNC GetRasterizerFunc(
-    SWR_MULTISAMPLE_COUNT numSamples,
-    bool IsCenter,
-    bool IsConservative,
-    SWR_INPUT_COVERAGE InputCoverage,
-    uint32_t EdgeEnable,
-    bool RasterizeScissorEdges);
+PFN_WORK_FUNC GetRasterizerFunc(SWR_MULTISAMPLE_COUNT numSamples,
+                                bool                  IsCenter,
+                                bool                  IsConservative,
+                                SWR_INPUT_COVERAGE    InputCoverage,
+                                uint32_t              EdgeEnable,
+                                bool                  RasterizeScissorEdges);
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief ValidTriEdges convenience typedefs used for templated function 
+/// @brief ValidTriEdges convenience typedefs used for templated function
  /// specialization supported Fixed Point precisions
  typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> AllEdgesValidT;
-typedef std::integral_constant<uint32_t, E0_E1_VALID> E0E1ValidT;
-typedef std::integral_constant<uint32_t, E0_E2_VALID> E0E2ValidT;
-typedef std::integral_constant<uint32_t, E1_E2_VALID> E1E2ValidT;
-typedef std::integral_constant<uint32_t, NO_VALID_EDGES> NoEdgesValidT;
+typedef std::integral_constant<uint32_t, E0_E1_VALID>     E0E1ValidT;
+typedef std::integral_constant<uint32_t, E0_E2_VALID>     E0E2ValidT;
+typedef std::integral_constant<uint32_t, E1_E2_VALID>     E1E2ValidT;
+typedef std::integral_constant<uint32_t, NO_VALID_EDGES>  NoEdgesValidT;
  
  typedef std::integral_constant<uint32_t, STATE_ALL_EDGES_VALID> StateAllEdgesValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E1_VALID> StateE0E1ValidT;
-typedef std::integral_constant<uint32_t, STATE_E0_E2_VALID> StateE0E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_E1_E2_VALID> StateE1E2ValidT;
-typedef std::integral_constant<uint32_t, STATE_NO_VALID_EDGES> StateNoEdgesValidT;
+typedef std::integral_constant<uint32_t, STATE_E0_E1_VALID>     StateE0E1ValidT;
+typedef std::integral_constant<uint32_t, STATE_E0_E2_VALID>     StateE0E2ValidT;
+typedef std::integral_constant<uint32_t, STATE_E1_E2_VALID>     StateE1E2ValidT;
+typedef std::integral_constant<uint32_t, STATE_NO_VALID_EDGES>  StateNoEdgesValidT;
  
  // some specializations to convert from edge state to edge bitmask values
  template <typename EdgeMask>
  struct EdgeMaskVal
  {
-    static_assert(EdgeMask::value > STATE_ALL_EDGES_VALID, "Primary EdgeMaskVal shouldn't be instantiated");
+    static_assert(EdgeMask::value > STATE_ALL_EDGES_VALID,
+                  "Primary EdgeMaskVal shouldn't be instantiated");
  };
  
  template <>
@@ -128,15 +128,15 @@ struct EdgeMaskVal<StateNoEdgesValidT>
  INLINE uint32_t EdgeValToEdgeState(uint32_t val)
  {
      SWR_ASSERT(val < VALID_TRI_EDGE_COUNT, "Unexpected tri edge mask");
-    static const uint32_t edgeValToEdgeState[VALID_TRI_EDGE_COUNT] = { 0, 0, 0, 1, 0, 2, 3, 4 };
-    return  edgeValToEdgeState[val];
+    static const uint32_t edgeValToEdgeState[VALID_TRI_EDGE_COUNT] = {0, 0, 0, 1, 0, 2, 3, 4};
+    return edgeValToEdgeState[val];
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @struct RasterScissorEdgesT
-/// @brief Primary RasterScissorEdgesT templated struct that holds compile 
-/// time information about the number of edges needed to be rasterized, 
-/// If either the scissor rect or conservative rast is enabled, 
+/// @brief Primary RasterScissorEdgesT templated struct that holds compile
+/// time information about the number of edges needed to be rasterized,
+/// If either the scissor rect or conservative rast is enabled,
  /// the scissor test is enabled and the rasterizer will test
  /// 3 triangle edges + 4 scissor edges for coverage.
  /// @tparam RasterScissorEdgesT: number of multisamples
@@ -145,20 +145,20 @@ INLINE uint32_t EdgeValToEdgeState(uint32_t val)
  template <typename RasterScissorEdgesT, typename ConservativeT, typename EdgeMaskT>
  struct RasterEdgeTraits
  {
-    typedef std::true_type RasterizeScissorEdgesT;
+    typedef std::true_type                      RasterizeScissorEdgesT;
      typedef std::integral_constant<uint32_t, 7> NumEdgesT;
-    //typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
+    // typedef std::integral_constant<uint32_t, EdgeMaskT::value> ValidEdgeMaskT;
      typedef typename EdgeMaskVal<EdgeMaskT>::T ValidEdgeMaskT;
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief specialization of RasterEdgeTraits. If neither scissor rect
-/// nor conservative rast is enabled, only test 3 triangle edges 
+/// nor conservative rast is enabled, only test 3 triangle edges
  /// for coverage
  template <typename EdgeMaskT>
  struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
  {
-    typedef std::false_type RasterizeScissorEdgesT;
+    typedef std::false_type                     RasterizeScissorEdgesT;
      typedef std::integral_constant<uint32_t, 3> NumEdgesT;
      // no need for degenerate edge masking in non-conservative case; rasterize all triangle edges
      typedef std::integral_constant<uint32_t, ALL_EDGES_VALID> ValidEdgeMaskT;
@@ -166,45 +166,72 @@ struct RasterEdgeTraits<std::false_type, std::false_type, EdgeMaskT>
  
  //////////////////////////////////////////////////////////////////////////
  /// @struct RasterizerTraits
-/// @brief templated struct that holds compile time information used 
+/// @brief templated struct that holds compile time information used
  /// during rasterization. Inherits EdgeTraits and ConservativeRastBETraits.
  /// @tparam NumSamplesT: number of multisamples
  /// @tparam ConservativeT: is this a conservative rasterization
  /// @tparam InputCoverageT: what type of input coverage is the PS expecting?
  /// (only used with conservative rasterization)
  /// @tparam RasterScissorEdgesT: do we need to rasterize with a scissor?
-template <typename NumSamplesT, typename CenterPatternT, typename ConservativeT, typename InputCoverageT, typename EdgeEnableT, typename RasterScissorEdgesT>
+template <typename NumSamplesT,
+          typename CenterPatternT,
+          typename ConservativeT,
+          typename InputCoverageT,
+          typename EdgeEnableT,
+          typename RasterScissorEdgesT>
  struct _RasterizerTraits : public ConservativeRastBETraits<ConservativeT, InputCoverageT>,
-                                public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, EdgeEnableT>
+                           public RasterEdgeTraits<RasterScissorEdgesT, ConservativeT, EdgeEnableT>
  {
-    typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value), CenterPatternT::value> MT;
+    typedef MultisampleTraits<static_cast<SWR_MULTISAMPLE_COUNT>(NumSamplesT::value),
+                              CenterPatternT::value>
+        MT;
  
      /// Fixed point precision the rasterizer is using
      typedef FixedPointTraits<Fixed_16_8> PrecisionT;
      /// Fixed point precision of the edge tests used during rasterization
      typedef FixedPointTraits<Fixed_X_16> EdgePrecisionT;
  
-    // If conservative rast or MSAA center pattern is enabled, only need a single sample coverage test, with the result copied to all samples
-    typedef std::integral_constant<int, ConservativeT::value ? 1 : MT::numCoverageSamples> NumCoverageSamplesT;
+    // If conservative rast or MSAA center pattern is enabled, only need a single sample coverage
+    // test, with the result copied to all samples
+    typedef std::integral_constant<int, ConservativeT::value ? 1 : MT::numCoverageSamples>
+        NumCoverageSamplesT;
  
-    static_assert(EdgePrecisionT::BitsT::value >=  ConservativeRastBETraits<ConservativeT, InputCoverageT>::ConservativePrecisionT::BitsT::value,
-                  "Rasterizer edge fixed point precision < required conservative rast precision");
+    static_assert(
+        EdgePrecisionT::BitsT::value >=
+            ConservativeRastBETraits<ConservativeT,
+                                     InputCoverageT>::ConservativePrecisionT::BitsT::value,
+        "Rasterizer edge fixed point precision < required conservative rast precision");
  
      /// constants used to offset between different types of raster tiles
-    static const int colorRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
-    static const int depthRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
-    static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) * MT::numSamples};
-    static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * colorRasterTileStep};
-    static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM)* depthRasterTileStep};
-    static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * stencilRasterTileStep};
+    static const int colorRasterTileStep{
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8)) *
+        MT::numSamples};
+    static const int depthRasterTileStep{
+        (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8)) *
+        MT::numSamples};
+    static const int stencilRasterTileStep{(KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM *
+                                            (FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8)) *
+                                           MT::numSamples};
+    static const int colorRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+                                            colorRasterTileStep};
+    static const int depthRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+                                            depthRasterTileStep};
+    static const int stencilRasterTileRowStep{(KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) *
+                                              stencilRasterTileStep};
  };
  
-template <uint32_t NumSamplesT, uint32_t CenterPatternT, uint32_t ConservativeT, uint32_t InputCoverageT, uint32_t EdgeEnableT, uint32_t RasterScissorEdgesT>
-struct RasterizerTraits final : public _RasterizerTraits <
-    std::integral_constant<uint32_t, NumSamplesT>,
-    std::integral_constant<bool, CenterPatternT != 0>,
-    std::integral_constant<bool, ConservativeT != 0>,
-    std::integral_constant<uint32_t, InputCoverageT>,
-    std::integral_constant<uint32_t, EdgeEnableT>,
-    std::integral_constant<bool, RasterScissorEdgesT != 0> >
-{};
+template <uint32_t NumSamplesT,
+          uint32_t CenterPatternT,
+          uint32_t ConservativeT,
+          uint32_t InputCoverageT,
+          uint32_t EdgeEnableT,
+          uint32_t RasterScissorEdgesT>
+struct RasterizerTraits final
+    : public _RasterizerTraits<std::integral_constant<uint32_t, NumSamplesT>,
+                               std::integral_constant<bool, CenterPatternT != 0>,
+                               std::integral_constant<bool, ConservativeT != 0>,
+                               std::integral_constant<uint32_t, InputCoverageT>,
+                               std::integral_constant<uint32_t, EdgeEnableT>,
+                               std::integral_constant<bool, RasterScissorEdgesT != 0>>
+{
+};
diff --git a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h

index ca39d7c38f8ff0ee4e1f1e5176c73447d1a94cbd..20206eaaaf581ecd7801d2aeca6f80154724c83e 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file rasterizer.cpp
-*
-* @brief Implementation for the rasterizer.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file rasterizer.cpp
+ *
+ * @brief Implementation for the rasterizer.
+ *
+ ******************************************************************************/
  
  #include <vector>
  #include <algorithm>
@@ -37,18 +37,29 @@
  #include "tilemgr.h"
  #include "memory/tilingtraits.h"
  
-extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
+extern PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT]
+                                     [STATE_VALID_TRI_EDGE_COUNT][2];
  
  template <uint32_t numSamples = 1>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t x, uint32_t y, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex);
+void GetRenderHotTiles(DRAW_CONTEXT*        pDC,
+                       uint32_t             workerId,
+                       uint32_t             macroID,
+                       uint32_t             x,
+                       uint32_t             y,
+                       RenderOutputBuffers& renderBuffers,
+                       uint32_t             renderTargetArrayIndex);
  template <typename RT>
-void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers);
+void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers);
  template <typename RT>
-void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow);
+void StepRasterTileY(uint32_t             colorHotTileMask,
+                     RenderOutputBuffers& buffers,
+                     RenderOutputBuffers& startBufferRow);
  
-#define MASKTOVEC(i3,i2,i1,i0) {-i0,-i1,-i2,-i3}
-static const __m256d gMaskToVecpd[] =
-{
+#define MASKTOVEC(i3, i2, i1, i0) \
+    {                             \
+        -i0, -i1, -i2, -i3        \
+    }
+static const __m256d gMaskToVecpd[] = {
      MASKTOVEC(0, 0, 0, 0),
      MASKTOVEC(0, 0, 0, 1),
      MASKTOVEC(0, 0, 1, 0),
@@ -74,11 +85,11 @@ struct POS
  
  struct EDGE
  {
-    double a, b;                // a, b edge coefficients in fix8
-    double stepQuadX;           // step to adjacent horizontal quad in fix16
-    double stepQuadY;           // step to adjacent vertical quad in fix16
-    double stepRasterTileX;     // step to adjacent horizontal raster tile in fix16
-    double stepRasterTileY;     // step to adjacent vertical raster tile in fix16
+    double a, b;            // a, b edge coefficients in fix8
+    double stepQuadX;       // step to adjacent horizontal quad in fix16
+    double stepQuadY;       // step to adjacent vertical quad in fix16
+    double stepRasterTileX; // step to adjacent horizontal raster tile in fix16
+    double stepRasterTileY; // step to adjacent vertical raster tile in fix16
  
      __m256d vQuadOffsets;       // offsets for 4 samples of a quad
      __m256d vRasterTileOffsets; // offsets for the 4 corners of a raster tile
@@ -86,12 +97,15 @@ struct EDGE
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief rasterize a raster tile partially covered by the triangle
-/// @param vEdge0-2 - edge equations evaluated at sample pos at each of the 4 corners of a raster tile
+/// @param vEdge0-2 - edge equations evaluated at sample pos at each of the 4 corners of a raster
+/// tile
  /// @param vA, vB - A & B coefs for each edge of the triangle (Ax + Bx + C)
  /// @param vStepQuad0-2 - edge equations evaluated at the UL corners of the 2x2 pixel quad.
  ///        Used to step between quads when sweeping over the raster tile.
-template<uint32_t NumEdges, typename EdgeMaskT>
-INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdges], EDGE *pRastEdges)
+template <uint32_t NumEdges, typename EdgeMaskT>
+INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT* pDC,
+                                     double        startEdges[NumEdges],
+                                     EDGE*         pRastEdges)
  {
      uint64_t coverageMask = 0;
  
@@ -111,50 +125,49 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
  
      // fast unrolled version for 8x8 tile
  #if KNOB_TILE_X_DIM == 8 && KNOB_TILE_Y_DIM == 8
-    int edgeMask[NumEdges];
+    int      edgeMask[NumEdges];
      uint64_t mask;
  
-    auto eval_lambda = [&](int e){edgeMask[e] = _mm256_movemask_pd(vEdges[e]);};
-    auto update_lambda = [&](int e){mask &= edgeMask[e];};
-    auto incx_lambda = [&](int e){vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]);};
-    auto incy_lambda = [&](int e){vEdges[e] = _mm256_add_pd(vEdges[e], vStepY[e]);};
-    auto decx_lambda = [&](int e){vEdges[e] = _mm256_sub_pd(vEdges[e], vStepX[e]);};
+    auto eval_lambda   = [&](int e) { edgeMask[e] = _mm256_movemask_pd(vEdges[e]); };
+    auto update_lambda = [&](int e) { mask &= edgeMask[e]; };
+    auto incx_lambda   = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]); };
+    auto incy_lambda   = [&](int e) { vEdges[e] = _mm256_add_pd(vEdges[e], vStepY[e]); };
+    auto decx_lambda   = [&](int e) { vEdges[e] = _mm256_sub_pd(vEdges[e], vStepX[e]); };
  
  // evaluate which pixels in the quad are covered
-#define EVAL \
-            UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
+#define EVAL UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(eval_lambda);
  
      // update coverage mask
      // if edge 0 is degenerate and will be skipped; init the mask
-#define UPDATE_MASK(bit) \
-            if(std::is_same<EdgeMaskT, E1E2ValidT>::value || std::is_same<EdgeMaskT, NoEdgesValidT>::value){\
-                mask = 0xf;\
-            }\
-            else{\
-                mask = edgeMask[0]; \
-            }\
-            UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
-            coverageMask |= (mask << bit);
-
-    // step in the +x direction to the next quad 
-#define INCX \
-            UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
-
-    // step in the +y direction to the next quad 
-#define INCY \
-            UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
-
-    // step in the -x direction to the next quad 
-#define DECX \
-            UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
-
-    // sweep 2x2 quad back and forth through the raster tile, 
+#define UPDATE_MASK(bit)                                                  \
+    if (std::is_same<EdgeMaskT, E1E2ValidT>::value ||                     \
+        std::is_same<EdgeMaskT, NoEdgesValidT>::value)                    \
+    {                                                                     \
+        mask = 0xf;                                                       \
+    }                                                                     \
+    else                                                                  \
+    {                                                                     \
+        mask = edgeMask[0];                                               \
+    }                                                                     \
+    UnrollerLMask<1, NumEdges, 1, EdgeMaskT::value>::step(update_lambda); \
+    coverageMask |= (mask << bit);
+
+    // step in the +x direction to the next quad
+#define INCX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incx_lambda);
+
+    // step in the +y direction to the next quad
+#define INCY UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(incy_lambda);
+
+    // step in the -x direction to the next quad
+#define DECX UnrollerLMask<0, NumEdges, 1, EdgeMaskT::value>::step(decx_lambda);
+
+    // sweep 2x2 quad back and forth through the raster tile,
      // computing coverage masks for the entire tile
  
      // raster tile
-    // 0  1  2  3  4  5  6  7 
+    // 0  1  2  3  4  5  6  7
      // x  x
-    // x  x ------------------>  
+    // x  x ------------------>
      //                   x  x  |
      // <-----------------x  x  V
      // ..
@@ -173,7 +186,7 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
      UPDATE_MASK(12);
      INCY;
  
-    //row 1
+    // row 1
      EVAL;
      UPDATE_MASK(28);
      DECX;
@@ -215,7 +228,7 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
      UPDATE_MASK(48);
  #else
      uint32_t bit = 0;
-    for (uint32_t y = 0; y < KNOB_TILE_Y_DIM/2; ++y)
+    for (uint32_t y = 0; y < KNOB_TILE_Y_DIM / 2; ++y)
      {
          __m256d vStartOfRowEdge[NumEdges];
          for (uint32_t e = 0; e < NumEdges; ++e)
@@ -223,7 +236,7 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
              vStartOfRowEdge[e] = vEdges[e];
          }
  
-        for (uint32_t x = 0; x < KNOB_TILE_X_DIM/2; ++x)
+        for (uint32_t x = 0; x < KNOB_TILE_X_DIM / 2; ++x)
          {
              int edgeMask[NumEdges];
              for (uint32_t e = 0; e < NumEdges; ++e)
@@ -243,7 +256,7 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
              {
                  vEdges[e] = _mm256_add_pd(vEdges[e], vStepX[e]);
              }
-            bit+=4;
+            bit += 4;
          }
  
          // step to the next row
@@ -254,20 +267,19 @@ INLINE uint64_t rasterizePartialTile(DRAW_CONTEXT *pDC, double startEdges[NumEdg
      }
  #endif
      return coverageMask;
-
  }
  // Top left rule:
  // Top: if an edge is horizontal, and it is above other edges in tri pixel space, it is a 'top' edge
-// Left: if an edge is not horizontal, and it is on the left side of the triangle in pixel space, it is a 'left' edge
-// Top left: a sample is in if it is a top or left edge.
-// Out: !(horizontal && above) = !horizontal && below
-// Out: !horizontal && left = !(!horizontal && left) = horizontal and right 
-INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d &vEdge) 
+// Left: if an edge is not horizontal, and it is on the left side of the triangle in pixel space, it
+// is a 'left' edge. Top left: a sample is in if it is a top or left edge.
+// Out: !(horizontal && above) = !horizontal && below
+// Out: !horizontal && left = !(!horizontal && left) = horizontal and right
+INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256d& vEdge)
  {
      // if vA < 0, vC--
      // if vA == 0 && vB < 0, vC--
  
-    __m256d vEdgeOut = vEdge;
+    __m256d vEdgeOut    = vEdge;
      __m256d vEdgeAdjust = _mm256_sub_pd(vEdge, _mm256_set1_pd(1.0));
  
      // if vA < 0 (line is not horizontal and below)
@@ -275,7 +287,7 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256
  
      // if vA == 0 && vB < 0 (line is horizontal and we're on the left edge of a tri)
      __m128i vCmp = _mm_cmpeq_epi32(vA, _mm_setzero_si128());
-    int msk2 = _mm_movemask_ps(_mm_castsi128_ps(vCmp));
+    int     msk2 = _mm_movemask_ps(_mm_castsi128_ps(vCmp));
      msk2 &= _mm_movemask_ps(_mm_castsi128_ps(vB));
  
      // if either of these are true and we're on the line (edge == 0), bump it outside the line
@@ -285,17 +297,19 @@ INLINE void adjustTopLeftRuleIntFix16(const __m128i vA, const __m128i vB, __m256
  //////////////////////////////////////////////////////////////////////////
  /// @brief calculates difference in precision between the result of manh
  /// calculation and the edge precision, based on compile time trait values
-template<typename RT>
+template <typename RT>
  constexpr int64_t ManhToEdgePrecisionAdjust()
  {
-    static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
+    static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >=
+                      RT::EdgePrecisionT::BitsT::value,
                    "Inadequate precision of result of manh calculation ");
-    return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) - RT::EdgePrecisionT::BitsT::value);
+    return ((RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value) -
+            RT::EdgePrecisionT::BitsT::value);
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @struct adjustEdgeConservative
-/// @brief Primary template definition used for partially specializing 
+/// @brief Primary template definition used for partially specializing
  /// the adjustEdgeConservative function. This struct should never
  /// be instantiated.
  /// @tparam RT: rasterizer traits
@@ -306,38 +320,42 @@ struct adjustEdgeConservative
      //////////////////////////////////////////////////////////////////////////
      /// @brief Performs calculations to adjust each edge of a triangle away
      /// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
-    /// direction. 
+    /// direction.
      ///
      /// Uncertainty regions arise from fixed point rounding, which
      /// can snap a vertex +/- by min fixed point value.
      /// Adding 1/2 pixel in x/y bumps the edge equation tests out towards the pixel corners.
-    /// This allows the rasterizer to test for coverage only at the pixel center, 
+    /// This allows the rasterizer to test for coverage only at the pixel center,
      /// instead of having to test individual pixel corners for conservative coverage
-    INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+    INLINE adjustEdgeConservative(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
      {
-        // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge away 
-        // from the pixel center (in the direction of the edge normal A/B)
+        // Assumes CCW winding order. Subtracting from the evaluated edge equation moves the edge
+        // away from the pixel center (in the direction of the edge normal A/B)
  
          // edge = Ax + Bx + C - (manh/e)
          // manh = manhattan distance = abs(A) + abs(B)
          // e = absolute rounding error from snapping from float to fixed point precision
  
-        // 'fixed point' multiply (in double to be avx1 friendly) 
+        // 'fixed point' multiply (in double to be avx1 friendly)
          // need doubles to hold result of a fixed multiply: 16.8 * 16.9 = 32.17, for example
-        __m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)), vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
-        __m256d manh = _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)),
-                                     _mm256_mul_pd(vBai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)));
-
-        static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >= RT::EdgePrecisionT::BitsT::value,
+        __m256d vAai = _mm256_cvtepi32_pd(_mm_abs_epi32(vAi)),
+                vBai = _mm256_cvtepi32_pd(_mm_abs_epi32(vBi));
+        __m256d manh =
+            _mm256_add_pd(_mm256_mul_pd(vAai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)),
+                          _mm256_mul_pd(vBai, _mm256_set1_pd(ConservativeEdgeOffsetT::value)));
+
+        static_assert(RT::PrecisionT::BitsT::value + RT::ConservativePrecisionT::BitsT::value >=
+                          RT::EdgePrecisionT::BitsT::value,
                        "Inadequate precision of result of manh calculation ");
  
-        // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the same precision
-        // since we're doing fixed math in double format, multiply by multiples of 1/2 instead of a bit shift right
+        // rasterizer incoming edge precision is x.16, so we need to get our edge offset into the
+        // same precision. Since we're doing fixed math in double format, multiply by multiples of
+        // 1/2 instead of a bit shift right
          manh = _mm256_mul_pd(manh, _mm256_set1_pd(ManhToEdgePrecisionAdjust<RT>() * 0.5));
  
-        // move the edge away from the pixel center by the required conservative precision + 1/2 pixel
-        // this allows the rasterizer to do a single conservative coverage test to see if the primitive
-        // intersects the pixel at all
+        // move the edge away from the pixel center by the required conservative precision + 1/2
+        // pixel. This allows the rasterizer to do a single conservative coverage test to see if the
+        // primitive intersects the pixel at all
          vEdge = _mm256_sub_pd(vEdge, manh);
      };
  };
@@ -347,43 +365,51 @@ struct adjustEdgeConservative
  template <typename RT>
  struct adjustEdgeConservative<RT, std::integral_constant<int32_t, 0>>
  {
-    INLINE adjustEdgeConservative(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge) {};
+    INLINE adjustEdgeConservative(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge){};
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief calculates the distance a degenerate BBox needs to be adjusted 
+/// @brief calculates the distance a degenerate BBox needs to be adjusted
  /// for conservative rast based on compile time trait values
-template<typename RT>
+template <typename RT>
  constexpr int64_t ConservativeScissorOffset()
  {
-    static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0, "Rasterizer precision > conservative precision");
-    // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox when calculating scissor edges
-    typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1> DegenerateEdgeOffsetT;
+    static_assert(RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value >= 0,
+                  "Rasterizer precision > conservative precision");
+    // if we have a degenerate triangle, we need to compensate for adjusting the degenerate BBox
+    // when calculating scissor edges
+    typedef std::integral_constant<int32_t, (RT::ValidEdgeMaskT::value == ALL_EDGES_VALID) ? 0 : 1>
+        DegenerateEdgeOffsetT;
      // 1/2 pixel edge offset + conservative offset - degenerateTriangle
-    return RT::ConservativeEdgeOffsetT::value - (DegenerateEdgeOffsetT::value << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
+    return RT::ConservativeEdgeOffsetT::value -
+           (DegenerateEdgeOffsetT::value
+            << (RT::ConservativePrecisionT::BitsT::value - RT::PrecisionT::BitsT::value));
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Performs calculations to adjust each a vector of evaluated edges out
  /// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
-/// direction. 
+/// direction.
  template <typename RT>
-INLINE void adjustScissorEdge(const double a, const double b, __m256d &vEdge)
+INLINE void adjustScissorEdge(const double a, const double b, __m256d& vEdge)
  {
      int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
-    int64_t manh = ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >> ManhToEdgePrecisionAdjust<RT>();
+    int64_t manh =
+        ((aabs * ConservativeScissorOffset<RT>()) + (babs * ConservativeScissorOffset<RT>())) >>
+        ManhToEdgePrecisionAdjust<RT>();
      vEdge = _mm256_sub_pd(vEdge, _mm256_set1_pd(manh));
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Performs calculations to adjust each a scalar evaluated edge out
  /// from the pixel center by 1/2 pixel + uncertainty region in both the x and y
-/// direction. 
+/// direction.
  template <typename RT, typename OffsetT>
  INLINE double adjustScalarEdge(const double a, const double b, const double Edge)
  {
      int64_t aabs = std::abs(static_cast<int64_t>(a)), babs = std::abs(static_cast<int64_t>(b));
-    int64_t manh = ((aabs * OffsetT::value) + (babs * OffsetT::value)) >> ManhToEdgePrecisionAdjust<RT>();
+    int64_t manh =
+        ((aabs * OffsetT::value) + (babs * OffsetT::value)) >> ManhToEdgePrecisionAdjust<RT>();
      return (Edge - manh);
  };
  
@@ -392,12 +418,14 @@ INLINE double adjustScalarEdge(const double a, const double b, const double Edge
  template <typename RT, typename EdgeOffsetT>
  struct adjustEdgesFix16
  {
-    INLINE adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+    INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
      {
-        static_assert(std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
-                      "Edge equation expected to be in x.16 fixed point");
+        static_assert(
+            std::is_same<typename RT::EdgePrecisionT, FixedPointTraits<Fixed_X_16>>::value,
+            "Edge equation expected to be in x.16 fixed point");
  
-        static_assert(RT::IsConservativeT::value, "Edge offset assumes conservative rasterization is enabled");
+        static_assert(RT::IsConservativeT::value,
+                      "Edge offset assumes conservative rasterization is enabled");
  
          // need to apply any edge offsets before applying the top-left rule
          adjustEdgeConservative<RT, EdgeOffsetT>(vAi, vBi, vEdge);
@@ -411,7 +439,7 @@ struct adjustEdgesFix16
  template <typename RT>
  struct adjustEdgesFix16<RT, std::integral_constant<int32_t, 0>>
  {
-    INLINE adjustEdgesFix16(const __m128i &vAi, const __m128i &vBi, __m256d &vEdge)
+    INLINE adjustEdgesFix16(const __m128i& vAi, const __m128i& vBi, __m256d& vEdge)
      {
          adjustTopLeftRuleIntFix16(vAi, vBi, vEdge);
      }
@@ -449,7 +477,8 @@ INLINE float ComputeMaxDepthSlope(const SWR_TRIANGLE_DESC* pDesc)
      return std::max(dzdx, dzdy);
  }
  
-INLINE float ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pDesc, const float* z)
+INLINE float
+ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pDesc, const float* z)
  {
      if (pState->depthFormat == R24_UNORM_X8_TYPELESS)
      {
@@ -464,7 +493,7 @@ INLINE float ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_D
          SWR_ASSERT(pState->depthFormat == R32_FLOAT);
  
          // for f32 depth, factor = 2^(exponent(max(abs(z) - 23)
-        float zMax = std::max(fabsf(z[0]), std::max(fabsf(z[1]), fabsf(z[2])));
+        float    zMax    = std::max(fabsf(z[0]), std::max(fabsf(z[1]), fabsf(z[2])));
          uint32_t zMaxInt = *(uint32_t*)&zMax;
          zMaxInt &= 0x7f800000;
          zMax = *(float*)&zMaxInt;
@@ -473,7 +502,8 @@ INLINE float ComputeBiasFactor(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_D
      }
  }
  
-INLINE float ComputeDepthBias(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pTri, const float* z)
+INLINE float
+ComputeDepthBias(const SWR_RASTSTATE* pState, const SWR_TRIANGLE_DESC* pTri, const float* z)
  {
      if (pState->depthBias == 0 && pState->slopeScaledDepthBias == 0)
      {
@@ -512,7 +542,8 @@ __declspec(thread) volatile uint64_t gToss;
  
  static const uint32_t vertsPerTri = 3, componentsPerAttrib = 4;
  // try to avoid _chkstk insertions; make this thread local
-static THREAD OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
+static THREAD
+OSALIGNLINE(float) perspAttribsTLS[vertsPerTri * SWR_VTX_NUM_SLOTS * componentsPerAttrib];
  
  INLINE
  void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
@@ -534,11 +565,13 @@ void ComputeEdgeData(int32_t a, int32_t b, EDGE& edge)
  
      __m256d vQuadStepXFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.a), vQuadOffsetsXIntFix8);
      __m256d vQuadStepYFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.b), vQuadOffsetsYIntFix8);
-    edge.vQuadOffsets = _mm256_add_pd(vQuadStepXFix16, vQuadStepYFix16);
+    edge.vQuadOffsets       = _mm256_add_pd(vQuadStepXFix16, vQuadStepYFix16);
  
      // compute raster tile offsets
-    const __m256d vTileOffsetsXIntFix8 = _mm256_set_pd((KNOB_TILE_X_DIM - 1)*FIXED_POINT_SCALE, 0, (KNOB_TILE_X_DIM - 1)*FIXED_POINT_SCALE, 0);
-    const __m256d vTileOffsetsYIntFix8 = _mm256_set_pd((KNOB_TILE_Y_DIM - 1)*FIXED_POINT_SCALE, (KNOB_TILE_Y_DIM - 1)*FIXED_POINT_SCALE, 0, 0);
+    const __m256d vTileOffsetsXIntFix8 = _mm256_set_pd(
+        (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0, (KNOB_TILE_X_DIM - 1) * FIXED_POINT_SCALE, 0);
+    const __m256d vTileOffsetsYIntFix8 = _mm256_set_pd(
+        (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, (KNOB_TILE_Y_DIM - 1) * FIXED_POINT_SCALE, 0, 0);
  
      __m256d vTileStepXFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.a), vTileOffsetsXIntFix8);
      __m256d vTileStepYFix16 = _mm256_mul_pd(_mm256_set1_pd(edge.b), vTileOffsetsYIntFix8);
@@ -552,30 +585,33 @@ void ComputeEdgeData(const POS& p0, const POS& p1, EDGE& edge)
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Primary template definition used for partially specializing 
-/// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel 
+/// @brief Primary template definition used for partially specializing
+/// the UpdateEdgeMasks function. Offset evaluated edges from UL pixel
  /// corner to sample position, and test for coverage
  /// @tparam sampleCount: multisample count
  template <typename NumSamplesT>
-INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3], const __m256d* vEdgeFix16,
-                            int32_t &mask0, int32_t &mask1, int32_t &mask2)
+INLINE void UpdateEdgeMasks(const __m256d (&vEdgeTileBbox)[3],
+                            const __m256d* vEdgeFix16,
+                            int32_t&       mask0,
+                            int32_t&       mask1,
+                            int32_t&       mask2)
  {
      __m256d vSampleBboxTest0, vSampleBboxTest1, vSampleBboxTest2;
      // evaluate edge equations at the tile multisample bounding box
      vSampleBboxTest0 = _mm256_add_pd(vEdgeTileBbox[0], vEdgeFix16[0]);
      vSampleBboxTest1 = _mm256_add_pd(vEdgeTileBbox[1], vEdgeFix16[1]);
      vSampleBboxTest2 = _mm256_add_pd(vEdgeTileBbox[2], vEdgeFix16[2]);
-    mask0 = _mm256_movemask_pd(vSampleBboxTest0);
-    mask1 = _mm256_movemask_pd(vSampleBboxTest1);
-    mask2 = _mm256_movemask_pd(vSampleBboxTest2);
+    mask0            = _mm256_movemask_pd(vSampleBboxTest0);
+    mask1            = _mm256_movemask_pd(vSampleBboxTest1);
+    mask2            = _mm256_movemask_pd(vSampleBboxTest2);
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief UpdateEdgeMasks<SingleSampleT> specialization, instantiated
  /// when only rasterizing a single coverage test point
  template <>
-INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* vEdgeFix16,
-                                           int32_t &mask0, int32_t &mask1, int32_t &mask2)
+INLINE void UpdateEdgeMasks<SingleSampleT>(
+    const __m256d (&)[3], const __m256d* vEdgeFix16, int32_t& mask0, int32_t& mask1, int32_t& mask2)
  {
      mask0 = _mm256_movemask_pd(vEdgeFix16[0]);
      mask1 = _mm256_movemask_pd(vEdgeFix16[1]);
@@ -585,7 +621,7 @@ INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* v
  //////////////////////////////////////////////////////////////////////////
  /// @struct ComputeScissorEdges
  /// @brief Primary template definition. Allows the function to be generically
-/// called. When paired with below specializations, will result in an empty 
+/// called. When paired with below specializations, will result in an empty
  /// inlined function if scissor is not enabled
  /// @tparam RasterScissorEdgesT: is scissor enabled?
  /// @tparam IsConservativeT: is conservative rast enabled?
@@ -593,21 +629,29 @@ INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* v
  template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
  struct ComputeScissorEdges
  {
-    INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y, 
-                              EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
+    INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
+                               const SWR_RECT& scissorBBox,
+                               const int32_t   x,
+                               const int32_t   y,
+                               EDGE (&rastEdges)[RT::NumEdgesT::value],
+                               __m256d (&vEdgeFix16)[7]){};
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial 
+/// @brief ComputeScissorEdges<std::true_type, std::true_type, RT> partial
  /// specialization. Instantiated when conservative rast and scissor are enabled
  template <typename RT>
  struct ComputeScissorEdges<std::true_type, std::true_type, RT>
  {
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Intersect tri bbox with scissor, compute scissor edge vectors, 
+    /// @brief Intersect tri bbox with scissor, compute scissor edge vectors,
      /// evaluate edge equations and offset them away from pixel center.
-    INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
-                              EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
+    INLINE ComputeScissorEdges(const SWR_RECT& triBBox,
+                               const SWR_RECT& scissorBBox,
+                               const int32_t   x,
+                               const int32_t   y,
+                               EDGE (&rastEdges)[RT::NumEdgesT::value],
+                               __m256d (&vEdgeFix16)[7])
      {
          // if conservative rasterizing, triangle bbox intersected with scissor bbox is used
          SWR_RECT scissor;
@@ -627,12 +671,17 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
          ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
          ComputeEdgeData(topRight, topLeft, rastEdges[6]);
  
-        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
-        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
-        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
-        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
-
-        // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
+        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
+                                       (rastEdges[3].b * (y - scissor.ymin)));
+        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
+                                       (rastEdges[4].b * (y - scissor.ymax)));
+        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
+                                       (rastEdges[5].b * (y - scissor.ymax)));
+        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
+                                       (rastEdges[6].b * (y - scissor.ymin)));
+
+        // if conservative rasterizing, need to bump the scissor edges out by the conservative
+        // uncertainty distance, else do nothing
          adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
          adjustScissorEdge<RT>(rastEdges[4].a, rastEdges[4].b, vEdgeFix16[4]);
          adjustScissorEdge<RT>(rastEdges[5].a, rastEdges[5].b, vEdgeFix16[5]);
@@ -645,7 +694,7 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial 
+/// @brief ComputeScissorEdges<std::true_type, std::false_type, RT> partial
  /// specialization. Instantiated when scissor is enabled and conservative rast
  /// is disabled.
  template <typename RT>
@@ -653,14 +702,18 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
  {
      //////////////////////////////////////////////////////////////////////////
      /// @brief Compute scissor edge vectors and evaluate edge equations
-    INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
-                              EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
+    INLINE ComputeScissorEdges(const SWR_RECT&,
+                               const SWR_RECT& scissorBBox,
+                               const int32_t   x,
+                               const int32_t   y,
+                               EDGE (&rastEdges)[RT::NumEdgesT::value],
+                               __m256d (&vEdgeFix16)[7])
      {
-        const SWR_RECT &scissor = scissorBBox;
-        POS topLeft{scissor.xmin, scissor.ymin};
-        POS bottomLeft{scissor.xmin, scissor.ymax};
-        POS topRight{scissor.xmax, scissor.ymin};
-        POS bottomRight{scissor.xmax, scissor.ymax};
+        const SWR_RECT& scissor = scissorBBox;
+        POS             topLeft{scissor.xmin, scissor.ymin};
+        POS             bottomLeft{scissor.xmin, scissor.ymax};
+        POS             topRight{scissor.xmax, scissor.ymin};
+        POS             bottomRight{scissor.xmax, scissor.ymax};
  
          // construct 4 scissor edges in ccw direction
          ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@@ -668,10 +721,14 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
          ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
          ComputeEdgeData(topRight, topLeft, rastEdges[6]);
  
-        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
-        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
-        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
-        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
+        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) +
+                                       (rastEdges[3].b * (y - scissor.ymin)));
+        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) +
+                                       (rastEdges[4].b * (y - scissor.ymax)));
+        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) +
+                                       (rastEdges[5].b * (y - scissor.ymax)));
+        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) +
+                                       (rastEdges[6].b * (y - scissor.ymin)));
  
          // Upper left rule for scissor
          vEdgeFix16[3] = _mm256_sub_pd(vEdgeFix16[3], _mm256_set1_pd(1.0));
@@ -723,7 +780,8 @@ INLINE bool TrivialRejectTest<E1E2ValidT>(const int, const int mask1, const int
  template <>
  INLINE bool TrivialRejectTest<AllEdgesValidT>(const int mask0, const int mask1, const int mask2)
  {
-    return (!(mask0 && mask1 && mask2)) ? true : false;;
+    return (!(mask0 && mask1 && mask2)) ? true : false;
+    ;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -737,7 +795,7 @@ INLINE bool TrivialRejectTest<NoEdgesValidT>(const int, const int, const int)
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Primary function template for TrivialAcceptTest. Always returns
-/// false, since it will only be called for degenerate tris, and as such 
+/// false, since it will only be called for degenerate tris, and as such
  /// will never cover the entire raster tile
  template <typename ScissorEnableT>
  INLINE bool TrivialAcceptTest(const int, const int, const int)
@@ -760,27 +818,33 @@ INLINE bool TrivialAcceptTest<std::false_type>(const int mask0, const int mask1,
  template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
  struct GenerateSVInnerCoverage
  {
-    INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*,  uint64_t &){};
+    INLINE GenerateSVInnerCoverage(DRAW_CONTEXT*, uint32_t, EDGE*, double*, uint64_t&){};
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Specialization of GenerateSVInnerCoverage where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the evaluated 
+/// are non-degenerate and SVInnerCoverage is requested. Offsets the evaluated
  /// edge values from OuterConservative to InnerConservative and rasterizes.
  template <typename RT>
  struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
  {
-    INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC, uint32_t workerId, EDGE* pRastEdges, double* pStartQuadEdges,  uint64_t &innerCoverageMask)
+    INLINE GenerateSVInnerCoverage(DRAW_CONTEXT* pDC,
+                                   uint32_t      workerId,
+                                   EDGE*         pRastEdges,
+                                   double*       pStartQuadEdges,
+                                   uint64_t&     innerCoverageMask)
      {
          double startQuadEdgesAdj[RT::NumEdgesT::value];
-        for(uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
+        for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
          {
-            startQuadEdgesAdj[e] = adjustScalarEdge<RT, typename RT::InnerConservativeEdgeOffsetT>(pRastEdges[e].a, pRastEdges[e].b, pStartQuadEdges[e]);
+            startQuadEdgesAdj[e] = adjustScalarEdge<RT, typename RT::InnerConservativeEdgeOffsetT>(
+                pRastEdges[e].a, pRastEdges[e].b, pStartQuadEdges[e]);
          }
  
          // not trivial accept or reject, must rasterize full tile
          RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
-        innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdgesAdj, pRastEdges);
+        innerCoverageMask = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
+            pDC, startQuadEdgesAdj, pRastEdges);
          RDTSC_END(BERasterizePartial, 0);
      }
  };
@@ -791,43 +855,62 @@ struct GenerateSVInnerCoverage<RT, AllEdgesValidT, InnerConservativeCoverageT>
  template <typename RT, typename ValidEdgeMaskT, typename InputCoverageT>
  struct UpdateEdgeMasksInnerConservative
  {
-    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3], const __m256d*,
-                                           const __m128i, const __m128i, int32_t &, int32_t &, int32_t &){};
+    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
+                                            const __m256d*,
+                                            const __m128i,
+                                            const __m128i,
+                                            int32_t&,
+                                            int32_t&,
+                                            int32_t&){};
  };
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Specialization of UpdateEdgeMasksInnerConservative where all edges
-/// are non-degenerate and SVInnerCoverage is requested. Offsets the edges 
-/// evaluated at raster tile corners to inner conservative position and 
+/// are non-degenerate and SVInnerCoverage is requested. Offsets the edges
+/// evaluated at raster tile corners to inner conservative position and
  /// updates edge masks
  template <typename RT>
  struct UpdateEdgeMasksInnerConservative<RT, AllEdgesValidT, InnerConservativeCoverageT>
  {
-    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3], const __m256d* vEdgeFix16,
-                                           const __m128i vAi, const __m128i vBi, int32_t &mask0, int32_t &mask1, int32_t &mask2)
+    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&vEdgeTileBbox)[3],
+                                            const __m256d* vEdgeFix16,
+                                            const __m128i  vAi,
+                                            const __m128i  vBi,
+                                            int32_t&       mask0,
+                                            int32_t&       mask1,
+                                            int32_t&       mask2)
      {
          __m256d vTempEdge[3]{vEdgeFix16[0], vEdgeFix16[1], vEdgeFix16[2]};
  
-        // instead of keeping 2 copies of evaluated edges around, just compensate for the outer 
+        // instead of keeping 2 copies of evaluated edges around, just compensate for the outer
          // conservative evaluated edge when adjusting the edge in for inner conservative tests
-        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[0]);
-        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[1]);
-        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(vAi, vBi, vTempEdge[2]);
-
-        UpdateEdgeMasks<typename RT::NumCoverageSamplesT>(vEdgeTileBbox, vTempEdge, mask0, mask1, mask2);
+        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+            vAi, vBi, vTempEdge[0]);
+        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+            vAi, vBi, vTempEdge[1]);
+        adjustEdgeConservative<RT, typename RT::InnerConservativeEdgeOffsetT>(
+            vAi, vBi, vTempEdge[2]);
+
+        UpdateEdgeMasks<typename RT::NumCoverageSamplesT>(
+            vEdgeTileBbox, vTempEdge, mask0, mask1, mask2);
      }
  };
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Specialization of UpdateEdgeMasksInnerConservative where SVInnerCoverage 
-/// is requested but at least one edge is degenerate. Since a degenerate triangle cannot 
+/// @brief Specialization of UpdateEdgeMasksInnerConservative where SVInnerCoverage
+/// is requested but at least one edge is degenerate. Since a degenerate triangle cannot
  /// cover an entire raster tile, set mask0 to 0 to force it down the
  /// rastierizePartialTile path
  template <typename RT, typename ValidEdgeMaskT>
  struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCoverageT>
  {
-    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&)[3], const __m256d*,
-                                   const __m128i, const __m128i, int32_t &mask0, int32_t &, int32_t &)
+    INLINE UpdateEdgeMasksInnerConservative(const __m256d (&)[3],
+                                            const __m256d*,
+                                            const __m128i,
+                                            const __m128i,
+                                            int32_t& mask0,
+                                            int32_t&,
+                                            int32_t&)
      {
          // set one mask to zero to force the triangle down the rastierizePartialTile path
          mask0 = 0;
@@ -837,7 +920,7 @@ struct UpdateEdgeMasksInnerConservative<RT, ValidEdgeMaskT, InnerConservativeCov
  template <typename RT>
  void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pDesc)
  {
-    const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
+    const TRIANGLE_WORK_DESC& workDesc = *((TRIANGLE_WORK_DESC*)pDesc);
  #if KNOB_ENABLE_TOSS_POINTS
      if (KNOB_TOSS_BIN_TRIS)
      {
@@ -847,24 +930,25 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      RDTSC_BEGIN(BERasterizeTriangle, pDC->drawId);
      RDTSC_BEGIN(BETriangleSetup, pDC->drawId);
  
-    const API_STATE &state = GetApiState(pDC);
-    const SWR_RASTSTATE &rastState = state.rastState;
+    const API_STATE&     state        = GetApiState(pDC);
+    const SWR_RASTSTATE& rastState    = state.rastState;
      const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
  
      OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
      triDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
  
      __m128 vX, vY, vZ, vRecipW;
-    
+
      // pTriBuffer data layout: grouped components of the 3 triangle points and 1 don't care
      // eg: vX = [x0 x1 x2 dc]
-    vX = _mm_load_ps(workDesc.pTriBuffer);
-    vY = _mm_load_ps(workDesc.pTriBuffer + 4);
-    vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
+    vX      = _mm_load_ps(workDesc.pTriBuffer);
+    vY      = _mm_load_ps(workDesc.pTriBuffer + 4);
+    vZ      = _mm_load_ps(workDesc.pTriBuffer + 8);
      vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
  
      // convert to fixed point
-    static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value, "Rasterizer expects 16.8 fixed point precision");
+    static_assert(std::is_same<typename RT::PrecisionT, FixedPointTraits<Fixed_16_8>>::value,
+                  "Rasterizer expects 16.8 fixed point precision");
      __m128i vXi = fpToFixedPoint(vX);
      __m128i vYi = fpToFixedPoint(vY);
  
@@ -879,12 +963,12 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
  
      __m128i vAi, vBi;
      triangleSetupABInt(vXi, vYi, vAi, vBi);
-    
+
      // determinant
      float det = calcDeterminantInt(vAi, vBi);
  
      // Verts in Pixel Coordinate Space at this point
-    // Det > 0 = CW winding order 
+    // Det > 0 = CW winding order
      // Convert CW triangles to CCW
      if (det > 0.0)
      {
@@ -899,9 +983,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      // Finish triangle setup - C edge coef
      triangleSetupC(vX, vY, vA, vB, vC);
  
-    if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+    if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
      {
-        // If we have degenerate edge(s) to rasterize, set I and J coefs 
+        // If we have degenerate edge(s) to rasterize, set I and J coefs
          // to 0 for constant interpolation of attributes
          triDesc.I[0] = 0.0f;
          triDesc.I[1] = 0.0f;
@@ -915,7 +999,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      }
      else
      {
-        // only extract coefs for 2 of the barycentrics; the 3rd can be 
+        // only extract coefs for 2 of the barycentrics; the 3rd can be
          // determined from the barycentric equation:
          // i + j + k = 1 <=> k = 1 - j - i
          _MM_EXTRACT_FLOAT(triDesc.I[0], vA, 1);
@@ -926,7 +1010,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
          _MM_EXTRACT_FLOAT(triDesc.J[2], vC, 2);
  
          // compute recipDet, used to calculate barycentric i and j in the backend
-        triDesc.recipDet = 1.0f/det;
+        triDesc.recipDet = 1.0f / det;
      }
  
      OSALIGNSIMD(float) oneOverW[4];
@@ -935,31 +1019,31 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      triDesc.OneOverW[1] = oneOverW[1] - oneOverW[2];
      triDesc.OneOverW[2] = oneOverW[2];
  
-    // calculate perspective correct coefs per vertex attrib 
-    float* pPerspAttribs = perspAttribsTLS;
-    float* pAttribs = workDesc.pAttribs;
+    // calculate perspective correct coefs per vertex attrib
+    float* pPerspAttribs  = perspAttribsTLS;
+    float* pAttribs       = workDesc.pAttribs;
      triDesc.pPerspAttribs = pPerspAttribs;
-    triDesc.pAttribs = pAttribs;
-    float *pRecipW = workDesc.pTriBuffer + 12;
-    triDesc.pRecipW = pRecipW;
-    __m128 vOneOverWV0 = _mm_broadcast_ss(pRecipW);
-    __m128 vOneOverWV1 = _mm_broadcast_ss(pRecipW+=1);
-    __m128 vOneOverWV2 = _mm_broadcast_ss(pRecipW+=1);
-    for(uint32_t i = 0; i < workDesc.numAttribs; i++)
+    triDesc.pAttribs      = pAttribs;
+    float* pRecipW        = workDesc.pTriBuffer + 12;
+    triDesc.pRecipW       = pRecipW;
+    __m128 vOneOverWV0    = _mm_broadcast_ss(pRecipW);
+    __m128 vOneOverWV1    = _mm_broadcast_ss(pRecipW += 1);
+    __m128 vOneOverWV2    = _mm_broadcast_ss(pRecipW += 1);
+    for (uint32_t i = 0; i < workDesc.numAttribs; i++)
      {
          __m128 attribA = _mm_load_ps(pAttribs);
-        __m128 attribB = _mm_load_ps(pAttribs+=4);
-        __m128 attribC = _mm_load_ps(pAttribs+=4);
-        pAttribs+=4;
+        __m128 attribB = _mm_load_ps(pAttribs += 4);
+        __m128 attribC = _mm_load_ps(pAttribs += 4);
+        pAttribs += 4;
  
          attribA = _mm_mul_ps(attribA, vOneOverWV0);
          attribB = _mm_mul_ps(attribB, vOneOverWV1);
          attribC = _mm_mul_ps(attribC, vOneOverWV2);
  
          _mm_store_ps(pPerspAttribs, attribA);
-        _mm_store_ps(pPerspAttribs+=4, attribB);
-        _mm_store_ps(pPerspAttribs+=4, attribC);
-        pPerspAttribs+=4;
+        _mm_store_ps(pPerspAttribs += 4, attribB);
+        _mm_store_ps(pPerspAttribs += 4, attribC);
+        pPerspAttribs += 4;
      }
  
      // compute bary Z
@@ -969,7 +1053,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      triDesc.Z[0] = a[0] - a[2];
      triDesc.Z[1] = a[1] - a[2];
      triDesc.Z[2] = a[2];
-        
+
      // add depth bias
      triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
  
@@ -977,12 +1061,17 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      OSALIGNSIMD(SWR_RECT) bbox;
      calcBoundingBoxInt(vXi, vYi, bbox);
  
-    const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
+    const SWR_RECT& scissorInFixedPoint =
+        state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
  
-    if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
+    if (RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
      {
-        // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
-        bbox.xmin--;    bbox.xmax++;    bbox.ymin--;    bbox.ymax++;
+        // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is
+        // valid
+        bbox.xmin--;
+        bbox.xmax++;
+        bbox.ymin--;
+        bbox.ymax++;
          SWR_ASSERT(scissorInFixedPoint.xmin >= 0 && scissorInFixedPoint.ymin >= 0,
                     "Conservative rast degenerate handling requires a valid scissor rect");
      }
@@ -996,12 +1085,13 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
  
      triDesc.triFlags = workDesc.triFlags;
  
-    // further constrain backend to intersecting bounding box of macro tile and scissored triangle bbox
+    // further constrain backend to intersecting bounding box of macro tile and scissored triangle
+    // bbox
      uint32_t macroX, macroY;
      MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
-    int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
-    int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
-    int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
+    int32_t macroBoxLeft   = macroX * KNOB_MACROTILE_X_DIM_FIXED;
+    int32_t macroBoxRight  = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
+    int32_t macroBoxTop    = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
      int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
  
      intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
@@ -1009,19 +1099,21 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      intersect.xmax = std::min(intersect.xmax, macroBoxRight);
      intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
  
-    SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
+    SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax &&
+               intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 &&
+               intersect.ymax >= 0);
  
      RDTSC_END(BETriangleSetup, 0);
  
      // update triangle desc
-    uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t minTileX  = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t minTileY  = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t maxTileX  = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t maxTileY  = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
      uint32_t numTilesX = maxTileX - minTileX + 1;
      uint32_t numTilesY = maxTileY - minTileY + 1;
  
-    if (numTilesX == 0 || numTilesY == 0) 
+    if (numTilesX == 0 || numTilesY == 0)
      {
          RDTSC_EVENT(BEEmptyTriangle, 1, 0);
          RDTSC_END(BERasterizeTriangle, 1);
@@ -1040,7 +1132,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
  
      // single sample rasterization evaluates edges at pixel center,
      // multisample evaluates edges UL pixel corner and steps to each sample position
-    if(std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
+    if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
      {
          // Add 0.5, in fixed point, to offset to pixel center
          x += (FIXED_POINT_SCALE / 2);
@@ -1051,7 +1143,7 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      __m128i vTopLeftY = _mm_set1_epi32(y);
  
      // evaluate edge equations at top-left pixel using 64bit math
-    // 
+    //
      // line = Ax + By + C
      // solving for C:
      // C = -Ax - By
@@ -1061,21 +1153,21 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      // line = Ax - By - Ax0 - By0
      // line = A(x - x0) + B(y - y0)
      // dX = (x-x0), dY = (y-y0)
-    // so all this simplifies to 
+    // so all this simplifies to
      // edge = A(dX) + B(dY), our first test at the top left of the bbox we're rasterizing within
  
      __m128i vDeltaX = _mm_sub_epi32(vTopLeftX, vXi);
      __m128i vDeltaY = _mm_sub_epi32(vTopLeftY, vYi);
  
      // evaluate A(dx) and B(dY) for all points
-    __m256d vAipd = _mm256_cvtepi32_pd(vAi);
-    __m256d vBipd = _mm256_cvtepi32_pd(vBi);
+    __m256d vAipd     = _mm256_cvtepi32_pd(vAi);
+    __m256d vBipd     = _mm256_cvtepi32_pd(vBi);
      __m256d vDeltaXpd = _mm256_cvtepi32_pd(vDeltaX);
      __m256d vDeltaYpd = _mm256_cvtepi32_pd(vDeltaY);
  
      __m256d vAiDeltaXFix16 = _mm256_mul_pd(vAipd, vDeltaXpd);
      __m256d vBiDeltaYFix16 = _mm256_mul_pd(vBipd, vDeltaYpd);
-    __m256d vEdge = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
+    __m256d vEdge          = _mm256_add_pd(vAiDeltaXFix16, vBiDeltaYFix16);
  
      // apply any edge adjustments(top-left, crast, etc)
      adjustEdgesFix16<RT, typename RT::ConservativeEdgeOffsetT>(vAi, vBi, vEdge);
@@ -1098,8 +1190,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      ComputeEdgeData(aAi[2], aBi[2], rastEdges[2]);
  
      // Compute and store triangle edge data if scissor needs to rasterized
-    ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>
-                       (bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
+    ComputeScissorEdges<typename RT::RasterizeScissorEdgesT, typename RT::IsConservativeT, RT>(
+        bbox, scissorInFixedPoint, x, y, rastEdges, vEdgeFix16);
  
      // Evaluate edge equations at sample positions of each of the 4 corners of a raster tile
      // used to for testing if entire raster tile is inside a triangle
@@ -1117,9 +1209,9 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
      __m256d vEdgeTileBbox[3];
      if (NumCoverageSamplesT::value > 1)
      {
-        const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
-        const __m128i vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
-        const __m128i vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
+        const SWR_MULTISAMPLE_POS& samplePos         = rastState.samplePositions;
+        const __m128i              vTileSampleBBoxXh = samplePos.TileSampleOffsetsX();
+        const __m128i              vTileSampleBBoxYh = samplePos.TileSampleOffsetsY();
  
          __m256d vTileSampleBBoxXFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxXh);
          __m256d vTileSampleBBoxYFix8 = _mm256_cvtepi32_pd(vTileSampleBBoxYh);
@@ -1128,24 +1220,33 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
          // used to for testing if entire raster tile is inside a triangle
          for (uint32_t e = 0; e < 3; ++e)
          {
-            __m256d vResultAxFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vTileSampleBBoxXFix8);
-            __m256d vResultByFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vTileSampleBBoxYFix8);
+            __m256d vResultAxFix16 =
+                _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vTileSampleBBoxXFix8);
+            __m256d vResultByFix16 =
+                _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vTileSampleBBoxYFix8);
              vEdgeTileBbox[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
  
              // adjust for msaa tile bbox edges outward for conservative rast, if enabled
-            adjustEdgeConservative<RT, typename RT::ConservativeEdgeOffsetT>(vAi, vBi, vEdgeTileBbox[e]);
+            adjustEdgeConservative<RT, typename RT::ConservativeEdgeOffsetT>(
+                vAi, vBi, vEdgeTileBbox[e]);
          }
      }
  
      RDTSC_END(BEStepSetup, 0);
  
-    uint32_t tY = minTileY;
-    uint32_t tX = minTileX;
+    uint32_t tY   = minTileY;
+    uint32_t tX   = minTileX;
      uint32_t maxY = maxTileY;
      uint32_t maxX = maxTileX;
  
      RenderOutputBuffers renderBuffers, currentRenderBufferRow;
-    GetRenderHotTiles<RT::MT::numSamples>(pDC, workerId, macroTile, minTileX, minTileY, renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
+    GetRenderHotTiles<RT::MT::numSamples>(pDC,
+                                          workerId,
+                                          macroTile,
+                                          minTileX,
+                                          minTileY,
+                                          renderBuffers,
+                                          triDesc.triFlags.renderTargetArrayIndex);
      currentRenderBufferRow = renderBuffers;
  
      // rasterize and generate coverage masks per sample
@@ -1168,26 +1269,31 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
              for (uint32_t sampleNum = 0; sampleNum < NumCoverageSamplesT::value; sampleNum++)
              {
                  // trivial reject, at least one edge has all 4 corners of raster tile outside
-                bool trivialReject = TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
+                bool trivialReject =
+                    TrivialRejectTest<typename RT::ValidEdgeMaskT>(mask0, mask1, mask2);
  
                  if (!trivialReject)
                  {
                      // trivial accept mask
                      triDesc.coverageMask[sampleNum] = 0xffffffffffffffffULL;
  
-                    // Update the raster tile edge masks based on inner conservative edge offsets, if enabled
-                    UpdateEdgeMasksInnerConservative<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>
-                        (vEdgeTileBbox, vEdgeFix16, vAi, vBi, mask0, mask1, mask2);
+                    // Update the raster tile edge masks based on inner conservative edge offsets,
+                    // if enabled
+                    UpdateEdgeMasksInnerConservative<RT,
+                                                     typename RT::ValidEdgeMaskT,
+                                                     typename RT::InputCoverageT>(
+                        vEdgeTileBbox, vEdgeFix16, vAi, vBi, mask0, mask1, mask2);
  
                      // @todo Make this a bit smarter to allow use of trivial accept when:
                      //   1) scissor/vp intersection rect is raster tile aligned
                      //   2) raster tile is entirely within scissor/vp intersection rect
                      if (TrivialAcceptTest<typename RT::RasterizeScissorEdgesT>(mask0, mask1, mask2))
                      {
-                        // trivial accept, all 4 corners of all 3 edges are negative 
+                        // trivial accept, all 4 corners of all 3 edges are negative
                          // i.e. raster tile completely inside triangle
                          triDesc.anyCoveredSamples = triDesc.coverageMask[sampleNum];
-                        if(std::is_same<typename RT::InputCoverageT, InnerConservativeCoverageT>::value)
+                        if (std::is_same<typename RT::InputCoverageT,
+                                         InnerConservativeCoverageT>::value)
                          {
                              triDesc.innerCoverageMask = 0xffffffffffffffffULL;
                          }
@@ -1196,9 +1302,10 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                      else
                      {
                          __m256d vEdgeAtSample[RT::NumEdgesT::value];
-                        if(std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
+                        if (std::is_same<NumCoverageSamplesT, SingleSampleT>::value)
                          {
-                            // should get optimized out for single sample case (global value numbering or copy propagation)
+                            // should get optimized out for single sample case (global value
+                            // numbering or copy propagation)
                              for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
                              {
                                  vEdgeAtSample[e] = vEdgeFix16[e];
@@ -1206,23 +1313,25 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                          }
                          else
                          {
-                            const SWR_MULTISAMPLE_POS &samplePos = rastState.samplePositions;
-                            __m128i vSampleOffsetXh = samplePos.vXi(sampleNum);
-                            __m128i vSampleOffsetYh = samplePos.vYi(sampleNum);
+                            const SWR_MULTISAMPLE_POS& samplePos       = rastState.samplePositions;
+                            __m128i                    vSampleOffsetXh = samplePos.vXi(sampleNum);
+                            __m128i                    vSampleOffsetYh = samplePos.vYi(sampleNum);
                              __m256d vSampleOffsetX = _mm256_cvtepi32_pd(vSampleOffsetXh);
                              __m256d vSampleOffsetY = _mm256_cvtepi32_pd(vSampleOffsetYh);
  
                              // step edge equation tests from UL tile corner to pixel sample position
                              for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
                              {
-                                __m256d vResultAxFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
-                                __m256d vResultByFix16 = _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
+                                __m256d vResultAxFix16 =
+                                    _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].a), vSampleOffsetX);
+                                __m256d vResultByFix16 =
+                                    _mm256_mul_pd(_mm256_set1_pd(rastEdges[e].b), vSampleOffsetY);
                                  vEdgeAtSample[e] = _mm256_add_pd(vResultAxFix16, vResultByFix16);
                                  vEdgeAtSample[e] = _mm256_add_pd(vEdgeFix16[e], vEdgeAtSample[e]);
                              }
                          }
  
-                        double startQuadEdges[RT::NumEdgesT::value];
+                        double        startQuadEdges[RT::NumEdgesT::value];
                          const __m256i vLane0Mask = _mm256_set_epi32(0, 0, 0, 0, 0, 0, -1, -1);
                          for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
                          {
@@ -1231,19 +1340,25 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
  
                          // not trivial accept or reject, must rasterize full tile
                          RDTSC_BEGIN(BERasterizePartial, pDC->drawId);
-                        triDesc.coverageMask[sampleNum] = rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(pDC, startQuadEdges, rastEdges);
+                        triDesc.coverageMask[sampleNum] =
+                            rasterizePartialTile<RT::NumEdgesT::value, typename RT::ValidEdgeMaskT>(
+                                pDC, startQuadEdges, rastEdges);
                          RDTSC_END(BERasterizePartial, 0);
  
-                        triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum]; 
-                        
+                        triDesc.anyCoveredSamples |= triDesc.coverageMask[sampleNum];
+
                          // Output SV InnerCoverage, if needed
-                        GenerateSVInnerCoverage<RT, typename RT::ValidEdgeMaskT, typename RT::InputCoverageT>(pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
+                        GenerateSVInnerCoverage<RT,
+                                                typename RT::ValidEdgeMaskT,
+                                                typename RT::InputCoverageT>(
+                            pDC, workerId, rastEdges, startQuadEdges, triDesc.innerCoverageMask);
                      }
                  }
                  else
                  {
-                    // if we're calculating coverage per sample, need to store it off. otherwise no covered samples, don't need to do anything
-                    if(NumCoverageSamplesT::value > 1)
+                    // if we're calculating coverage per sample, need to store it off. otherwise no
+                    // covered samples, don't need to do anything
+                    if (NumCoverageSamplesT::value > 1)
                      {
                          triDesc.coverageMask[sampleNum] = 0;
                      }
@@ -1252,19 +1367,22 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
              }
  
  #if KNOB_ENABLE_TOSS_POINTS
-            if(KNOB_TOSS_RS)
+            if (KNOB_TOSS_RS)
              {
                  gToss = triDesc.coverageMask[0];
              }
              else
  #endif
-            if(triDesc.anyCoveredSamples)
+                if (triDesc.anyCoveredSamples)
              {
-                // if conservative rast and MSAA are enabled, conservative coverage for a pixel means all samples in that pixel are covered
-                // copy conservative coverage result to all samples
-                if(RT::IsConservativeT::value)
+                // if conservative rast and MSAA are enabled, conservative coverage for a pixel
+                // means all samples in that pixel are covered copy conservative coverage result to
+                // all samples
+                if (RT::IsConservativeT::value)
                  {
-                    auto copyCoverage = [&](int sample){triDesc.coverageMask[sample] = triDesc.coverageMask[0]; };
+                    auto copyCoverage = [&](int sample) {
+                        triDesc.coverageMask[sample] = triDesc.coverageMask[0];
+                    };
                      UnrollerL<1, RT::MT::numSamples, 1>::step(copyCoverage);
                  }
  
@@ -1272,14 +1390,20 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
                  AR_EVENT(RasterTileCount(pDC->drawId, 1));
  
                  RDTSC_BEGIN(BEPixelBackend, pDC->drawId);
-                backendFuncs.pfnBackend(pDC, workerId, tileX << KNOB_TILE_X_DIM_SHIFT, tileY << KNOB_TILE_Y_DIM_SHIFT, triDesc, renderBuffers);
+                backendFuncs.pfnBackend(pDC,
+                                        workerId,
+                                        tileX << KNOB_TILE_X_DIM_SHIFT,
+                                        tileY << KNOB_TILE_Y_DIM_SHIFT,
+                                        triDesc,
+                                        renderBuffers);
                  RDTSC_END(BEPixelBackend, 0);
              }
  
              // step to the next tile in X
              for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
              {
-                vEdgeFix16[e] = _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
+                vEdgeFix16[e] =
+                    _mm256_add_pd(vEdgeFix16[e], _mm256_set1_pd(rastEdges[e].stepRasterTileX));
              }
              StepRasterTileX<RT>(state.colorHottileEnable, renderBuffers);
          }
@@ -1287,7 +1411,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
          // step to the next tile in Y
          for (uint32_t e = 0; e < RT::NumEdgesT::value; ++e)
          {
-            vEdgeFix16[e] = _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
+            vEdgeFix16[e] =
+                _mm256_add_pd(vStartOfRowEdge[e], _mm256_set1_pd(rastEdges[e].stepRasterTileY));
          }
          StepRasterTileY<RT>(state.colorHottileEnable, renderBuffers, currentRenderBufferRow);
      }
@@ -1297,10 +1422,16 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
  
  // Get pointers to hot tile memory for color RT, depth, stencil
  template <uint32_t numSamples>
-void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, uint32_t tileX, uint32_t tileY, RenderOutputBuffers &renderBuffers, uint32_t renderTargetArrayIndex)
+void GetRenderHotTiles(DRAW_CONTEXT*        pDC,
+                       uint32_t             workerId,
+                       uint32_t             macroID,
+                       uint32_t             tileX,
+                       uint32_t             tileY,
+                       RenderOutputBuffers& renderBuffers,
+                       uint32_t             renderTargetArrayIndex)
  {
-    const API_STATE& state = GetApiState(pDC);
-    SWR_CONTEXT *pContext = pDC->pContext;
+    const API_STATE& state    = GetApiState(pDC);
+    SWR_CONTEXT*     pContext = pDC->pContext;
      HANDLE hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
      uint32_t mx, my;
@@ -1310,46 +1441,73 @@ void GetRenderHotTiles(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroID, u
  
      // compute tile offset for active hottile buffers
      const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
-    uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
-    offset*=numSamples;
-
-    unsigned long rtSlot = 0;
-    uint32_t colorHottileEnableMask = state.colorHottileEnable;
-    while(_BitScanForward(&rtSlot, colorHottileEnableMask))
+    uint32_t       offset = ComputeTileOffset2D<
+        TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp>>(
+        pitch, tileX, tileY);
+    offset *= numSamples;
+
+    unsigned long rtSlot                 = 0;
+    uint32_t      colorHottileEnableMask = state.colorHottileEnable;
+    while (_BitScanForward(&rtSlot, colorHottileEnableMask))
      {
-        HOTTILE *pColor = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, 
-            numSamples, renderTargetArrayIndex);
-        pColor->state = HOTTILE_DIRTY;
+        HOTTILE* pColor = pContext->pHotTileMgr->GetHotTile(
+            pContext,
+            pDC,
+            hWorkerPrivateData,
+            macroID,
+            (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+            true,
+            numSamples,
+            renderTargetArrayIndex);
+        pColor->state                = HOTTILE_DIRTY;
          renderBuffers.pColor[rtSlot] = pColor->pBuffer + offset;
-        
+
          colorHottileEnableMask &= ~(1 << rtSlot);
      }
-    if(state.depthHottileEnable)
+    if (state.depthHottileEnable)
      {
-        const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
-        uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
-        offset*=numSamples;
-        HOTTILE *pDepth = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true,
-            numSamples, renderTargetArrayIndex);
-        pDepth->state = HOTTILE_DIRTY;
+        const uint32_t pitch =
+            KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
+        uint32_t offset = ComputeTileOffset2D<
+            TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp>>(
+            pitch, tileX, tileY);
+        offset *= numSamples;
+        HOTTILE* pDepth = pContext->pHotTileMgr->GetHotTile(pContext,
+                                                            pDC,
+                                                            hWorkerPrivateData,
+                                                            macroID,
+                                                            SWR_ATTACHMENT_DEPTH,
+                                                            true,
+                                                            numSamples,
+                                                            renderTargetArrayIndex);
+        pDepth->state   = HOTTILE_DIRTY;
          SWR_ASSERT(pDepth->pBuffer != nullptr);
          renderBuffers.pDepth = pDepth->pBuffer + offset;
      }
-    if(state.stencilHottileEnable)
+    if (state.stencilHottileEnable)
      {
-        const uint32_t pitch = KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
-        uint32_t offset = ComputeTileOffset2D<TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp> >(pitch, tileX, tileY);
-        offset*=numSamples;
-        HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true,
-            numSamples, renderTargetArrayIndex);
-        pStencil->state = HOTTILE_DIRTY;
+        const uint32_t pitch =
+            KNOB_MACROTILE_X_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
+        uint32_t offset = ComputeTileOffset2D<
+            TilingTraits<SWR_TILE_SWRZ, FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp>>(
+            pitch, tileX, tileY);
+        offset *= numSamples;
+        HOTTILE* pStencil = pContext->pHotTileMgr->GetHotTile(pContext,
+                                                              pDC,
+                                                              hWorkerPrivateData,
+                                                              macroID,
+                                                              SWR_ATTACHMENT_STENCIL,
+                                                              true,
+                                                              numSamples,
+                                                              renderTargetArrayIndex);
+        pStencil->state   = HOTTILE_DIRTY;
          SWR_ASSERT(pStencil->pBuffer != nullptr);
          renderBuffers.pStencil = pStencil->pBuffer + offset;
      }
  }
  
  template <typename RT>
-INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buffers)
+INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers& buffers)
  {
      DWORD rt = 0;
      while (_BitScanForward(&rt, colorHotTileMask))
@@ -1357,13 +1515,15 @@ INLINE void StepRasterTileX(uint32_t colorHotTileMask, RenderOutputBuffers &buff
          colorHotTileMask &= ~(1 << rt);
          buffers.pColor[rt] += RT::colorRasterTileStep;
      }
-    
+
      buffers.pDepth += RT::depthRasterTileStep;
      buffers.pStencil += RT::stencilRasterTileStep;
  }
  
  template <typename RT>
-INLINE void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buffers, RenderOutputBuffers &startBufferRow)
+INLINE void StepRasterTileY(uint32_t             colorHotTileMask,
+                            RenderOutputBuffers& buffers,
+                            RenderOutputBuffers& startBufferRow)
  {
      DWORD rt = 0;
      while (_BitScanForward(&rt, colorHotTileMask))
@@ -1378,4 +1538,3 @@ INLINE void StepRasterTileY(uint32_t colorHotTileMask, RenderOutputBuffers &buff
      startBufferRow.pStencil += RT::stencilRasterTileRowStep;
      buffers.pStencil = startBufferRow.pStencil;
  }
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp

index 48ea397018be97160c7eeffc4de1518f70207c31..e858a7d599e1f2fdb9c1409bca07b0d7bb1fc34b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp
@@ -1,99 +1,100 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #include "rdtsc_core.h"
  #include "common/rdtsc_buckets.h"
  
  // must match CORE_BUCKETS enum order
  BUCKET_DESC gCoreBuckets[] = {
-    { "APIClearRenderTarget", "", true, 0xff0b8bea },
-    { "APIDraw", "", true, 0xff000066 },
-    { "APIDrawWakeAllThreads", "", false, 0xffffffff },
-    { "APIDrawIndexed", "", true, 0xff000066 },
-    { "APIDispatch", "", true, 0xff660000 },
-    { "APIStoreTiles", "", true, 0xff00ffff },
-    { "APIGetDrawContext", "", false, 0xffffffff },
-    { "APISync", "", true, 0xff6666ff },
-    { "APIWaitForIdle", "", true, 0xff0000ff },
-    { "FEProcessDraw", "", true, 0xff009900 },
-    { "FEProcessDrawIndexed", "", true, 0xff009900 },
-    { "FEFetchShader", "", false, 0xffffffff },
-    { "FEVertexShader", "", false, 0xffffffff },
-    { "FEHullShader", "", false, 0xffffffff },
-    { "FETessellation", "", false, 0xffffffff },
-    { "FEDomainShader", "", false, 0xffffffff },
-    { "FEGeometryShader", "", false, 0xffffffff },
-    { "FEStreamout", "", false, 0xffffffff },
-    { "FEPAAssemble", "", false, 0xffffffff },
-    { "FEBinPoints", "", false, 0xff29b854 },
-    { "FEBinLines", "", false, 0xff29b854 },
-    { "FEBinTriangles", "", false, 0xff29b854 },
-    { "FETriangleSetup", "", false, 0xffffffff },
-    { "FEViewportCull", "", false, 0xffffffff },
-    { "FEGuardbandClip", "", false, 0xffffffff },
-    { "FEClipPoints", "", false, 0xffffffff },
-    { "FEClipLines", "", false, 0xffffffff },
-    { "FEClipTriangles", "", false, 0xffffffff },
-    { "FEClipRectangles", "", false, 0xffffffff },
-    { "FECullZeroAreaAndBackface", "", false, 0xffffffff },
-    { "FECullBetweenCenters", "", false, 0xffffffff },
-    { "FEEarlyRastEnter", "", false, 0xffffffff },
-    { "FEEarlyRastExit", "", false, 0xffffffff },
-    { "FEProcessStoreTiles", "", true, 0xff39c864 },
-    { "FEProcessInvalidateTiles", "", true, 0xffffffff },
-    { "WorkerWorkOnFifoBE", "", false, 0xff40261c },
-    { "WorkerFoundWork", "", false, 0xff573326 },
-    { "BELoadTiles", "", true, 0xffb0e2ff },
-    { "BEDispatch", "", true, 0xff00a2ff },
-    { "BEClear", "", true, 0xff00ccbb },
-    { "BERasterizeLine", "", true, 0xffb26a4e },
-    { "BERasterizeTriangle", "", true, 0xffb26a4e },
-    { "BETriangleSetup", "", false, 0xffffffff },
-    { "BEStepSetup", "", false, 0xffffffff },
-    { "BECullZeroArea", "", false, 0xffffffff },
-    { "BEEmptyTriangle", "", false, 0xffffffff },
-    { "BETrivialAccept", "", false, 0xffffffff },
-    { "BETrivialReject", "", false, 0xffffffff },
-    { "BERasterizePartial", "", false, 0xffffffff },
-    { "BEPixelBackend", "", false, 0xffffffff },
-    { "BESetup", "", false, 0xffffffff },
-    { "BEBarycentric", "", false, 0xffffffff },
-    { "BEEarlyDepthTest", "", false, 0xffffffff },
-    { "BEPixelShader", "", false, 0xffffffff },
-    { "BESingleSampleBackend", "", false, 0xffffffff },
-    { "BEPixelRateBackend", "", false, 0xffffffff },
-    { "BESampleRateBackend", "", false, 0xffffffff },
-    { "BENullBackend", "", false, 0xffffffff },
-    { "BELateDepthTest", "", false, 0xffffffff },
-    { "BEOutputMerger", "", false, 0xffffffff },
-    { "BEStoreTiles", "", true, 0xff00cccc },
-    { "BEEndTile", "", false, 0xffffffff },
+    {"APIClearRenderTarget", "", true, 0xff0b8bea},
+    {"APIDraw", "", true, 0xff000066},
+    {"APIDrawWakeAllThreads", "", false, 0xffffffff},
+    {"APIDrawIndexed", "", true, 0xff000066},
+    {"APIDispatch", "", true, 0xff660000},
+    {"APIStoreTiles", "", true, 0xff00ffff},
+    {"APIGetDrawContext", "", false, 0xffffffff},
+    {"APISync", "", true, 0xff6666ff},
+    {"APIWaitForIdle", "", true, 0xff0000ff},
+    {"FEProcessDraw", "", true, 0xff009900},
+    {"FEProcessDrawIndexed", "", true, 0xff009900},
+    {"FEFetchShader", "", false, 0xffffffff},
+    {"FEVertexShader", "", false, 0xffffffff},
+    {"FEHullShader", "", false, 0xffffffff},
+    {"FETessellation", "", false, 0xffffffff},
+    {"FEDomainShader", "", false, 0xffffffff},
+    {"FEGeometryShader", "", false, 0xffffffff},
+    {"FEStreamout", "", false, 0xffffffff},
+    {"FEPAAssemble", "", false, 0xffffffff},
+    {"FEBinPoints", "", false, 0xff29b854},
+    {"FEBinLines", "", false, 0xff29b854},
+    {"FEBinTriangles", "", false, 0xff29b854},
+    {"FETriangleSetup", "", false, 0xffffffff},
+    {"FEViewportCull", "", false, 0xffffffff},
+    {"FEGuardbandClip", "", false, 0xffffffff},
+    {"FEClipPoints", "", false, 0xffffffff},
+    {"FEClipLines", "", false, 0xffffffff},
+    {"FEClipTriangles", "", false, 0xffffffff},
+    {"FEClipRectangles", "", false, 0xffffffff},
+    {"FECullZeroAreaAndBackface", "", false, 0xffffffff},
+    {"FECullBetweenCenters", "", false, 0xffffffff},
+    {"FEEarlyRastEnter", "", false, 0xffffffff},
+    {"FEEarlyRastExit", "", false, 0xffffffff},
+    {"FEProcessStoreTiles", "", true, 0xff39c864},
+    {"FEProcessInvalidateTiles", "", true, 0xffffffff},
+    {"WorkerWorkOnFifoBE", "", false, 0xff40261c},
+    {"WorkerFoundWork", "", false, 0xff573326},
+    {"BELoadTiles", "", true, 0xffb0e2ff},
+    {"BEDispatch", "", true, 0xff00a2ff},
+    {"BEClear", "", true, 0xff00ccbb},
+    {"BERasterizeLine", "", true, 0xffb26a4e},
+    {"BERasterizeTriangle", "", true, 0xffb26a4e},
+    {"BETriangleSetup", "", false, 0xffffffff},
+    {"BEStepSetup", "", false, 0xffffffff},
+    {"BECullZeroArea", "", false, 0xffffffff},
+    {"BEEmptyTriangle", "", false, 0xffffffff},
+    {"BETrivialAccept", "", false, 0xffffffff},
+    {"BETrivialReject", "", false, 0xffffffff},
+    {"BERasterizePartial", "", false, 0xffffffff},
+    {"BEPixelBackend", "", false, 0xffffffff},
+    {"BESetup", "", false, 0xffffffff},
+    {"BEBarycentric", "", false, 0xffffffff},
+    {"BEEarlyDepthTest", "", false, 0xffffffff},
+    {"BEPixelShader", "", false, 0xffffffff},
+    {"BESingleSampleBackend", "", false, 0xffffffff},
+    {"BEPixelRateBackend", "", false, 0xffffffff},
+    {"BESampleRateBackend", "", false, 0xffffffff},
+    {"BENullBackend", "", false, 0xffffffff},
+    {"BELateDepthTest", "", false, 0xffffffff},
+    {"BEOutputMerger", "", false, 0xffffffff},
+    {"BEStoreTiles", "", true, 0xff00cccc},
+    {"BEEndTile", "", false, 0xffffffff},
  };
-static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])), "RDTSC Bucket enum and description table size mismatched.");
+static_assert(NumBuckets == (sizeof(gCoreBuckets) / sizeof(gCoreBuckets[0])),
+              "RDTSC Bucket enum and description table size mismatched.");
  
  /// @todo bucketmanager and mapping should probably be a part of the SWR context
  std::vector<uint32_t> gBucketMap;
-BucketManager gBucketMgr;
+BucketManager         gBucketMgr;
  
-uint32_t gCurrentFrame = 0;
-bool gBucketsInitialized = false;
+uint32_t gCurrentFrame       = 0;
+bool     gBucketsInitialized = false;
diff --git a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h

index 704da650d852db93294134204ca91c8a899c47d7..dc20e5be98da8d0c442c437f9781bdee58ed6cd5 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
+++ b/src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #pragma once
  #include "knobs.h"
@@ -124,10 +124,10 @@ void rdtscEndFrame();
  #endif
  
  extern std::vector<uint32_t> gBucketMap;
-extern BucketManager gBucketMgr;
-extern BUCKET_DESC gCoreBuckets[];
-extern uint32_t gCurrentFrame;
-extern bool gBucketsInitialized;
+extern BucketManager         gBucketMgr;
+extern BUCKET_DESC           gCoreBuckets[];
+extern uint32_t              gCurrentFrame;
+extern bool                  gBucketsInitialized;
  
  INLINE void rdtscReset()
  {
@@ -174,12 +174,14 @@ INLINE void rdtscEndFrame()
  {
      gCurrentFrame++;
  
-    if (gCurrentFrame == KNOB_BUCKETS_START_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
+    if (gCurrentFrame == KNOB_BUCKETS_START_FRAME &&
+        KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
      {
          gBucketMgr.StartCapture();
      }
  
-    if (gCurrentFrame == KNOB_BUCKETS_END_FRAME && KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
+    if (gCurrentFrame == KNOB_BUCKETS_END_FRAME &&
+        KNOB_BUCKETS_START_FRAME < KNOB_BUCKETS_END_FRAME)
      {
          gBucketMgr.StopCapture();
          gBucketMgr.PrintReport("rdtsc.txt");
diff --git a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h

index f1bef2190fb8611650edf86e3c64d325a606b51c..133420e6f3d65efaa8f0d88b005551474cc76a72 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h
+++ b/src/gallium/drivers/swr/rasterizer/core/ringbuffer.h
@@ -1,56 +1,52 @@
  /****************************************************************************
-* Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file arena.h
-*
-* @brief RingBuffer
-*        The RingBuffer class manages all aspects of the ring buffer including
-*        the head/tail indices, etc.
-*
-******************************************************************************/
+ * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file arena.h
+ *
+ * @brief RingBuffer
+ *        The RingBuffer class manages all aspects of the ring buffer including
+ *        the head/tail indices, etc.
+ *
+ ******************************************************************************/
  #pragma once
  
-template<typename T>
+template <typename T>
  class RingBuffer
  {
  public:
-    RingBuffer()
-        : mpRingBuffer(nullptr), mNumEntries(0), mRingHead(0), mRingTail(0)
-    {
-    }
+    RingBuffer() : mpRingBuffer(nullptr), mNumEntries(0), mRingHead(0), mRingTail(0) {}
  
-    ~RingBuffer()
-    {
-        Destroy();
-    }
+    ~RingBuffer() { Destroy(); }
  
      void Init(uint32_t numEntries)
      {
          SWR_ASSERT(numEntries > 0);
-        SWR_ASSERT(((1ULL << 32) % numEntries) == 0, "%d is not evenly divisible into 2 ^ 32.  Wrap errors will occur!", numEntries);
-        mNumEntries = numEntries;
-        mpRingBuffer = (T*)AlignedMalloc(sizeof(T)*numEntries, 64);
+        SWR_ASSERT(((1ULL << 32) % numEntries) == 0,
+                   "%d is not evenly divisible into 2 ^ 32.  Wrap errors will occur!",
+                   numEntries);
+        mNumEntries  = numEntries;
+        mpRingBuffer = (T*)AlignedMalloc(sizeof(T) * numEntries, 64);
          SWR_ASSERT(mpRingBuffer != nullptr);
-        memset(mpRingBuffer, 0, sizeof(T)*numEntries);
+        memset(mpRingBuffer, 0, sizeof(T) * numEntries);
      }
  
      void Destroy()
@@ -77,10 +73,7 @@ public:
          InterlockedIncrement(&mRingTail); // There are multiple consumers.
      }
  
-    INLINE bool IsEmpty()
-    {
-        return (GetHead() == GetTail());
-    }
+    INLINE bool IsEmpty() { return (GetHead() == GetTail()); }
  
      INLINE bool IsFull()
      {
@@ -94,9 +87,9 @@ public:
      INLINE uint32_t GetHead() volatile { return mRingHead; }
  
  protected:
-    T* mpRingBuffer;
+    T*       mpRingBuffer;
      uint32_t mNumEntries;
  
-    OSALIGNLINE(volatile uint32_t) mRingHead;  // Consumer Counter
-    OSALIGNLINE(volatile uint32_t) mRingTail;  // Producer Counter
+    OSALIGNLINE(volatile uint32_t) mRingHead; // Consumer Counter
+    OSALIGNLINE(volatile uint32_t) mRingTail; // Producer Counter
  };
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h b/src/gallium/drivers/swr/rasterizer/core/state.h

index 9db17eeed01055a46adc4692e9f908d04e48ae3d..0b42a457945e52580d3f85a40020ae5d9e68306d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -1,30 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file state.h
-*
-* @brief Definitions for API state.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file state.h
+ *
+ * @brief Definitions for API state.
+ *
+ ******************************************************************************/
+// Skipping clang-format due to parsing by simplistic python scripts
+// clang-format off
  #pragma once
  
  #include "common/formats.h"
@@ -39,63 +41,63 @@ using gfxptr_t = unsigned long long;
  //////////////////////////////////////////////////////////////////////////
  enum PRIMITIVE_TOPOLOGY
  {
-    TOP_UNKNOWN = 0x0,
-    TOP_POINT_LIST = 0x1,
-    TOP_LINE_LIST = 0x2,
-    TOP_LINE_STRIP = 0x3,
-    TOP_TRIANGLE_LIST = 0x4,
-    TOP_TRIANGLE_STRIP = 0x5,
-    TOP_TRIANGLE_FAN = 0x6,
-    TOP_QUAD_LIST = 0x7,
-    TOP_QUAD_STRIP = 0x8,
-    TOP_LINE_LIST_ADJ = 0x9,
-    TOP_LISTSTRIP_ADJ = 0xA,
-    TOP_TRI_LIST_ADJ = 0xB,
-    TOP_TRI_STRIP_ADJ = 0xC,
-    TOP_TRI_STRIP_REVERSE = 0xD,
-    TOP_POLYGON = 0xE,
-    TOP_RECT_LIST = 0xF,
-    TOP_LINE_LOOP = 0x10,
-    TOP_POINT_LIST_BF = 0x11,
-    TOP_LINE_STRIP_CONT = 0x12,
-    TOP_LINE_STRIP_BF = 0x13,
-    TOP_LINE_STRIP_CONT_BF = 0x14,
+    TOP_UNKNOWN                = 0x0,
+    TOP_POINT_LIST             = 0x1,
+    TOP_LINE_LIST              = 0x2,
+    TOP_LINE_STRIP             = 0x3,
+    TOP_TRIANGLE_LIST          = 0x4,
+    TOP_TRIANGLE_STRIP         = 0x5,
+    TOP_TRIANGLE_FAN           = 0x6,
+    TOP_QUAD_LIST              = 0x7,
+    TOP_QUAD_STRIP             = 0x8,
+    TOP_LINE_LIST_ADJ          = 0x9,
+    TOP_LISTSTRIP_ADJ          = 0xA,
+    TOP_TRI_LIST_ADJ           = 0xB,
+    TOP_TRI_STRIP_ADJ          = 0xC,
+    TOP_TRI_STRIP_REVERSE      = 0xD,
+    TOP_POLYGON                = 0xE,
+    TOP_RECT_LIST              = 0xF,
+    TOP_LINE_LOOP              = 0x10,
+    TOP_POINT_LIST_BF          = 0x11,
+    TOP_LINE_STRIP_CONT        = 0x12,
+    TOP_LINE_STRIP_BF          = 0x13,
+    TOP_LINE_STRIP_CONT_BF     = 0x14,
      TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
-    TOP_TRIANGLE_DISC = 0x17,   /// @todo What is this??
-
-    TOP_PATCHLIST_BASE = 0x1F,  // Invalid topology, used to calculate num verts for a patchlist.
-    TOP_PATCHLIST_1 = 0x20,     // List of 1-vertex patches
-    TOP_PATCHLIST_2 = 0x21,
-    TOP_PATCHLIST_3 = 0x22,
-    TOP_PATCHLIST_4 = 0x23,
-    TOP_PATCHLIST_5 = 0x24,
-    TOP_PATCHLIST_6 = 0x25,
-    TOP_PATCHLIST_7 = 0x26,
-    TOP_PATCHLIST_8 = 0x27,
-    TOP_PATCHLIST_9 = 0x28,
-    TOP_PATCHLIST_10 = 0x29,
-    TOP_PATCHLIST_11 = 0x2A,
-    TOP_PATCHLIST_12 = 0x2B,
-    TOP_PATCHLIST_13 = 0x2C,
-    TOP_PATCHLIST_14 = 0x2D,
-    TOP_PATCHLIST_15 = 0x2E,
-    TOP_PATCHLIST_16 = 0x2F,
-    TOP_PATCHLIST_17 = 0x30,
-    TOP_PATCHLIST_18 = 0x31,
-    TOP_PATCHLIST_19 = 0x32,
-    TOP_PATCHLIST_20 = 0x33,
-    TOP_PATCHLIST_21 = 0x34,
-    TOP_PATCHLIST_22 = 0x35,
-    TOP_PATCHLIST_23 = 0x36,
-    TOP_PATCHLIST_24 = 0x37,
-    TOP_PATCHLIST_25 = 0x38,
-    TOP_PATCHLIST_26 = 0x39,
-    TOP_PATCHLIST_27 = 0x3A,
-    TOP_PATCHLIST_28 = 0x3B,
-    TOP_PATCHLIST_29 = 0x3C,
-    TOP_PATCHLIST_30 = 0x3D,
-    TOP_PATCHLIST_31 = 0x3E,
-    TOP_PATCHLIST_32 = 0x3F,   // List of 32-vertex patches
+    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??
+
+    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
+    TOP_PATCHLIST_1    = 0x20, // List of 1-vertex patches
+    TOP_PATCHLIST_2    = 0x21,
+    TOP_PATCHLIST_3    = 0x22,
+    TOP_PATCHLIST_4    = 0x23,
+    TOP_PATCHLIST_5    = 0x24,
+    TOP_PATCHLIST_6    = 0x25,
+    TOP_PATCHLIST_7    = 0x26,
+    TOP_PATCHLIST_8    = 0x27,
+    TOP_PATCHLIST_9    = 0x28,
+    TOP_PATCHLIST_10   = 0x29,
+    TOP_PATCHLIST_11   = 0x2A,
+    TOP_PATCHLIST_12   = 0x2B,
+    TOP_PATCHLIST_13   = 0x2C,
+    TOP_PATCHLIST_14   = 0x2D,
+    TOP_PATCHLIST_15   = 0x2E,
+    TOP_PATCHLIST_16   = 0x2F,
+    TOP_PATCHLIST_17   = 0x30,
+    TOP_PATCHLIST_18   = 0x31,
+    TOP_PATCHLIST_19   = 0x32,
+    TOP_PATCHLIST_20   = 0x33,
+    TOP_PATCHLIST_21   = 0x34,
+    TOP_PATCHLIST_22   = 0x35,
+    TOP_PATCHLIST_23   = 0x36,
+    TOP_PATCHLIST_24   = 0x37,
+    TOP_PATCHLIST_25   = 0x38,
+    TOP_PATCHLIST_26   = 0x39,
+    TOP_PATCHLIST_27   = 0x3A,
+    TOP_PATCHLIST_28   = 0x3B,
+    TOP_PATCHLIST_29   = 0x3C,
+    TOP_PATCHLIST_30   = 0x3D,
+    TOP_PATCHLIST_31   = 0x3E,
+    TOP_PATCHLIST_32   = 0x3F, // List of 32-vertex patches
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -173,7 +175,6 @@ enum SWR_OUTER_TESSFACTOR_ID
      SWR_NUM_OUTER_TESS_FACTORS,
  };
  
-
  /////////////////////////////////////////////////////////////////////////
  /// simdvertex
  /// @brief Defines a vertex element that holds all the data for SIMD vertices.
@@ -182,9 +183,9 @@ enum SWR_OUTER_TESSFACTOR_ID
  enum SWR_VTX_SLOTS
  {
      VERTEX_SGV_SLOT                 = 0,
-        VERTEX_SGV_RTAI_COMP        = 0,
-        VERTEX_SGV_VAI_COMP         = 1,
-        VERTEX_SGV_POINT_SIZE_COMP  = 2,
+    VERTEX_SGV_RTAI_COMP            = 0,
+    VERTEX_SGV_VAI_COMP             = 1,
+    VERTEX_SGV_POINT_SIZE_COMP      = 2,
      VERTEX_POSITION_SLOT            = 1,
      VERTEX_POSITION_END_SLOT        = 1,
      VERTEX_CLIPCULL_DIST_LO_SLOT    = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
@@ -197,21 +198,21 @@ enum SWR_VTX_SLOTS
  // SoAoSoA
  struct simdvertex
  {
-    simdvector      attrib[SWR_VTX_NUM_SLOTS];
+    simdvector attrib[SWR_VTX_NUM_SLOTS];
  };
  
  #if ENABLE_AVX512_SIMD16
  struct simd16vertex
  {
-    simd16vector    attrib[SWR_VTX_NUM_SLOTS];
+    simd16vector attrib[SWR_VTX_NUM_SLOTS];
  };
  
  #endif
  
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct SIMDVERTEX_T
  {
-    typename SIMD_T::Vec4               attrib[SWR_VTX_NUM_SLOTS];
+    typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -229,19 +230,20 @@ struct SWR_SHADER_STATS
  /////////////////////////////////////////////////////////////////////////
  struct SWR_VS_CONTEXT
  {
-    simdvertex* pVin;           // IN: SIMD input vertex data store
-    simdvertex* pVout;          // OUT: SIMD output vertex data store
+    simdvertex* pVin;  // IN: SIMD input vertex data store
+    simdvertex* pVout; // OUT: SIMD output vertex data store
  
-    uint32_t InstanceID;        // IN: Instance ID, constant across all verts of the SIMD
-    simdscalari VertexID;       // IN: Vertex ID
-    simdscalari mask;           // IN: Active mask for shader
+    uint32_t    InstanceID; // IN: Instance ID, constant across all verts of the SIMD
+    simdscalari VertexID;   // IN: Vertex ID
+    simdscalari mask;       // IN: Active mask for shader
  
      // SIMD16 Frontend fields.
-    uint32_t AlternateOffset;   // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
-    simd16scalari mask16;       // IN: Active mask for shader (16-wide)
-    simd16scalari VertexID16;   // IN: Vertex ID (16-wide)
+    uint32_t AlternateOffset; // IN: amount to offset for interleaving even/odd simd8 in
+                              // simd16vertex output
+    simd16scalari mask16;     // IN: Active mask for shader (16-wide)
+    simd16scalari VertexID16; // IN: Vertex ID (16-wide)
  
-    SWR_SHADER_STATS stats;     // OUT: shader statistics used for archrast.
+    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
  };
  
  /////////////////////////////////////////////////////////////////////////
@@ -268,16 +270,16 @@ struct ScalarCPoint
  /////////////////////////////////////////////////////////////////////////
  struct SWR_TESSELLATION_FACTORS
  {
-    float  OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
-    float  InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
+    float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
+    float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
  };
  
  #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
  struct ScalarPatch
  {
      SWR_TESSELLATION_FACTORS tessFactors;
-    ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
-    ScalarCPoint patchData;
+    ScalarCPoint             cp[MAX_NUM_VERTS_PER_PRIM];
+    ScalarCPoint             patchData;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -286,12 +288,11 @@ struct ScalarPatch
  /////////////////////////////////////////////////////////////////////////
  struct SWR_HS_CONTEXT
  {
-    simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
-    simdscalari PrimitiveID;    // IN: (SIMD) primitive ID generated from the draw call
-    simdscalari mask;           // IN: Active mask for shader
-    ScalarPatch* pCPout;        // OUT: Output control point patch
-                                // SIMD-sized-array of SCALAR patches
-    SWR_SHADER_STATS stats;     // OUT: shader statistics used for archrast.
+    simdvertex       vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
+    simdscalari      PrimitiveID;                  // IN: (SIMD) primitive ID generated from the draw call
+    simdscalari      mask;                         // IN: Active mask for shader
+    ScalarPatch*     pCPout;                       // OUT: Output control point patch (SIMD-sized array of SCALAR patches)
+    SWR_SHADER_STATS stats;                        // OUT: shader statistics used for archrast.
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -318,13 +319,13 @@ struct SWR_DS_CONTEXT
  /////////////////////////////////////////////////////////////////////////
  struct SWR_GS_CONTEXT
  {
-    simdvector* pVerts;                 // IN: input primitive data for SIMD prims
-    uint32_t inputVertStride;           // IN: input vertex stride, in attributes
-    simdscalari PrimitiveID;            // IN: input primitive ID generated from the draw call
-    uint32_t InstanceID;                // IN: input instance ID
-    simdscalari mask;                   // IN: Active mask for shader
-    uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
-    SWR_SHADER_STATS stats;             // OUT: shader statistics used for archrast.
+    simdvector* pVerts;                    // IN: input primitive data for SIMD prims
+    uint32_t    inputVertStride;           // IN: input vertex stride, in attributes
+    simdscalari PrimitiveID;               // IN: input primitive ID generated from the draw call
+    uint32_t    InstanceID;                // IN: input instance ID
+    simdscalari mask;                      // IN: Active mask for shader
+    uint8_t*    pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
+    SWR_SHADER_STATS stats;                // OUT: shader statistics used for archrast.
  };
  
  struct PixelPositions
@@ -343,36 +344,35 @@ struct PixelPositions
  /////////////////////////////////////////////////////////////////////////
  struct SWR_PS_CONTEXT
  {
-    PixelPositions vX;          // IN: x location(s) of pixels
-    PixelPositions vY;          // IN: x location(s) of pixels
-    simdscalar vZ;              // INOUT: z location of pixels
-    simdscalari activeMask;     // OUT: mask for kill
-    simdscalar  inputMask;      // IN: input coverage mask for all samples
-    simdscalari oMask;          // OUT: mask for output coverage
+    PixelPositions vX;         // IN: x location(s) of pixels
+    PixelPositions vY;         // IN: y location(s) of pixels
+    simdscalar     vZ;         // INOUT: z location of pixels
+    simdscalari    activeMask; // OUT: mask for kill
+    simdscalar     inputMask;  // IN: input coverage mask for all samples
+    simdscalari    oMask;      // OUT: mask for output coverage
  
-    PixelPositions vI;          // barycentric coords evaluated at pixel center, sample position, centroid
+    PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid
      PixelPositions vJ;
-    PixelPositions vOneOverW;   // IN: 1/w
+    PixelPositions vOneOverW; // IN: 1/w
  
      const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
      const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
      const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
-    const float *I;             // IN: Barycentric A, B, and C coefs used to compute I
-    const float *J;             // IN: Barycentric A, B, and C coefs used to compute J
-    float recipDet;             // IN: 1/Det, used when barycentric interpolating attributes
+    const float* I;             // IN: Barycentric A, B, and C coefs used to compute I
+    const float* J;             // IN: Barycentric A, B, and C coefs used to compute J
+    float        recipDet;      // IN: 1/Det, used when barycentric interpolating attributes
      const float* pSamplePosX;   // IN: array of sample positions
      const float* pSamplePosY;   // IN: array of sample positions
-    simdvector shaded[SWR_NUM_RENDERTARGETS];
-                                // OUT: result color per rendertarget
+    simdvector   shaded[SWR_NUM_RENDERTARGETS]; // OUT: result color per rendertarget
  
-    uint32_t frontFace;                 // IN: front- 1, back- 0
-    uint32_t sampleIndex;               // IN: sampleIndex
-    uint32_t renderTargetArrayIndex;    // IN: render target array index from GS
-    uint32_t rasterizerSampleCount;     // IN: sample count used by the rasterizer
+    uint32_t frontFace;              // IN: front- 1, back- 0
+    uint32_t sampleIndex;            // IN: sampleIndex
+    uint32_t renderTargetArrayIndex; // IN: render target array index from GS
+    uint32_t rasterizerSampleCount;  // IN: sample count used by the rasterizer
  
      uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
  
-    SWR_SHADER_STATS stats;             // OUT: shader statistics used for archrast.
+    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -401,41 +401,41 @@ struct SWR_CS_CONTEXT
      // count into the shader. When the count reaches 0 then all thread groups in the
      // dispatch call have been completed.
  
-    uint32_t tileCounter;  // The tile counter value for this thread group.
+    uint32_t tileCounter; // The tile counter value for this thread group.
  
      // Dispatch dimensions used by shader to compute system values from the tile counter.
      uint32_t dispatchDims[3];
  
      uint8_t* pTGSM;               // Thread Group Shared Memory pointer.
      uint8_t* pSpillFillBuffer;    // Spill/fill buffer for barrier support
-    uint8_t* pScratchSpace;       // Pointer to scratch space buffer used by the shader, shader is responsible
-                                  // for subdividing scratch space per instance/simd
+    uint8_t* pScratchSpace;       // Pointer to scratch space buffer used by the shader, shader is
+                                  // responsible for subdividing scratch space per instance/simd
      uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
  
-    SWR_SHADER_STATS stats;       // OUT: shader statistics used for archrast.
+    SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
  };
  
  // enums
  enum SWR_TILE_MODE
  {
-    SWR_TILE_NONE = 0x0,    // Linear mode (no tiling)
-    SWR_TILE_MODE_WMAJOR,   // W major tiling
-    SWR_TILE_MODE_XMAJOR,   // X major tiling
-    SWR_TILE_MODE_YMAJOR,   // Y major tiling
-    SWR_TILE_SWRZ,          // SWR-Z tiling
+    SWR_TILE_NONE = 0x0,   // Linear mode (no tiling)
+    SWR_TILE_MODE_WMAJOR,  // W major tiling
+    SWR_TILE_MODE_XMAJOR,  // X major tiling
+    SWR_TILE_MODE_YMAJOR,  // Y major tiling
+    SWR_TILE_SWRZ,         // SWR-Z tiling
  
      SWR_TILE_MODE_COUNT
  };
  
  enum SWR_SURFACE_TYPE
  {
-    SURFACE_1D        = 0,
-    SURFACE_2D        = 1,
-    SURFACE_3D        = 2,
-    SURFACE_CUBE      = 3,
-    SURFACE_BUFFER    = 4,
+    SURFACE_1D                = 0,
+    SURFACE_2D                = 1,
+    SURFACE_3D                = 2,
+    SURFACE_CUBE              = 3,
+    SURFACE_BUFFER            = 4,
      SURFACE_STRUCTURED_BUFFER = 5,
-    SURFACE_NULL       = 7
+    SURFACE_NULL              = 7
  };
  
  enum SWR_ZFUNCTION
@@ -537,34 +537,35 @@ struct SWR_LOD_OFFSETS
  //////////////////////////////////////////////////////////////////////////
  struct SWR_SURFACE_STATE
  {
-    gfxptr_t xpBaseAddress;
-    SWR_SURFACE_TYPE type;  // @llvm_enum
-    SWR_FORMAT format;      // @llvm_enum
-    uint32_t width;
-    uint32_t height;
-    uint32_t depth;
-    uint32_t numSamples;
-    uint32_t samplePattern;
-    uint32_t pitch;
-    uint32_t qpitch;
-    uint32_t minLod;            // for sampled surfaces, the most detailed LOD that can be accessed by sampler
-    uint32_t maxLod;            // for sampled surfaces, the max LOD that can be accessed
-    float resourceMinLod;       // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
-    uint32_t lod;               // for render targets, the lod being rendered to
-    uint32_t arrayIndex;        // for render targets, the array index being rendered to for arrayed surfaces
-    SWR_TILE_MODE tileMode;     // @llvm_enum
-    uint32_t halign;
-    uint32_t valign;
-    uint32_t xOffset;
-    uint32_t yOffset;
+    gfxptr_t         xpBaseAddress;
+    SWR_SURFACE_TYPE type;   // @llvm_enum
+    SWR_FORMAT       format; // @llvm_enum
+    uint32_t         width;
+    uint32_t         height;
+    uint32_t         depth;
+    uint32_t         numSamples;
+    uint32_t         samplePattern;
+    uint32_t         pitch;
+    uint32_t         qpitch;
+    uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler
+    uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed
+    float    resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be
+                             // accessed by sampler
+    uint32_t lod;            // for render targets, the lod being rendered to
+    uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces
+    SWR_TILE_MODE tileMode; // @llvm_enum
+    uint32_t      halign;
+    uint32_t      valign;
+    uint32_t      xOffset;
+    uint32_t      yOffset;
  
      uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
  
-    gfxptr_t xpAuxBaseAddress;   // Used for compression, append/consume counter, etc.
-    SWR_AUX_MODE auxMode;      // @llvm_enum
+    gfxptr_t     xpAuxBaseAddress; // Used for compression, append/consume counter, etc.
+    SWR_AUX_MODE auxMode;          // @llvm_enum
  
  
-    bool bInterleavedSamples;   // are MSAA samples stored interleaved or planar
+    bool bInterleavedSamples; // are MSAA samples stored interleaved or planar
  };
  
  // vertex fetch state
@@ -576,9 +577,10 @@ struct SWR_VERTEX_BUFFER_STATE
      uint32_t index;
      uint32_t pitch;
      uint32_t size;
-    uint32_t minVertex;             // min vertex (for bounds checking)
-    uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
-    uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
+    uint32_t minVertex; // min vertex (for bounds checking)
+    uint32_t maxVertex; // size / pitch.  precalculated value used by fetch shader for OOB checks
+    uint32_t partialInboundsSize; // size % pitch.  precalculated value used by fetch shader for
+                                  // partially OOB vertices
  };
  
  struct SWR_INDEX_BUFFER_STATE
@@ -586,10 +588,9 @@ struct SWR_INDEX_BUFFER_STATE
      gfxptr_t xpIndices;
      // Format type for indices (e.g. UINT16, UINT32, etc.)
      SWR_FORMAT format; // @llvm_enum
-    uint32_t size;
+    uint32_t   size;
  };
  
-
  //////////////////////////////////////////////////////////////////////////
  /// SWR_FETCH_CONTEXT
  /// @brief Input to fetch shader.
@@ -598,20 +599,21 @@ struct SWR_INDEX_BUFFER_STATE
  /////////////////////////////////////////////////////////////////////////
  struct SWR_FETCH_CONTEXT
  {
-    const SWR_VERTEX_BUFFER_STATE* pStreams;    // IN: array of bound vertex buffers
-    gfxptr_t xpIndices;                          // IN: pointer to int32 index buffer for indexed draws
-    gfxptr_t xpLastIndex;                        // IN: pointer to end of index buffer, used for bounds checking
-    uint32_t CurInstance;                       // IN: current instance
-    uint32_t BaseVertex;                        // IN: base vertex
-    uint32_t StartVertex;                       // IN: start vertex
-    uint32_t StartInstance;                     // IN: start instance
-    simdscalari VertexID;                       // OUT: vector of vertex IDs
-    simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
+    const SWR_VERTEX_BUFFER_STATE* pStreams;  // IN: array of bound vertex buffers
+    gfxptr_t                       xpIndices; // IN: pointer to int32 index buffer for indexed draws
+    gfxptr_t    xpLastIndex;   // IN: pointer to end of index buffer, used for bounds checking
+    uint32_t    CurInstance;   // IN: current instance
+    uint32_t    BaseVertex;    // IN: base vertex
+    uint32_t    StartVertex;   // IN: start vertex
+    uint32_t    StartInstance; // IN: start instance
+    simdscalari VertexID;      // OUT: vector of vertex IDs
+    simdscalari CutMask;       // OUT: vector mask of indices which have the cut index value
  #if USE_SIMD16_SHADERS
-//    simd16scalari VertexID;                     // OUT: vector of vertex IDs
-//    simd16scalari CutMask;                      // OUT: vector mask of indices which have the cut index value
-    simdscalari VertexID2;                      // OUT: vector of vertex IDs
-    simdscalari CutMask2;                       // OUT: vector mask of indices which have the cut index value
+    //    simd16scalari VertexID;                     // OUT: vector of vertex IDs
+    //    simd16scalari CutMask;                      // OUT: vector mask of indices which have the
+    //                                                // cut index value
+    simdscalari VertexID2; // OUT: vector of vertex IDs
+    simdscalari CutMask2;  // OUT: vector mask of indices which have the cut index value
  #endif
  };
  
@@ -627,8 +629,8 @@ OSALIGNLINE(struct) SWR_STATS
      uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
  
      // Pipeline Stats
-    uint64_t PsInvocations;  // Number of Pixel Shader invocations
-    uint64_t CsInvocations;  // Number of Compute Shader invocations
+    uint64_t PsInvocations; // Number of Pixel Shader invocations
+    uint64_t CsInvocations; // Number of Compute Shader invocations
  
  };
  
@@ -654,9 +656,9 @@ OSALIGNLINE(struct) SWR_STATS_FE
      uint64_t SoNumPrimsWritten[4];
  };
  
-//////////////////////////////////////////////////////////////////////////
-/// STREAMOUT_BUFFERS
-/////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////
+    /// STREAMOUT_BUFFERS
+    /////////////////////////////////////////////////////////////////////////
  
  #define MAX_SO_STREAMS 4
  #define MAX_SO_BUFFERS 4
@@ -718,7 +720,7 @@ struct SWR_STREAMOUT_STATE
  /////////////////////////////////////////////////////////////////////////
  struct SWR_STREAMOUT_CONTEXT
  {
-    uint32_t* pPrimData;
+    uint32_t*             pPrimData;
      SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
  
      // Num prims written for this stream
@@ -736,8 +738,8 @@ struct SWR_GS_STATE
      bool gsEnable;
  
      // If true, geometry shader emits a single stream, with separate cut buffer.
-    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
-    // to map vertices to streams
+    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a
+    // separate StreamID buffer to map vertices to streams
      bool isSingleStream;
  
      // Number of input attributes per vertex. Used by the frontend to
@@ -748,7 +750,7 @@ struct SWR_GS_STATE
      uint32_t inputVertStride;
  
      // Output topology - can be point, tristrip, linestrip, or rectlist
-    PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum
+    PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum
  
      // Maximum number of verts that can be emitted by a single instance of the GS
      uint32_t maxNumVerts;
@@ -763,14 +765,16 @@ struct SWR_GS_STATE
      // Total amount of memory to allocate for one instance of the shader output in bytes
      uint32_t allocationSize;
  
-    // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
+    // Offset to the start of the attributes of the input vertices, in simdvector units, as read by
+    // the GS
      uint32_t vertexAttribOffset;
  
      // Offset to the attributes as stored by the preceding shader stage.
      uint32_t srcVertexAttribOffset;
  
-    // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
-    // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
+    // Size of the control data section which contains cut or streamID data, in simdscalar units.
+    // Should be sized to handle the maximum number of verts output by the GS. Can be 0 if there are
+    // no cuts or streamID bits.
      uint32_t controlDataSize;
  
      // Offset to the control data section, in bytes
@@ -782,15 +786,14 @@ struct SWR_GS_STATE
      // Offset to the start of the vertex section, in bytes
      uint32_t outputVertexOffset;
  
-    // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
-    // expected to store the final vertex count in the first dword of the gs output stream.
+    // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero,
+    // shader is expected to store the final vertex count in the first dword of the gs output
+    // stream.
      uint32_t staticVertexCount;
  
      uint32_t pad;
  };
-static_assert(sizeof(SWR_GS_STATE) == 64,
-    "Adjust padding to keep size (or remove this assert)");
-
+static_assert(sizeof(SWR_GS_STATE) == 64, "Adjust padding to keep size (or remove this assert)");
  
  //////////////////////////////////////////////////////////////////////////
  /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
@@ -834,22 +837,22 @@ enum SWR_TS_DOMAIN
  /////////////////////////////////////////////////////////////////////////
  struct SWR_TS_STATE
  {
-    bool                    tsEnable;
+    bool tsEnable;
  
-    SWR_TS_OUTPUT_TOPOLOGY  tsOutputTopology;   // @llvm_enum
-    SWR_TS_PARTITIONING     partitioning;       // @llvm_enum
-    SWR_TS_DOMAIN           domain;             // @llvm_enum
+    SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum
+    SWR_TS_PARTITIONING    partitioning;     // @llvm_enum
+    SWR_TS_DOMAIN          domain;           // @llvm_enum
  
-    PRIMITIVE_TOPOLOGY      postDSTopology;     // @llvm_enum
+    PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum
  
-    uint32_t                numHsInputAttribs;
-    uint32_t                numHsOutputAttribs;
-    uint32_t                numDsOutputAttribs;
-    uint32_t                dsAllocationSize;
-    uint32_t                dsOutVtxAttribOffset;
+    uint32_t numHsInputAttribs;
+    uint32_t numHsOutputAttribs;
+    uint32_t numDsOutputAttribs;
+    uint32_t dsAllocationSize;
+    uint32_t dsOutVtxAttribOffset;
  
      // Offset to the start of the attributes of the input vertices, in simdvector units
-    uint32_t                vertexAttribOffset;
+    uint32_t vertexAttribOffset;
  };
  
  // output merger state
@@ -860,7 +863,8 @@ struct SWR_RENDER_TARGET_BLEND_STATE
      uint8_t writeDisableBlue : 1;
      uint8_t writeDisableAlpha : 1;
  };
-static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
+static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1,
+              "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
  
  enum SWR_MULTISAMPLE_COUNT
  {
@@ -887,7 +891,7 @@ struct SWR_BLEND_STATE
      uint32_t sampleMask;
      // all RT's have the same sample count
      ///@todo move this to Output Merger state when we refactor
-    SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
+    SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum
  
      SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
  };
@@ -895,17 +899,17 @@ static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
  
  struct SWR_BLEND_CONTEXT
  {
-    const SWR_BLEND_STATE*  pBlendState;
-    simdvector*             src;
-    simdvector*             src1;
-    simdvector*             src0alpha;
-    uint32_t                sampleNum;
-    simdvector*             pDst;
-    simdvector*             result;
-    simdscalari*            oMask;
-    simdscalari*            pMask;
-    uint32_t                isAlphaTested;
-    uint32_t                isAlphaBlended;
+    const SWR_BLEND_STATE* pBlendState;
+    simdvector*            src;
+    simdvector*            src1;
+    simdvector*            src0alpha;
+    uint32_t               sampleNum;
+    simdvector*            pDst;
+    simdvector*            result;
+    simdscalari*           oMask;
+    simdscalari*           pMask;
+    uint32_t               isAlphaTested;
+    uint32_t               isAlphaBlended;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -922,13 +926,12 @@ typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateDat
  typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_GS_CONTEXT* pGsContext);
  typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_CS_CONTEXT* pCsContext);
  typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
-typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT *pContext);
+typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
+typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, HANDLE hWorkerPrivateData, SWR_PS_CONTEXT* pContext);
  typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(SWR_BLEND_CONTEXT*);
  typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
  
  
-
  //////////////////////////////////////////////////////////////////////////
  /// FRONTEND_STATE
  /////////////////////////////////////////////////////////////////////////
@@ -1029,44 +1032,44 @@ enum SWR_PIXEL_LOCATION
  struct SWR_MULTISAMPLE_POS
  {
  public:
-    INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
-    INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
-    INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
-    INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
-    INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
-    INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
-    INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
-    INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
-    typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
-    INLINE sampleArrayT X() const { return _x; }; // @llvm_func
-    INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
+    INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; };   // @llvm_func
+    INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; };   // @llvm_func
+    INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; };         // @llvm_func
+    INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; };         // @llvm_func
+    INLINE void     SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; };    // @llvm_func
+    INLINE void     SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; };    // @llvm_func
+    INLINE float    X(uint32_t sampleNum) const { return _x[sampleNum]; };           // @llvm_func
+    INLINE float    Y(uint32_t sampleNum) const { return _y[sampleNum]; };           // @llvm_func
+    typedef const float (&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES];                   //@llvm_typedef
+    INLINE sampleArrayT X() const { return _x; };                                    // @llvm_func
+    INLINE sampleArrayT Y() const { return _y; };                                    // @llvm_func
      INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
      INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
      INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
      INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
-    INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
-    INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
+    INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; };  // @llvm_func
+    INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; };  // @llvm_func
  
      INLINE void PrecalcSampleData(int numSamples); //@llvm_func
  
  private:
      template <typename MaskT>
      INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
-    INLINE void CalcTileSampleOffsets(int numSamples);   // @llvm_func
+    INLINE void    CalcTileSampleOffsets(int numSamples);          // @llvm_func
  
      // scalar sample values
      uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
      uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
-    float _x[SWR_MAX_NUM_MULTISAMPLES];
-    float _y[SWR_MAX_NUM_MULTISAMPLES];
+    float    _x[SWR_MAX_NUM_MULTISAMPLES];
+    float    _y[SWR_MAX_NUM_MULTISAMPLES];
  
      // precalc'd / vectorized samples
-    __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
-    __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
+    __m128i    _vXi[SWR_MAX_NUM_MULTISAMPLES];
+    __m128i    _vYi[SWR_MAX_NUM_MULTISAMPLES];
      simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
      simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
-    __m128i tileSampleOffsetsX;
-    __m128i tileSampleOffsetsY;
+    __m128i    tileSampleOffsetsX;
+    __m128i    tileSampleOffsetsY;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -1074,33 +1077,33 @@ private:
  //////////////////////////////////////////////////////////////////////////
  struct SWR_RASTSTATE
  {
-    uint32_t cullMode               : 2;
-    uint32_t fillMode               : 2;
-    uint32_t frontWinding           : 1;
-    uint32_t scissorEnable          : 1;
-    uint32_t depthClipEnable        : 1;
-    uint32_t clipHalfZ              : 1;
-    uint32_t pointParam             : 1;
-    uint32_t pointSpriteEnable      : 1;
-    uint32_t pointSpriteTopOrigin   : 1;
-    uint32_t forcedSampleCount      : 1;
-    uint32_t pixelOffset            : 1;
-    uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
-    uint32_t conservativeRast       : 1;
+    uint32_t cullMode : 2;
+    uint32_t fillMode : 2;
+    uint32_t frontWinding : 1;
+    uint32_t scissorEnable : 1;
+    uint32_t depthClipEnable : 1;
+    uint32_t clipHalfZ : 1;
+    uint32_t pointParam : 1;
+    uint32_t pointSpriteEnable : 1;
+    uint32_t pointSpriteTopOrigin : 1;
+    uint32_t forcedSampleCount : 1;
+    uint32_t pixelOffset : 1;
+    uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units
+    uint32_t conservativeRast : 1;
  
      float pointSize;
      float lineWidth;
  
-    float depthBias;
-    float slopeScaledDepthBias;
-    float depthBiasClamp;
-    SWR_FORMAT depthFormat;     // @llvm_enum
+    float      depthBias;
+    float      slopeScaledDepthBias;
+    float      depthBiasClamp;
+    SWR_FORMAT depthFormat; // @llvm_enum
  
      // sample count the rasterizer is running at
-    SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
-    uint32_t pixelLocation;     // UL or Center
-    SWR_MULTISAMPLE_POS samplePositions;    // @llvm_struct
-    bool bIsCenterPattern;   // @llvm_enum
+    SWR_MULTISAMPLE_COUNT sampleCount;      // @llvm_enum
+    uint32_t              pixelLocation;    // UL or Center
+    SWR_MULTISAMPLE_POS   samplePositions;  // @llvm_struct
+    bool                  bIsCenterPattern; // @llvm_enum
  };
  
  
@@ -1122,17 +1125,21 @@ struct SWR_ATTRIB_SWIZZLE
  // backend state
  struct SWR_BACKEND_STATE
  {
-    uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
-    uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
+    uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant
+                                        // interpolation
+    uint32_t pointSpriteTexCoordMask;   // bitmask indicating the attribute(s) which should be
+                                        // interpreted as tex coordinates
  
-    bool swizzleEnable;                 // when enabled, core will parse the swizzle map when
-                                        // setting up attributes for the backend, otherwise
-                                        // all attributes up to numAttributes will be sent
-    uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
-    uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
+    bool swizzleEnable;        // when enabled, core will parse the swizzle map when
+                               // setting up attributes for the backend, otherwise
+                               // all attributes up to numAttributes will be sent
+    uint8_t numAttributes;     // total number of attributes to send to backend (up to 32)
+    uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some
+                               // calculations for unneeded components
  
-    bool readRenderTargetArrayIndex;    // Forward render target array index from last FE stage to the backend
-    bool readViewportArrayIndex;        // Read viewport array index from last FE stage during binning
+    bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the
+                                     // backend
+    bool readViewportArrayIndex;     // Read viewport array index from last FE stage during binning
  
      // User clip/cull distance enables
      uint8_t cullDistanceMask;
@@ -1142,7 +1149,7 @@ struct SWR_BACKEND_STATE
      // and that the next fields are dword aligned.
      uint8_t pad[10];
  
-        // Offset to the start of the attributes of the input vertices, in simdvector units
+    // Offset to the start of the attributes of the input vertices, in simdvector units
      uint32_t vertexAttribOffset;
  
      // Offset to clip/cull attrib section of the vertex, in simdvector units
@@ -1151,7 +1158,7 @@ struct SWR_BACKEND_STATE
      SWR_ATTRIB_SWIZZLE swizzleMap[32];
  };
  static_assert(sizeof(SWR_BACKEND_STATE) == 128,
-    "Adjust padding to keep size (or remove this assert)");
+              "Adjust padding to keep size (or remove this assert)");
  
  
  union SWR_DEPTH_STENCIL_STATE
@@ -1214,8 +1221,8 @@ enum SWR_PS_POSITION_OFFSET
  
  enum SWR_BARYCENTRICS_MASK
  {
-    SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
-    SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
+    SWR_BARYCENTRIC_PER_PIXEL_MASK  = 0x1,
+    SWR_BARYCENTRIC_CENTROID_MASK   = 0x2,
      SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
  };
  
@@ -1223,27 +1230,28 @@ enum SWR_BARYCENTRICS_MASK
  struct SWR_PS_STATE
  {
      // dword 0-1
-    PFN_PIXEL_KERNEL pfnPixelShader;  // @llvm_pfn
+    PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn
  
      // dword 2
-    uint32_t killsPixel             : 1;    // pixel shader can kill pixels
-    uint32_t inputCoverage          : 2;    // ps uses input coverage
-    uint32_t writesODepth           : 1;    // pixel shader writes to depth
-    uint32_t usesSourceDepth        : 1;    // pixel shader reads depth
-    uint32_t shadingRate            : 2;    // shading per pixel / sample / coarse pixel
-    uint32_t posOffset              : 2;    // type of offset (none, sample, centroid) to add to pixel position
-    uint32_t barycentricsMask       : 3;    // which type(s) of barycentric coords does the PS interpolate attributes with
-    uint32_t usesUAV                : 1;    // pixel shader accesses UAV
-    uint32_t forceEarlyZ            : 1;    // force execution of early depth/stencil test
+    uint32_t killsPixel : 1;      // pixel shader can kill pixels
+    uint32_t inputCoverage : 2;   // ps uses input coverage
+    uint32_t writesODepth : 1;    // pixel shader writes to depth
+    uint32_t usesSourceDepth : 1; // pixel shader reads depth
+    uint32_t shadingRate : 2;     // shading per pixel / sample / coarse pixel
+    uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position
+    uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate
+                                   // attributes with
+    uint32_t usesUAV : 1;          // pixel shader accesses UAV
+    uint32_t forceEarlyZ : 1;      // force execution of early depth/stencil test
  
-    uint8_t renderTargetMask;               // Mask of render targets written
+    uint8_t renderTargetMask; // Mask of render targets written
  };
  
  // depth bounds state
  struct SWR_DEPTH_BOUNDS_STATE
  {
-    bool    depthBoundsTestEnable;
-    float   depthBoundsTestMinValue;
-    float   depthBoundsTestMaxValue;
+    bool  depthBoundsTestEnable;
+    float depthBoundsTestMinValue;
+    float depthBoundsTestMaxValue;
  };
-
+// clang-format on
diff --git a/src/gallium/drivers/swr/rasterizer/core/state_funcs.h b/src/gallium/drivers/swr/rasterizer/core/state_funcs.h

index eaf0094b62641bf8897de59f864d4bd89b7dcfb9..99eac835ea8edafd9ed34e5a51a31e47bc907d66 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/state_funcs.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state_funcs.h
@@ -1,36 +1,35 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file state.h
-*
-* @brief Definitions for API state - complex function implementation.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file state_funcs.h
+ *
+ * @brief Definitions for API state - complex function implementation.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "core/state.h"
  #include "common/simdintrin.h"
  
-
  template <typename MaskT>
  INLINE __m128i SWR_MULTISAMPLE_POS::expandThenBlend4(uint32_t* min, uint32_t* max)
  {
@@ -41,27 +40,27 @@ INLINE __m128i SWR_MULTISAMPLE_POS::expandThenBlend4(uint32_t* min, uint32_t* ma
  
  INLINE void SWR_MULTISAMPLE_POS::PrecalcSampleData(int numSamples)
  {
-    for(int i = 0; i < numSamples; i++)
+    for (int i = 0; i < numSamples; i++)
      {
          _vXi[i] = _mm_set1_epi32(_xi[i]);
          _vYi[i] = _mm_set1_epi32(_yi[i]);
-        _vX[i] = _simd_set1_ps(_x[i]);
-        _vY[i] = _simd_set1_ps(_y[i]);
+        _vX[i]  = _simd_set1_ps(_x[i]);
+        _vY[i]  = _simd_set1_ps(_y[i]);
      }
      // precalculate the raster tile BB for the rasterizer.
-    CalcTileSampleOffsets(numSamples);                                 
+    CalcTileSampleOffsets(numSamples);
  }
  
  INLINE void SWR_MULTISAMPLE_POS::CalcTileSampleOffsets(int numSamples)
  {
-    auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
-    auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
+    auto minXi  = std::min_element(std::begin(_xi), &_xi[numSamples]);
+    auto maxXi  = std::max_element(std::begin(_xi), &_xi[numSamples]);
      using xMask = std::integral_constant<int, 0xA>;
      // BR(max),    BL(min),    UR(max),    UL(min)
      tileSampleOffsetsX = expandThenBlend4<xMask>(minXi, maxXi);
  
-    auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
-    auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
+    auto minYi  = std::min_element(std::begin(_yi), &_yi[numSamples]);
+    auto maxYi  = std::max_element(std::begin(_yi), &_yi[numSamples]);
      using yMask = std::integral_constant<int, 0xC>;
      // BR(max),    BL(min),    UR(max),    UL(min)
      tileSampleOffsetsY = expandThenBlend4<yMask>(minYi, maxYi);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tessellator.h b/src/gallium/drivers/swr/rasterizer/core/tessellator.h

index 316f66f94ae90becf98d20a3d444470b0d4aa143..348170bfd4245e2d2a429a8a10127599c4cce141 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/tessellator.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tessellator.h
@@ -1,43 +1,42 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tessellator.h
-*
-* @brief Tessellator fixed function unit interface definition
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tessellator.h
+ *
+ * @brief Tessellator fixed function unit interface definition
+ *
+ ******************************************************************************/
  #pragma once
  
  /// Allocate and initialize a new tessellation context
-HANDLE SWR_API TSInitCtx(
-    SWR_TS_DOMAIN tsDomain,                     ///< [IN] Tessellation domain (isoline, quad, triangle)
-    SWR_TS_PARTITIONING tsPartitioning,         ///< [IN] Tessellation partitioning algorithm
-    SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,    ///< [IN] Tessellation output topology
-    void* pContextMem,                          ///< [IN] Memory to use for the context
-    size_t& memSize);                           ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
+HANDLE SWR_API
+       TSInitCtx(SWR_TS_DOMAIN          tsDomain, ///< [IN] Tessellation domain (isoline, quad, triangle)
+                 SWR_TS_PARTITIONING    tsPartitioning, ///< [IN] Tessellation partitioning algorithm
+                 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology, ///< [IN] Tessellation output topology
+                 void*                  pContextMem,      ///< [IN] Memory to use for the context
+                 size_t& memSize); ///< [INOUT] In: Amount of memory in pContextMem. Out: Mem required
  
  /// Destroy & de-allocate tessellation context
-void SWR_API TSDestroyCtx(
-    HANDLE tsCtx);  ///< [IN] Tessellation context to be destroyed
+void SWR_API TSDestroyCtx(HANDLE tsCtx); ///< [IN] Tessellation context to be destroyed
  
  struct SWR_TS_TESSELLATED_DATA
  {
@@ -45,43 +44,38 @@ struct SWR_TS_TESSELLATED_DATA
      uint32_t NumDomainPoints;
  
      uint32_t* ppIndices[3];
-    float* pDomainPointsU;
-    float* pDomainPointsV;
+    float*    pDomainPointsU;
+    float*    pDomainPointsV;
      // For Tri: pDomainPointsW[i] = 1.0f - pDomainPointsU[i] - pDomainPointsV[i]
  };
  
  /// Perform Tessellation
-void SWR_API TSTessellate(
-    HANDLE tsCtx,                                   ///< [IN] Tessellation Context
-    const SWR_TESSELLATION_FACTORS& tsTessFactors,  ///< [IN] Tessellation Factors
-    SWR_TS_TESSELLATED_DATA& tsTessellatedData);    ///< [OUT] Tessellated Data
-
+void SWR_API
+     TSTessellate(HANDLE                          tsCtx,         ///< [IN] Tessellation Context
+                  const SWR_TESSELLATION_FACTORS& tsTessFactors, ///< [IN] Tessellation Factors
+                  SWR_TS_TESSELLATED_DATA&        tsTessellatedData);   ///< [OUT] Tessellated Data
  
  
  /// @TODO - Implement OSS tessellator
  
-INLINE HANDLE SWR_API TSInitCtx(
-    SWR_TS_DOMAIN tsDomain,
-    SWR_TS_PARTITIONING tsPartitioning,
-    SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,
-    void* pContextMem,
-    size_t& memSize)
+INLINE HANDLE SWR_API TSInitCtx(SWR_TS_DOMAIN          tsDomain,
+                                SWR_TS_PARTITIONING    tsPartitioning,
+                                SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology,
+                                void*                  pContextMem,
+                                size_t&                memSize)
  {
      SWR_NOT_IMPL;
      return NULL;
  }
  
-
  INLINE void SWR_API TSDestroyCtx(HANDLE tsCtx)
  {
      SWR_NOT_IMPL;
  }
  
-
-INLINE void SWR_API TSTessellate(
-    HANDLE tsCtx,
-    const SWR_TESSELLATION_FACTORS& tsTessFactors,
-    SWR_TS_TESSELLATED_DATA& tsTessellatedData)
+INLINE void SWR_API TSTessellate(HANDLE                          tsCtx,
+                                 const SWR_TESSELLATION_FACTORS& tsTessFactors,
+                                 SWR_TS_TESSELLATED_DATA&        tsTessellatedData)
  {
      SWR_NOT_IMPL;
  }
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.cpp b/src/gallium/drivers/swr/rasterizer/core/threads.cpp

index f77ae22a80a9d26563ac3e35f03d177116c14753..4523616cba0bc8742d3b3aa3af8419b6e9cba481 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/threads.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.cpp
@@ -1,25 +1,25 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-****************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ ****************************************************************************/
  
  #include <stdio.h>
  #include <thread>
@@ -52,13 +52,11 @@
  #include "tileset.h"
  
  
-
-
  // ThreadId
  struct Core
  {
-    uint32_t                procGroup = 0;
-    std::vector<uint32_t>   threadIds;
+    uint32_t              procGroup = 0;
+    std::vector<uint32_t> threadIds;
  };
  
  struct NumaNode
@@ -78,7 +76,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  
      std::vector<KAFFINITY> threadMaskPerProcGroup;
  
-    static std::mutex m;
+    static std::mutex           m;
      std::lock_guard<std::mutex> l(m);
  
      DWORD bufSize = 0;
@@ -86,13 +84,14 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
      BOOL ret = GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &bufSize);
      SWR_ASSERT(ret == FALSE && GetLastError() == ERROR_INSUFFICIENT_BUFFER);
  
-    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
+    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBufferMem =
+        (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufSize);
      SWR_ASSERT(pBufferMem);
  
      ret = GetLogicalProcessorInformationEx(RelationProcessorCore, pBufferMem, &bufSize);
      SWR_ASSERT(ret != FALSE, "Failed to get Processor Topology Information");
  
-    uint32_t count = bufSize / pBufferMem->Size;
+    uint32_t                                 count   = bufSize / pBufferMem->Size;
      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pBuffer = pBufferMem;
  
      for (uint32_t i = 0; i < count; ++i)
@@ -100,8 +99,8 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
          SWR_ASSERT(pBuffer->Relationship == RelationProcessorCore);
          for (uint32_t g = 0; g < pBuffer->Processor.GroupCount; ++g)
          {
-            auto& gmask = pBuffer->Processor.GroupMask[g];
-            uint32_t threadId = 0;
+            auto&    gmask     = pBuffer->Processor.GroupMask[g];
+            uint32_t threadId  = 0;
              uint32_t procGroup = gmask.Group;
  
              Core* pCore = nullptr;
@@ -133,10 +132,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
                  threadMaskPerProcGroup[procGroup] |= (KAFFINITY(1) << threadId);
  
                  // Find Numa Node
-                uint32_t numaId = 0;
+                uint32_t         numaId  = 0;
                  PROCESSOR_NUMBER procNum = {};
-                procNum.Group = WORD(procGroup);
-                procNum.Number = UCHAR(threadId);
+                procNum.Group            = WORD(procGroup);
+                procNum.Number           = UCHAR(threadId);
  
                  ret = GetNumaProcessorNodeEx(&procNum, (PUSHORT)&numaId);
                  SWR_ASSERT(ret);
@@ -146,7 +145,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
                  {
                      out_nodes.resize(numaId + 1);
                  }
-                auto& numaNode = out_nodes[numaId];
+                auto& numaNode  = out_nodes[numaId];
                  numaNode.numaId = numaId;
  
                  uint32_t coreId = 0;
@@ -154,7 +153,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
                  if (nullptr == pCore)
                  {
                      numaNode.cores.push_back(Core());
-                    pCore = &numaNode.cores.back();
+                    pCore            = &numaNode.cores.back();
                      pCore->procGroup = procGroup;
                  }
                  pCore->threadIds.push_back(threadId);
@@ -169,56 +168,55 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  
      free(pBufferMem);
  
-
-#elif defined(__linux__) || defined (__gnu_linux__)
+#elif defined(__linux__) || defined(__gnu_linux__)
  
      // Parse /proc/cpuinfo to get full topology
      std::ifstream input("/proc/cpuinfo");
-    std::string line;
-    char* c;
-    uint32_t procId = uint32_t(-1);
-    uint32_t coreId = uint32_t(-1);
-    uint32_t physId = uint32_t(-1);
+    std::string   line;
+    char*         c;
+    uint32_t      procId = uint32_t(-1);
+    uint32_t      coreId = uint32_t(-1);
+    uint32_t      physId = uint32_t(-1);
  
      while (std::getline(input, line))
      {
          if (line.find("processor") != std::string::npos)
          {
              auto data_start = line.find(": ") + 2;
-            procId = std::strtoul(&line.c_str()[data_start], &c, 10);
+            procId          = std::strtoul(&line.c_str()[data_start], &c, 10);
              continue;
          }
          if (line.find("core id") != std::string::npos)
          {
              auto data_start = line.find(": ") + 2;
-            coreId = std::strtoul(&line.c_str()[data_start], &c, 10);
+            coreId          = std::strtoul(&line.c_str()[data_start], &c, 10);
              continue;
          }
          if (line.find("physical id") != std::string::npos)
          {
              auto data_start = line.find(": ") + 2;
-            physId = std::strtoul(&line.c_str()[data_start], &c, 10);
+            physId          = std::strtoul(&line.c_str()[data_start], &c, 10);
              continue;
          }
          if (line.length() == 0)
          {
              if (physId + 1 > out_nodes.size())
                  out_nodes.resize(physId + 1);
-            auto& numaNode = out_nodes[physId];
+            auto& numaNode  = out_nodes[physId];
              numaNode.numaId = physId;
  
              if (coreId + 1 > numaNode.cores.size())
                  numaNode.cores.resize(coreId + 1);
-            auto& core = numaNode.cores[coreId];
+            auto& core     = numaNode.cores[coreId];
              core.procGroup = coreId;
              core.threadIds.push_back(procId);
          }
      }
  
      out_numThreadsPerProcGroup = 0;
-    for (auto &node : out_nodes)
+    for (auto& node : out_nodes)
      {
-        for (auto &core : node.cores)
+        for (auto& core : node.cores)
          {
              out_numThreadsPerProcGroup += core.threadIds.size();
          }
@@ -226,11 +224,11 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  
  #elif defined(__APPLE__)
  
-    auto numProcessors = 0;
-    auto numCores = 0;
+    auto numProcessors  = 0;
+    auto numCores       = 0;
      auto numPhysicalIds = 0;
  
-    int value;
+    int    value;
      size_t size = sizeof(value);
  
      int result = sysctlbyname("hw.packages", &value, &size, NULL, 0);
@@ -249,8 +247,8 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  
      for (auto physId = 0; physId < numPhysicalIds; ++physId)
      {
-        auto &numaNode = out_nodes[physId];
-        auto procId = 0;
+        auto& numaNode = out_nodes[physId];
+        auto  procId   = 0;
  
          numaNode.cores.resize(numCores);
  
@@ -258,7 +256,7 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
          {
              for (auto coreId = 0; coreId < numaNode.cores.size(); ++coreId, ++procId)
              {
-                auto &core = numaNode.cores[coreId];
+                auto& core = numaNode.cores[coreId];
  
                  core.procGroup = coreId;
                  core.threadIds.push_back(procId);
@@ -268,9 +266,9 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  
      out_numThreadsPerProcGroup = 0;
  
-    for (auto &node : out_nodes)
+    for (auto& node : out_nodes)
      {
-        for (auto &core : node.cores)
+        for (auto& core : node.cores)
          {
              out_numThreadsPerProcGroup += core.threadIds.size();
          }
@@ -283,10 +281,10 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
  #endif
  
      // Prune empty cores and numa nodes
-    for (auto node_it = out_nodes.begin(); node_it != out_nodes.end(); )
+    for (auto node_it = out_nodes.begin(); node_it != out_nodes.end();)
      {
          // Erase empty cores (first)
-        for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end(); )
+        for (auto core_it = node_it->cores.begin(); core_it != node_it->cores.end();)
          {
              if (core_it->threadIds.size() == 0)
              {
@@ -310,10 +308,14 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
      }
  }
  
-void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId = 0, bool bindProcGroup=false)
+void bindThread(SWR_CONTEXT* pContext,
+                uint32_t     threadId,
+                uint32_t     procGroupId   = 0,
+                bool         bindProcGroup = false)
  {
      // Only bind threads when MAX_WORKER_THREADS isn't set.
-    if (pContext->threadInfo.SINGLE_THREADED || (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
+    if (pContext->threadInfo.SINGLE_THREADED ||
+        (pContext->threadInfo.MAX_WORKER_THREADS && bindProcGroup == false))
      {
          return;
      }
@@ -321,7 +323,7 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
  #if defined(_WIN32)
  
      GROUP_AFFINITY affinity = {};
-    affinity.Group = procGroupId;
+    affinity.Group          = procGroupId;
  
  #if !defined(_WIN64)
      if (threadId >= 32)
@@ -340,7 +342,7 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
      {
          // If MAX_WORKER_THREADS is set, only bind to the proc group,
          // Not the individual HW thread.
-        if (!bindProcGroup  && !pContext->threadInfo.MAX_WORKER_THREADS)
+        if (!bindProcGroup && !pContext->threadInfo.MAX_WORKER_THREADS)
          {
              affinity.Mask = KAFFINITY(1) << threadId;
          }
@@ -372,15 +374,15 @@ void bindThread(SWR_CONTEXT* pContext, uint32_t threadId, uint32_t procGroupId =
  }
  
  INLINE
-uint32_t GetEnqueuedDraw(SWR_CONTEXT *pContext)
+uint32_t GetEnqueuedDraw(SWR_CONTEXT* pContext)
  {
      return pContext->dcRing.GetHead();
  }
  
  INLINE
-DRAW_CONTEXT *GetDC(SWR_CONTEXT *pContext, uint32_t drawId)
+DRAW_CONTEXT* GetDC(SWR_CONTEXT* pContext, uint32_t drawId)
  {
-    return &pContext->dcRing[(drawId-1) % pContext->MAX_DRAWS_IN_FLIGHT];
+    return &pContext->dcRing[(drawId - 1) % pContext->MAX_DRAWS_IN_FLIGHT];
  }
  
  INLINE
@@ -393,12 +395,12 @@ bool IDComparesLess(uint32_t a, uint32_t b)
  
  // returns true if dependency not met
  INLINE
-bool CheckDependency(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
+bool CheckDependency(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
  {
      return pDC->dependent && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
  }
  
-bool CheckDependencyFE(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t lastRetiredDraw)
+bool CheckDependencyFE(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t lastRetiredDraw)
  {
      return pDC->dependentFE && IDComparesLess(lastRetiredDraw, pDC->drawId - 1);
  }
@@ -413,15 +415,15 @@ INLINE void UpdateClientStats(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CON
      }
  
      DRAW_DYNAMIC_STATE& dynState = pDC->dynState;
-    OSALIGNLINE(SWR_STATS) stats{ 0 };
+    OSALIGNLINE(SWR_STATS) stats{0};
  
      // Sum up stats across all workers before sending to client.
      for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i)
      {
          stats.DepthPassCount += dynState.pStats[i].DepthPassCount;
  
-        stats.PsInvocations  += dynState.pStats[i].PsInvocations;
-        stats.CsInvocations  += dynState.pStats[i].CsInvocations;
+        stats.PsInvocations += dynState.pStats[i].PsInvocations;
+        stats.CsInvocations += dynState.pStats[i].CsInvocations;
      }
  
  
@@ -435,8 +437,8 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONT
      if (pDC->retireCallback.pfnCallbackFunc)
      {
          pDC->retireCallback.pfnCallbackFunc(pDC->retireCallback.userData,
-            pDC->retireCallback.userData2,
-            pDC->retireCallback.userData3);
+                                            pDC->retireCallback.userData2,
+                                            pDC->retireCallback.userData3);
      }
  }
  
@@ -465,7 +467,7 @@ INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId,
  
          _ReadWriteBarrier();
  
-        pContext->dcRing.Dequeue();  // Remove from tail
+        pContext->dcRing.Dequeue(); // Remove from tail
      }
  
      return result;
@@ -477,20 +479,23 @@ int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC)
      return CompleteDrawContextInl(pContext, 0, pDC);
  }
  
-INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE, uint32_t& drawEnqueued)
+INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext,
+                                    uint32_t     workerId,
+                                    uint32_t&    curDrawBE,
+                                    uint32_t&    drawEnqueued)
  {
      // increment our current draw id to the first incomplete draw
      drawEnqueued = GetEnqueuedDraw(pContext);
      while (IDComparesLess(curDrawBE, drawEnqueued))
      {
-        DRAW_CONTEXT *pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
+        DRAW_CONTEXT* pDC = &pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT];
  
          // If its not compute and FE is not done then break out of loop.
-        if (!pDC->doneFE && !pDC->isCompute) break;
+        if (!pDC->doneFE && !pDC->isCompute)
+            break;
  
-        bool isWorkComplete = pDC->isCompute ?
-            pDC->pDispatch->isWorkComplete() :
-            pDC->pTileMgr->isWorkComplete();
+        bool isWorkComplete =
+            pDC->isCompute ? pDC->pDispatch->isWorkComplete() : pDC->pTileMgr->isWorkComplete();
  
          if (isWorkComplete)
          {
@@ -511,24 +516,24 @@ INLINE bool FindFirstIncompleteDraw(SWR_CONTEXT* pContext, uint32_t workerId, ui
  /// @brief If there is any BE work then go work on it.
  /// @param pContext - pointer to SWR context.
  /// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
-///                    has its own curDrawBE counter and this ensures that each worker processes all the
-///                    draws in order.
+/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker
+/// thread
+///                    has its own curDrawBE counter and this ensures that each worker processes all
+///                    the draws in order.
  /// @param lockedTiles - This is the set of tiles locked by other threads. Each thread maintains its
-///                      own set and each time it fails to lock a macrotile, because its already locked,
-///                      then it will add that tile to the lockedTiles set. As a worker begins to work
-///                      on future draws the lockedTiles ensure that it doesn't work on tiles that may
-///                      still have work pending in a previous draw. Additionally, the lockedTiles is
-///                      hueristic that can steer a worker back to the same macrotile that it had been
-///                      working on in a previous draw.
+///                      own set and each time it fails to lock a macrotile, because its already
+///                      locked, then it will add that tile to the lockedTiles set. As a worker
+///                      begins to work on future draws the lockedTiles ensure that it doesn't work
+///                      on tiles that may still have work pending in a previous draw. Additionally,
+///                      the lockedTiles is hueristic that can steer a worker back to the same
+///                      macrotile that it had been working on in a previous draw.
  /// @returns        true if worker thread should shutdown
-bool WorkOnFifoBE(
-    SWR_CONTEXT *pContext,
-    uint32_t workerId,
-    uint32_t &curDrawBE,
-    TileSet& lockedTiles,
-    uint32_t numaNode,
-    uint32_t numaMask)
+bool WorkOnFifoBE(SWR_CONTEXT* pContext,
+                  uint32_t     workerId,
+                  uint32_t&    curDrawBE,
+                  TileSet&     lockedTiles,
+                  uint32_t     numaNode,
+                  uint32_t     numaMask)
  {
      bool bShutdown = false;
  
@@ -540,27 +545,30 @@ bool WorkOnFifoBE(
          return false;
      }
  
-    uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
+    uint32_t lastRetiredDraw =
+        pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
  
      // Reset our history for locked tiles. We'll have to re-learn which tiles are locked.
      lockedTiles.clear();
  
      // Try to work on each draw in order of the available draws in flight.
      //   1. If we're on curDrawBE, we can work on any macrotile that is available.
-    //   2. If we're trying to work on draws after curDrawBE, we are restricted to 
+    //   2. If we're trying to work on draws after curDrawBE, we are restricted to
      //      working on those macrotiles that are known to be complete in the prior draw to
      //      maintain order. The locked tiles provides the history to ensures this.
      for (uint32_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
      {
-        DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
+        DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
  
-        if (pDC->isCompute) return false; // We don't look at compute work.
+        if (pDC->isCompute)
+            return false; // We don't look at compute work.
  
          // First wait for FE to be finished with this draw. This keeps threading model simple
          // but if there are lots of bubbles between draws then serializing FE and BE may
          // need to be revisited.
-        if (!pDC->doneFE) return false;
-        
+        if (!pDC->doneFE)
+            return false;
+
          // If this draw is dependent on a previous draw then we need to bail.
          if (CheckDependency(pContext, pDC, lastRetiredDraw))
          {
@@ -568,7 +576,7 @@ bool WorkOnFifoBE(
          }
  
          // Grab the list of all dirty macrotiles. A tile is dirty if it has work queued to it.
-        auto &macroTiles = pDC->pTileMgr->getDirtyTiles();
+        auto& macroTiles = pDC->pTileMgr->getDirtyTiles();
  
          for (auto tile : macroTiles)
          {
@@ -595,7 +603,7 @@ bool WorkOnFifoBE(
  
              if (tile->tryLock())
              {
-                BE_WORK *pWork;
+                BE_WORK* pWork;
  
                  RDTSC_BEGIN(WorkerFoundWork, pDC->drawId);
  
@@ -624,11 +632,13 @@ bool WorkOnFifoBE(
  
                  pDC->pTileMgr->markTileComplete(tileID);
  
-                // Optimization: If the draw is complete and we're the last one to have worked on it then
-                // we can reset the locked list as we know that all previous draws before the next are guaranteed to be complete.
+                // Optimization: If the draw is complete and we're the last one to have worked on it
+                // then we can reset the locked list as we know that all previous draws before the
+                // next are guaranteed to be complete.
                  if ((curDrawBE == i) && (bShutdown || pDC->pTileMgr->isWorkComplete()))
                  {
-                    // We can increment the current BE and safely move to next draw since we know this draw is complete.
+                    // We can increment the current BE and safely move to next draw since we know
+                    // this draw is complete.
                      curDrawBE++;
                      CompleteDrawContextInl(pContext, workerId, pDC);
  
@@ -645,7 +655,8 @@ bool WorkOnFifoBE(
              }
              else
              {
-                // This tile is already locked. So let's add it to our locked tiles set. This way we don't try locking this one again.
+                // This tile is already locked. So let's add it to our locked tiles set. This way we
+                // don't try locking this one again.
                  lockedTiles.set(tileID);
              }
          }
@@ -663,12 +674,24 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
          SWR_STATS_FE& stats = pDC->dynState.statsFE;
  
          AR_EVENT(FrontendStatsEvent(pDC->drawId,
-            stats.IaVertices, stats.IaPrimitives, stats.VsInvocations, stats.HsInvocations,
-            stats.DsInvocations, stats.GsInvocations, stats.GsPrimitives, stats.CInvocations, stats.CPrimitives,
-            stats.SoPrimStorageNeeded[0], stats.SoPrimStorageNeeded[1], stats.SoPrimStorageNeeded[2], stats.SoPrimStorageNeeded[3],
-            stats.SoNumPrimsWritten[0], stats.SoNumPrimsWritten[1], stats.SoNumPrimsWritten[2], stats.SoNumPrimsWritten[3]
-        ));
-               AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
+                                    stats.IaVertices,
+                                    stats.IaPrimitives,
+                                    stats.VsInvocations,
+                                    stats.HsInvocations,
+                                    stats.DsInvocations,
+                                    stats.GsInvocations,
+                                    stats.GsPrimitives,
+                                    stats.CInvocations,
+                                    stats.CPrimitives,
+                                    stats.SoPrimStorageNeeded[0],
+                                    stats.SoPrimStorageNeeded[1],
+                                    stats.SoPrimStorageNeeded[2],
+                                    stats.SoPrimStorageNeeded[3],
+                                    stats.SoNumPrimsWritten[0],
+                                    stats.SoNumPrimsWritten[1],
+                                    stats.SoNumPrimsWritten[2],
+                                    stats.SoNumPrimsWritten[3]));
+        AR_EVENT(FrontendDrawEndEvent(pDC->drawId));
  
          pContext->pfnUpdateStatsFE(GetPrivateState(pDC), &stats);
      }
@@ -680,7 +703,8 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
              if ((pDC->dynState.SoWriteOffsetDirty[i]) &&
                  (pDC->pState->state.soBuffer[i].soWriteEnable))
              {
-                pContext->pfnUpdateSoWriteOffset(GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
+                pContext->pfnUpdateSoWriteOffset(
+                    GetPrivateState(pDC), i, pDC->dynState.SoWriteOffset[i]);
              }
          }
      }
@@ -692,14 +716,14 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
      InterlockedDecrement(&pContext->drawsOutstandingFE);
  }
  
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
+void WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE)
  {
      // Try to grab the next DC from the ring
      uint32_t drawEnqueued = GetEnqueuedDraw(pContext);
      while (IDComparesLess(curDrawFE, drawEnqueued))
      {
-        uint32_t dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
-        DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
+        uint32_t      dcSlot = curDrawFE % pContext->MAX_DRAWS_IN_FLIGHT;
+        DRAW_CONTEXT* pDC    = &pContext->dcRing[dcSlot];
          if (pDC->isCompute || pDC->doneFE)
          {
              CompleteDrawContextInl(pContext, workerId, pDC);
@@ -712,11 +736,11 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
      }
  
      uint32_t lastRetiredFE = curDrawFE - 1;
-    uint32_t curDraw = curDrawFE;
+    uint32_t curDraw       = curDrawFE;
      while (IDComparesLess(curDraw, drawEnqueued))
      {
-        uint32_t dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
-        DRAW_CONTEXT *pDC = &pContext->dcRing[dcSlot];
+        uint32_t      dcSlot = curDraw % pContext->MAX_DRAWS_IN_FLIGHT;
+        DRAW_CONTEXT* pDC    = &pContext->dcRing[dcSlot];
  
          if (!pDC->isCompute && !pDC->FeLock)
          {
@@ -742,13 +766,11 @@ void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
  /// @brief If there is any compute work then go work on it.
  /// @param pContext - pointer to SWR context.
  /// @param workerId - The unique worker ID that is assigned to this thread.
-/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker thread
-///                    has its own curDrawBE counter and this ensures that each worker processes all the
-///                    draws in order.
-void WorkOnCompute(
-    SWR_CONTEXT *pContext,
-    uint32_t workerId,
-    uint32_t& curDrawBE)
+/// @param curDrawBE - This tracks the draw contexts that this thread has processed. Each worker
+/// thread
+///                    has its own curDrawBE counter and this ensures that each worker processes all
+///                    the draws in order.
+void WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE)
  {
      uint32_t drawEnqueued = 0;
      if (FindFirstIncompleteDraw(pContext, workerId, curDrawBE, drawEnqueued) == false)
@@ -756,12 +778,14 @@ void WorkOnCompute(
          return;
      }
  
-    uint32_t lastRetiredDraw = pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
+    uint32_t lastRetiredDraw =
+        pContext->dcRing[curDrawBE % pContext->MAX_DRAWS_IN_FLIGHT].drawId - 1;
  
      for (uint64_t i = curDrawBE; IDComparesLess(i, drawEnqueued); ++i)
      {
-        DRAW_CONTEXT *pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
-        if (pDC->isCompute == false) return;
+        DRAW_CONTEXT* pDC = &pContext->dcRing[i % pContext->MAX_DRAWS_IN_FLIGHT];
+        if (pDC->isCompute == false)
+            return;
  
          // check dependencies
          if (CheckDependency(pContext, pDC, lastRetiredDraw))
@@ -775,9 +799,9 @@ void WorkOnCompute(
          // Is there any work remaining?
          if (queue.getNumQueued() > 0)
          {
-            void* pSpillFillBuffer = nullptr;
-            void* pScratchSpace = nullptr;
-            uint32_t threadGroupId = 0;
+            void*    pSpillFillBuffer = nullptr;
+            void*    pScratchSpace    = nullptr;
+            uint32_t threadGroupId    = 0;
              while (queue.getWork(threadGroupId))
              {
                  queue.dispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
@@ -790,7 +814,7 @@ void WorkOnCompute(
      }
  }
  
-void BindApiThread(SWR_CONTEXT *pContext, uint32_t apiThreadId)
+void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId)
  {
      if (nullptr == pContext)
      {
@@ -801,25 +825,26 @@ void BindApiThread(SWR_CONTEXT *pContext, uint32_t apiThreadId)
      {
          if (pContext->threadPool.numReservedThreads)
          {
-            const THREAD_DATA &threadData = pContext->threadPool.pApiThreadData[0];
+            const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[0];
              // Just bind to the process group used for API thread 0
              bindThread(pContext, 0, threadData.procGroupId, true);
          }
          return;
      }
  
-    const THREAD_DATA &threadData = pContext->threadPool.pApiThreadData[apiThreadId];
+    const THREAD_DATA& threadData = pContext->threadPool.pApiThreadData[apiThreadId];
  
-    bindThread(pContext, threadData.threadId, threadData.procGroupId, threadData.forceBindProcGroup);
+    bindThread(
+        pContext, threadData.threadId, threadData.procGroupId, threadData.forceBindProcGroup);
  }
  
-template<bool IsFEThread, bool IsBEThread>
+template <bool IsFEThread, bool IsBEThread>
  DWORD workerThreadMain(LPVOID pData)
  {
-    THREAD_DATA *pThreadData = (THREAD_DATA*)pData;
-    SWR_CONTEXT *pContext = pThreadData->pContext;
-    uint32_t threadId = pThreadData->threadId;
-    uint32_t workerId = pThreadData->workerId;
+    THREAD_DATA* pThreadData = (THREAD_DATA*)pData;
+    SWR_CONTEXT* pContext    = pThreadData->pContext;
+    uint32_t     threadId    = pThreadData->threadId;
+    uint32_t     workerId    = pThreadData->workerId;
  
      bindThread(pContext, threadId, pThreadData->procGroupId, pThreadData->forceBindProcGroup);
  
@@ -832,7 +857,10 @@ DWORD workerThreadMain(LPVOID pData)
                    // linux pthread name limited to 16 chars (including \0)
                    "w%03d-n%d-c%03d-t%d",
  #endif
-            workerId, pThreadData->numaId, pThreadData->coreId, pThreadData->htId);
+                  workerId,
+                  pThreadData->numaId,
+                  pThreadData->coreId,
+                  pThreadData->htId);
          SetCurrentThreadName(threadName);
      }
  
@@ -851,7 +879,7 @@ DWORD workerThreadMain(LPVOID pData)
  
      // each worker has the ability to work on any of the queued draws as long as certain
      // conditions are met. the data associated
-    // with a draw is guaranteed to be active as long as a worker hasn't signaled that he 
+    // with a draw is guaranteed to be active as long as a worker hasn't signaled that he
      // has moved on to the next draw when he determines there is no more work to do. The api
      // thread will not increment the head of the dc ring until all workers have moved past the
      // current head.
@@ -906,7 +934,8 @@ DWORD workerThreadMain(LPVOID pData)
          if (IsBEThread)
          {
              RDTSC_BEGIN(WorkerWorkOnFifoBE, 0);
-            bShutdown |= WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
+            bShutdown |=
+                WorkOnFifoBE(pContext, workerId, curDrawBE, lockedTiles, numaNode, numaMask);
              RDTSC_END(WorkerWorkOnFifoBE, 0);
  
              WorkOnCompute(pContext, workerId, curDrawBE);
@@ -925,7 +954,8 @@ DWORD workerThreadMain(LPVOID pData)
  
      return 0;
  }
-template<> DWORD workerThreadMain<false, false>(LPVOID) = delete;
+template <>
+DWORD workerThreadMain<false, false>(LPVOID) = delete;
  
  template <bool IsFEThread, bool IsBEThread>
  DWORD workerThreadInit(LPVOID pData)
@@ -938,7 +968,7 @@ DWORD workerThreadInit(LPVOID pData)
      }
  
  #if defined(_WIN32)
-    __except(EXCEPTION_CONTINUE_SEARCH)
+    __except (EXCEPTION_CONTINUE_SEARCH)
      {
      }
  
@@ -946,14 +976,16 @@ DWORD workerThreadInit(LPVOID pData)
  
      return 1;
  }
-template<> DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
+template <>
+DWORD workerThreadInit<false, false>(LPVOID pData) = delete;
  
  static void InitPerThreadStats(SWR_CONTEXT* pContext, uint32_t numThreads)
  {
      // Initialize DRAW_CONTEXT's per-thread stats
      for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc)
      {
-        pContext->dcRing[dc].dynState.pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
+        pContext->dcRing[dc].dynState.pStats =
+            (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * numThreads, 64);
          memset(pContext->dcRing[dc].dynState.pStats, 0, sizeof(SWR_STATS) * numThreads);
      }
  }
@@ -965,15 +997,15 @@ static void InitPerThreadStats(SWR_CONTEXT* pContext, uint32_t numThreads)
  void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  {
      CPUNumaNodes nodes;
-    uint32_t numThreadsPerProcGroup = 0;
+    uint32_t     numThreadsPerProcGroup = 0;
      CalculateProcessorTopology(nodes, numThreadsPerProcGroup);
  
      // Assumption, for asymmetric topologies, multi-threaded cores will appear
      // in the list before single-threaded cores.  This appears to be true for
      // Windows when the total HW threads is limited to 64.
-    uint32_t numHWNodes         = (uint32_t)nodes.size();
-    uint32_t numHWCoresPerNode  = (uint32_t)nodes[0].cores.size();
-    uint32_t numHWHyperThreads  = (uint32_t)nodes[0].cores[0].threadIds.size();
+    uint32_t numHWNodes        = (uint32_t)nodes.size();
+    uint32_t numHWCoresPerNode = (uint32_t)nodes[0].cores.size();
+    uint32_t numHWHyperThreads = (uint32_t)nodes[0].cores[0].threadIds.size();
  
  #if defined(_WIN32) && !defined(_WIN64)
      if (!pContext->threadInfo.MAX_WORKER_THREADS)
@@ -997,9 +1029,9 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
          }
      }
  
-    uint32_t numNodes           = numHWNodes;
-    uint32_t numCoresPerNode    = numHWCoresPerNode;
-    uint32_t numHyperThreads    = numHWHyperThreads;
+    uint32_t numNodes        = numHWNodes;
+    uint32_t numCoresPerNode = numHWCoresPerNode;
+    uint32_t numHyperThreads = numHWHyperThreads;
  
      // Calc used threads per-core
      if (numHyperThreads > pContext->threadInfo.BASE_THREAD)
@@ -1008,11 +1040,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      }
      else
      {
-        SWR_ASSERT(
-            false,
-            "Cannot use BASE_THREAD value: %d, maxThreads: %d, reverting BASE_THREAD to 0",
-            pContext->threadInfo.BASE_THREAD,
-            numHyperThreads);
+        SWR_ASSERT(false,
+                   "Cannot use BASE_THREAD value: %d, maxThreads: %d, reverting BASE_THREAD to 0",
+                   pContext->threadInfo.BASE_THREAD,
+                   numHyperThreads);
          pContext->threadInfo.BASE_THREAD = 0;
      }
  
@@ -1042,11 +1073,10 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      }
      else
      {
-        SWR_ASSERT(
-            false,
-            "Cannot use BASE_CORE value: %d, maxCores: %d, reverting BASE_CORE to 0",
-            pContext->threadInfo.BASE_CORE,
-            numCoresPerNode);
+        SWR_ASSERT(false,
+                   "Cannot use BASE_CORE value: %d, maxCores: %d, reverting BASE_CORE to 0",
+                   pContext->threadInfo.BASE_CORE,
+                   numCoresPerNode);
          pContext->threadInfo.BASE_CORE = 0;
      }
  
@@ -1080,25 +1110,25 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      SWR_REL_ASSERT(numThreads <= numHWThreads);
  
      uint32_t& numAPIReservedThreads = pContext->apiThreadInfo.numAPIReservedThreads;
-    uint32_t& numAPIThreadsPerCore = pContext->apiThreadInfo.numAPIThreadsPerCore;
-    uint32_t numRemovedThreads = 0;
+    uint32_t& numAPIThreadsPerCore  = pContext->apiThreadInfo.numAPIThreadsPerCore;
+    uint32_t  numRemovedThreads     = 0;
  
      if (pContext->threadInfo.SINGLE_THREADED)
      {
-        numAPIReservedThreads = 0;
-        numThreads = 1;
+        numAPIReservedThreads      = 0;
+        numThreads                 = 1;
          pContext->NumWorkerThreads = 1;
-        pContext->NumFEThreads = 1;
-        pContext->NumBEThreads = 1;
-        pPool->numThreads = 0;
+        pContext->NumFEThreads     = 1;
+        pContext->NumBEThreads     = 1;
+        pPool->numThreads          = 0;
      }
      else if (pContext->threadInfo.MAX_WORKER_THREADS)
      {
          numThreads = std::min(pContext->threadInfo.MAX_WORKER_THREADS, numHWThreads);
          pContext->threadInfo.BASE_NUMA_NODE = 0;
-        pContext->threadInfo.BASE_CORE = 0;
-        pContext->threadInfo.BASE_THREAD = 0;
-        numAPIReservedThreads = 0;
+        pContext->threadInfo.BASE_CORE      = 0;
+        pContext->threadInfo.BASE_THREAD    = 0;
+        numAPIReservedThreads               = 0;
      }
      else
      {
@@ -1119,7 +1149,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
              if (numAPIThreadsPerCore == 2 && numHyperThreads == 1)
              {
                  // Adjust removed threads to make logic below work
-                numRemovedThreads = std::max(1U, (numRemovedThreads + numAPIThreadsPerCore - 1) / 2);
+                numRemovedThreads =
+                    std::max(1U, (numRemovedThreads + numAPIThreadsPerCore - 1) / 2);
              }
  
              numThreads -= numRemovedThreads;
@@ -1131,7 +1162,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      if (pContext->threadInfo.SINGLE_THREADED)
      {
          numAPIReservedThreads = 0;
-        numThreads = 1;
+        numThreads            = 1;
      }
  
      if (numAPIReservedThreads)
@@ -1149,7 +1180,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      }
      pPool->numReservedThreads = numAPIReservedThreads;
  
-    pPool->numThreads = numThreads;
+    pPool->numThreads          = numThreads;
      pContext->NumWorkerThreads = pPool->numThreads;
  
      pPool->pThreadData = new (std::nothrow) THREAD_DATA[pPool->numThreads];
@@ -1161,7 +1192,8 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
      pPool->pWorkerPrivateDataArray = nullptr;
      if (pContext->workerPrivateState.perWorkerPrivateStateSize)
      {
-        size_t perWorkerSize = AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
+        size_t perWorkerSize =
+            AlignUpPow2(pContext->workerPrivateState.perWorkerPrivateStateSize, 64);
          size_t totalSize = perWorkerSize * pPool->numThreads;
          if (totalSize)
          {
@@ -1191,19 +1223,19 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  
      if (pContext->threadInfo.MAX_WORKER_THREADS)
      {
-        bool bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
+        bool     bForceBindProcGroup = (numThreads > numThreadsPerProcGroup);
          uint32_t numProcGroups = (numThreads + numThreadsPerProcGroup - 1) / numThreadsPerProcGroup;
          // When MAX_WORKER_THREADS is set we don't bother to bind to specific HW threads
          // But Windows will still require binding to specific process groups
          for (uint32_t workerId = 0; workerId < numThreads; ++workerId)
          {
-            pPool->pThreadData[workerId].workerId = workerId;
-            pPool->pThreadData[workerId].procGroupId = workerId % numProcGroups;
-            pPool->pThreadData[workerId].threadId = 0;
-            pPool->pThreadData[workerId].numaId = 0;
-            pPool->pThreadData[workerId].coreId = 0;
-            pPool->pThreadData[workerId].htId = 0;
-            pPool->pThreadData[workerId].pContext = pContext;
+            pPool->pThreadData[workerId].workerId           = workerId;
+            pPool->pThreadData[workerId].procGroupId        = workerId % numProcGroups;
+            pPool->pThreadData[workerId].threadId           = 0;
+            pPool->pThreadData[workerId].numaId             = 0;
+            pPool->pThreadData[workerId].coreId             = 0;
+            pPool->pThreadData[workerId].htId               = 0;
+            pPool->pThreadData[workerId].pContext           = pContext;
              pPool->pThreadData[workerId].forceBindProcGroup = bForceBindProcGroup;
  
              pContext->NumBEThreads++;
@@ -1228,7 +1260,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
              pPool->numaMask = 0;
          }
  
-        uint32_t workerId = 0;
+        uint32_t workerId           = 0;
          uint32_t numReservedThreads = numAPIReservedThreads;
          for (uint32_t n = 0; n < numNodes; ++n)
          {
@@ -1236,7 +1268,7 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
              {
                  break;
              }
-            auto& node = nodes[n + pContext->threadInfo.BASE_NUMA_NODE];
+            auto&    node     = nodes[n + pContext->threadInfo.BASE_NUMA_NODE];
              uint32_t numCores = numCoresPerNode;
              for (uint32_t c = 0; c < numCores; ++c)
              {
@@ -1258,26 +1290,32 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
                          --numRemovedThreads;
                          SWR_REL_ASSERT(numReservedThreads);
                          --numReservedThreads;
-                        pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
+                        pPool->pApiThreadData[numReservedThreads].workerId    = 0xFFFFFFFFU;
                          pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
-                        pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t];
-                        pPool->pApiThreadData[numReservedThreads].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
-                        pPool->pApiThreadData[numReservedThreads].coreId = c + pContext->threadInfo.BASE_CORE;
-                        pPool->pApiThreadData[numReservedThreads].htId = t + pContext->threadInfo.BASE_THREAD;
-                        pPool->pApiThreadData[numReservedThreads].pContext = pContext;
+                        pPool->pApiThreadData[numReservedThreads].threadId    = core.threadIds[t];
+                        pPool->pApiThreadData[numReservedThreads].numaId =
+                            useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+                        pPool->pApiThreadData[numReservedThreads].coreId =
+                            c + pContext->threadInfo.BASE_CORE;
+                        pPool->pApiThreadData[numReservedThreads].htId =
+                            t + pContext->threadInfo.BASE_THREAD;
+                        pPool->pApiThreadData[numReservedThreads].pContext           = pContext;
                          pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
  
-
                          if (numAPIThreadsPerCore > numHyperThreads && numReservedThreads)
                          {
                              --numReservedThreads;
-                            pPool->pApiThreadData[numReservedThreads].workerId = 0xFFFFFFFFU;
+                            pPool->pApiThreadData[numReservedThreads].workerId    = 0xFFFFFFFFU;
                              pPool->pApiThreadData[numReservedThreads].procGroupId = core.procGroup;
-                            pPool->pApiThreadData[numReservedThreads].threadId = core.threadIds[t + 1];
-                            pPool->pApiThreadData[numReservedThreads].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
-                            pPool->pApiThreadData[numReservedThreads].coreId = c + pContext->threadInfo.BASE_CORE;
-                            pPool->pApiThreadData[numReservedThreads].htId = t + pContext->threadInfo.BASE_THREAD;
-                            pPool->pApiThreadData[numReservedThreads].pContext = pContext;
+                            pPool->pApiThreadData[numReservedThreads].threadId =
+                                core.threadIds[t + 1];
+                            pPool->pApiThreadData[numReservedThreads].numaId =
+                                useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+                            pPool->pApiThreadData[numReservedThreads].coreId =
+                                c + pContext->threadInfo.BASE_CORE;
+                            pPool->pApiThreadData[numReservedThreads].htId =
+                                t + pContext->threadInfo.BASE_THREAD;
+                            pPool->pApiThreadData[numReservedThreads].pContext           = pContext;
                              pPool->pApiThreadData[numReservedThreads].forceBindProcGroup = false;
                          }
  
@@ -1286,12 +1324,14 @@ void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  
                      SWR_ASSERT(workerId < numThreads);
  
-                    pPool->pThreadData[workerId].workerId = workerId;
+                    pPool->pThreadData[workerId].workerId    = workerId;
                      pPool->pThreadData[workerId].procGroupId = core.procGroup;
-                    pPool->pThreadData[workerId].threadId = core.threadIds[t + pContext->threadInfo.BASE_THREAD];
-                    pPool->pThreadData[workerId].numaId = useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
-                    pPool->pThreadData[workerId].coreId = c + pContext->threadInfo.BASE_CORE;
-                    pPool->pThreadData[workerId].htId = t + pContext->threadInfo.BASE_THREAD;
+                    pPool->pThreadData[workerId].threadId =
+                        core.threadIds[t + pContext->threadInfo.BASE_THREAD];
+                    pPool->pThreadData[workerId].numaId =
+                        useNuma ? (n + pContext->threadInfo.BASE_NUMA_NODE) : 0;
+                    pPool->pThreadData[workerId].coreId   = c + pContext->threadInfo.BASE_CORE;
+                    pPool->pThreadData[workerId].htId     = t + pContext->threadInfo.BASE_THREAD;
                      pPool->pThreadData[workerId].pContext = pContext;
                      pPool->pThreadData[workerId].forceBindProcGroup = false;
  
@@ -1319,7 +1359,8 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  
      for (uint32_t workerId = 0; workerId < pContext->NumWorkerThreads; ++workerId)
      {
-        pPool->pThreads[workerId] = new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
+        pPool->pThreads[workerId] =
+            new std::thread(workerThreadInit<true, true>, &pPool->pThreadData[workerId]);
      }
  }
  
@@ -1327,7 +1368,7 @@ void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  /// @brief Destroys thread pool.
  /// @param pContext - pointer to context
  /// @param pPool - pointer to thread pool object.
-void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
+void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool)
  {
      // Wait for all threads to finish
      SwrWaitForIdle(pContext);
@@ -1340,12 +1381,13 @@ void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool)
              // Detach from thread.  Cannot join() due to possibility (in Windows) of code
              // in some DLLMain(THREAD_DETATCH case) blocking the thread until after this returns.
              pPool->pThreads[t]->detach();
-            delete(pPool->pThreads[t]);
+            delete (pPool->pThreads[t]);
          }
  
          if (pContext->workerPrivateState.pfnFinishWorkerData)
          {
-            pContext->workerPrivateState.pfnFinishWorkerData(pPool->pThreadData[t].pWorkerPrivateData, t);
+            pContext->workerPrivateState.pfnFinishWorkerData(
+                pPool->pThreadData[t].pWorkerPrivateData, t);
          }
      }
  
diff --git a/src/gallium/drivers/swr/rasterizer/core/threads.h b/src/gallium/drivers/swr/rasterizer/core/threads.h

index 0489a3cc6cf4b83d59089a7f1d0763ce068f6b6c..d0f4b30dca0129b71d29481bf6f71e28441ff4b8 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/threads.h
+++ b/src/gallium/drivers/swr/rasterizer/core/threads.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file threads.h
-*
-* @brief Definitions for SWR threading model.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file threads.h
+ *
+ * @brief Definitions for SWR threading model.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "knobs.h"
@@ -39,39 +39,43 @@ struct SWR_WORKER_PRIVATE_STATE;
  
  struct THREAD_DATA
  {
-    void* pWorkerPrivateData;// Pointer to per-worker private data
-    uint32_t procGroupId;   // Will always be 0 for non-Windows OS
-    uint32_t threadId;      // within the procGroup for Windows
-    uint32_t numaId;        // NUMA node id
-    uint32_t coreId;        // Core id
-    uint32_t htId;          // Hyperthread id
-    uint32_t workerId;
-    SWR_CONTEXT *pContext;
-    bool forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
+    void*        pWorkerPrivateData; // Pointer to per-worker private data
+    uint32_t     procGroupId;        // Will always be 0 for non-Windows OS
+    uint32_t     threadId;           // within the procGroup for Windows
+    uint32_t     numaId;             // NUMA node id
+    uint32_t     coreId;             // Core id
+    uint32_t     htId;               // Hyperthread id
+    uint32_t     workerId;
+    SWR_CONTEXT* pContext;
+    bool         forceBindProcGroup; // Only useful when MAX_WORKER_THREADS is set.
  };
  
-
  struct THREAD_POOL
  {
-    THREAD_PTR* pThreads;
-    uint32_t numThreads;
-    uint32_t numaMask;
-    THREAD_DATA *pThreadData;
-    void* pWorkerPrivateDataArray; // All memory for worker private data
-    uint32_t numReservedThreads; // Number of threads reserved for API use
-    THREAD_DATA *pApiThreadData;
+    THREAD_PTR*  pThreads;
+    uint32_t     numThreads;
+    uint32_t     numaMask;
+    THREAD_DATA* pThreadData;
+    void*        pWorkerPrivateDataArray; // All memory for worker private data
+    uint32_t     numReservedThreads;      // Number of threads reserved for API use
+    THREAD_DATA* pApiThreadData;
  };
  
  struct TileSet;
  
-void CreateThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
+void CreateThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
  void StartThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
-void DestroyThreadPool(SWR_CONTEXT *pContext, THREAD_POOL *pPool);
+void DestroyThreadPool(SWR_CONTEXT* pContext, THREAD_POOL* pPool);
  
  // Expose FE and BE worker functions to the API thread if single threaded
-void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE);
-bool WorkOnFifoBE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE, TileSet &usedTiles, uint32_t numaNode, uint32_t numaMask);
-void WorkOnCompute(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawBE);
+void    WorkOnFifoFE(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawFE);
+bool    WorkOnFifoBE(SWR_CONTEXT* pContext,
+                     uint32_t     workerId,
+                     uint32_t&    curDrawBE,
+                     TileSet&     usedTiles,
+                     uint32_t     numaNode,
+                     uint32_t     numaMask);
+void    WorkOnCompute(SWR_CONTEXT* pContext, uint32_t workerId, uint32_t& curDrawBE);
  int32_t CompleteDrawContext(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC);
  
-void BindApiThread(SWR_CONTEXT *pContext, uint32_t apiThreadId);
+void BindApiThread(SWR_CONTEXT* pContext, uint32_t apiThreadId);
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp

index 1bdef4bd7dd1b709b07aae9b94a06f39e4049748..87d5373a215d98c7cb155558eee008334be2227e 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tilemgr.cpp
-*
-* @brief Implementation for Macro Tile Manager which provides the facilities
-*        for threads to work on an macro tile.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tilemgr.cpp
+ *
+ * @brief Implementation for Macro Tile Manager which provides the facilities
+ *        for threads to work on an macro tile.
+ *
+ ******************************************************************************/
  #include <unordered_map>
  
  #include "fifo.hpp"
@@ -33,17 +33,15 @@
  #include "core/multisample.h"
  #include "rdtsc_core.h"
  
-MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena)
-{
-}
+MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena) {}
  
-void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
+void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK* pWork)
  {
      // Should not enqueue more then what we have backing for in the hot tile manager.
      SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
      SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
  
-    if ((x & ~(KNOB_NUM_HOT_TILES_X-1)) | (y & ~(KNOB_NUM_HOT_TILES_Y-1)))
+    if ((x & ~(KNOB_NUM_HOT_TILES_X - 1)) | (y & ~(KNOB_NUM_HOT_TILES_Y - 1)))
      {
          return;
      }
@@ -55,7 +53,7 @@ void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
          mTiles.resize((16 + id) * 2);
      }
  
-    MacroTileQueue *pTile = mTiles[id];
+    MacroTileQueue* pTile = mTiles[id];
      if (!pTile)
      {
          pTile = mTiles[id] = new MacroTileQueue();
@@ -76,8 +74,8 @@ void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK *pWork)
  void MacroTileMgr::markTileComplete(uint32_t id)
  {
      SWR_ASSERT(mTiles.size() > id);
-    MacroTileQueue &tile = *mTiles[id];
-    uint32_t numTiles = tile.mWorkItemsFE;
+    MacroTileQueue& tile     = *mTiles[id];
+    uint32_t        numTiles = tile.mWorkItemsFE;
      InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
  
      _ReadWriteBarrier();
@@ -90,8 +88,14 @@ void MacroTileMgr::markTileComplete(uint32_t id)
      tile.mWorkItemsBE = 0;
  }
  
-HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerPrivateData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples,
-    uint32_t renderTargetArrayIndex)
+HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT*                pContext,
+                                DRAW_CONTEXT*               pDC,
+                                HANDLE                      hWorkerPrivateData,
+                                uint32_t                    macroID,
+                                SWR_RENDERTARGET_ATTACHMENT attachment,
+                                bool                        create,
+                                uint32_t                    numSamples,
+                                uint32_t                    renderTargetArrayIndex)
  {
      uint32_t x, y;
      MacroTileMgr::getTileIndices(macroID, x, y);
@@ -99,17 +103,18 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE
      SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
      SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
  
-    HotTileSet &tile = mHotTiles[x][y];
-    HOTTILE& hotTile = tile.Attachment[attachment];
+    HotTileSet& tile    = mHotTiles[x][y];
+    HOTTILE&    hotTile = tile.Attachment[attachment];
      if (hotTile.pBuffer == NULL)
      {
          if (create)
          {
-            uint32_t size = numSamples * mHotTileSize[attachment];
+            uint32_t size     = numSamples * mHotTileSize[attachment];
              uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
-            hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
-            hotTile.state = HOTTILE_INVALID;
-            hotTile.numSamples = numSamples;
+            hotTile.pBuffer =
+                (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
+            hotTile.state                  = HOTTILE_INVALID;
+            hotTile.numSamples             = numSamples;
              hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
          }
          else
@@ -122,22 +127,22 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE
          // free the old tile and create a new one with enough space to hold all samples
          if (numSamples > hotTile.numSamples)
          {
-            // tile should be either uninitialized or resolved if we're deleting and switching to a 
+            // tile should be either uninitialized or resolved if we're deleting and switching to a
              // new sample count
-            SWR_ASSERT((hotTile.state == HOTTILE_INVALID) ||
-                (hotTile.state == HOTTILE_RESOLVED) ||
-                (hotTile.state == HOTTILE_CLEAR));
+            SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
+                       (hotTile.state == HOTTILE_CLEAR));
              FreeHotTileMem(hotTile.pBuffer);
  
-            uint32_t size = numSamples * mHotTileSize[attachment];
+            uint32_t size     = numSamples * mHotTileSize[attachment];
              uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
-            hotTile.pBuffer = (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
-            hotTile.state = HOTTILE_INVALID;
+            hotTile.pBuffer =
+                (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
+            hotTile.state      = HOTTILE_INVALID;
              hotTile.numSamples = numSamples;
          }
  
-        // if requested render target array index isn't currently loaded, need to store out the current hottile 
-        // and load the requested array slice
+        // if requested render target array index isn't currently loaded, need to store out the
+        // current hottile and load the requested array slice
          if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
          {
              SWR_FORMAT format;
@@ -150,10 +155,19 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE
              case SWR_ATTACHMENT_COLOR4:
              case SWR_ATTACHMENT_COLOR5:
              case SWR_ATTACHMENT_COLOR6:
-            case SWR_ATTACHMENT_COLOR7: format = KNOB_COLOR_HOT_TILE_FORMAT; break;
-            case SWR_ATTACHMENT_DEPTH: format = KNOB_DEPTH_HOT_TILE_FORMAT; break;
-            case SWR_ATTACHMENT_STENCIL: format = KNOB_STENCIL_HOT_TILE_FORMAT; break;
-            default: SWR_INVALID("Unknown attachment: %d", attachment); format = KNOB_COLOR_HOT_TILE_FORMAT; break;
+            case SWR_ATTACHMENT_COLOR7:
+                format = KNOB_COLOR_HOT_TILE_FORMAT;
+                break;
+            case SWR_ATTACHMENT_DEPTH:
+                format = KNOB_DEPTH_HOT_TILE_FORMAT;
+                break;
+            case SWR_ATTACHMENT_STENCIL:
+                format = KNOB_STENCIL_HOT_TILE_FORMAT;
+                break;
+            default:
+                SWR_INVALID("Unknown attachment: %d", attachment);
+                format = KNOB_COLOR_HOT_TILE_FORMAT;
+                break;
              }
  
              if (hotTile.state == HOTTILE_CLEAR)
@@ -170,23 +184,38 @@ HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE
  
              if (hotTile.state == HOTTILE_DIRTY)
              {
-                pContext->pfnStoreTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
-                    x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, hotTile.renderTargetArrayIndex, hotTile.pBuffer);
+                pContext->pfnStoreTile(GetPrivateState(pDC),
+                                       hWorkerPrivateData,
+                                       format,
+                                       attachment,
+                                       x * KNOB_MACROTILE_X_DIM,
+                                       y * KNOB_MACROTILE_Y_DIM,
+                                       hotTile.renderTargetArrayIndex,
+                                       hotTile.pBuffer);
              }
  
-            pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, format, attachment,
-                x * KNOB_MACROTILE_X_DIM, y * KNOB_MACROTILE_Y_DIM, renderTargetArrayIndex, hotTile.pBuffer);
+            pContext->pfnLoadTile(GetPrivateState(pDC),
+                                  hWorkerPrivateData,
+                                  format,
+                                  attachment,
+                                  x * KNOB_MACROTILE_X_DIM,
+                                  y * KNOB_MACROTILE_Y_DIM,
+                                  renderTargetArrayIndex,
+                                  hotTile.pBuffer);
  
              hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
-            hotTile.state = HOTTILE_DIRTY;
+            hotTile.state                  = HOTTILE_DIRTY;
          }
      }
      return &tile.Attachment[attachment];
  }
  
-HOTTILE* HotTileMgr::GetHotTileNoLoad(
-    SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID,
-    SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples)
+HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT*                pContext,
+                                      DRAW_CONTEXT*               pDC,
+                                      uint32_t                    macroID,
+                                      SWR_RENDERTARGET_ATTACHMENT attachment,
+                                      bool                        create,
+                                      uint32_t                    numSamples)
  {
      uint32_t x, y;
      MacroTileMgr::getTileIndices(macroID, x, y);
@@ -194,16 +223,16 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad(
      SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
      SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
  
-    HotTileSet &tile = mHotTiles[x][y];
-    HOTTILE& hotTile = tile.Attachment[attachment];
+    HotTileSet& tile    = mHotTiles[x][y];
+    HOTTILE&    hotTile = tile.Attachment[attachment];
      if (hotTile.pBuffer == NULL)
      {
          if (create)
          {
-            uint32_t size = numSamples * mHotTileSize[attachment];
-            hotTile.pBuffer = (uint8_t*)AlignedMalloc(size, 64);
-            hotTile.state = HOTTILE_INVALID;
-            hotTile.numSamples = numSamples;
+            uint32_t size                  = numSamples * mHotTileSize[attachment];
+            hotTile.pBuffer                = (uint8_t*)AlignedMalloc(size, 64);
+            hotTile.state                  = HOTTILE_INVALID;
+            hotTile.numSamples             = numSamples;
              hotTile.renderTargetArrayIndex = 0;
          }
          else
@@ -216,23 +245,25 @@ HOTTILE* HotTileMgr::GetHotTileNoLoad(
  }
  
  #if USE_8x2_TILE_BACKEND
-void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearColorHotTile(
+    const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  {
      // Load clear color into SIMD register...
-    float *pClearData = (float *)(pHotTile->clearData);
-    simd16scalar valR = _simd16_broadcast_ss(&pClearData[0]);
-    simd16scalar valG = _simd16_broadcast_ss(&pClearData[1]);
-    simd16scalar valB = _simd16_broadcast_ss(&pClearData[2]);
-    simd16scalar valA = _simd16_broadcast_ss(&pClearData[3]);
+    float*       pClearData = (float*)(pHotTile->clearData);
+    simd16scalar valR       = _simd16_broadcast_ss(&pClearData[0]);
+    simd16scalar valG       = _simd16_broadcast_ss(&pClearData[1]);
+    simd16scalar valB       = _simd16_broadcast_ss(&pClearData[2]);
+    simd16scalar valA       = _simd16_broadcast_ss(&pClearData[3]);
  
-    float *pfBuf = (float *)pHotTile->pBuffer;
+    float*   pfBuf      = (float*)pHotTile->pBuffer;
      uint32_t numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
              {
                  _simd16_store_ps(pfBuf, valR);
                  pfBuf += KNOB_SIMD16_WIDTH;
@@ -250,20 +281,22 @@ void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro ti
      }
  }
  
-void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearDepthHotTile(
+    const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  {
      // Load clear color into SIMD register...
-    float *pClearData = (float *)(pHotTile->clearData);
-    simd16scalar valZ = _simd16_broadcast_ss(&pClearData[0]);
+    float*       pClearData = (float*)(pHotTile->clearData);
+    simd16scalar valZ       = _simd16_broadcast_ss(&pClearData[0]);
  
-    float *pfBuf = (float *)pHotTile->pBuffer;
+    float*   pfBuf      = (float*)pHotTile->pBuffer;
      uint32_t numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
              {
                  _simd16_store_ps(pfBuf, valZ);
                  pfBuf += KNOB_SIMD16_WIDTH;
@@ -276,18 +309,19 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
  {
      // convert from F32 to U8.
      uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
-    //broadcast 32x into __m256i...
+    // broadcast 32x into __m256i...
      simd16scalari valS = _simd16_set1_epi8(clearVal);
  
-    simd16scalari *pBuf = (simd16scalari *)pHotTile->pBuffer;
-    uint32_t numSamples = pHotTile->numSamples;
+    simd16scalari* pBuf       = (simd16scalari*)pHotTile->pBuffer;
+    uint32_t       numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
              // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
              {
                  _simd16_store_si(pBuf, valS);
                  pBuf += 1;
@@ -297,23 +331,26 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
  }
  
  #else
-void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearColorHotTile(
+    const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  {
      // Load clear color into SIMD register...
-    float *pClearData = (float*)(pHotTile->clearData);
-    simdscalar valR = _simd_broadcast_ss(&pClearData[0]);
-    simdscalar valG = _simd_broadcast_ss(&pClearData[1]);
-    simdscalar valB = _simd_broadcast_ss(&pClearData[2]);
-    simdscalar valA = _simd_broadcast_ss(&pClearData[3]);
+    float*     pClearData = (float*)(pHotTile->clearData);
+    simdscalar valR       = _simd_broadcast_ss(&pClearData[0]);
+    simdscalar valG       = _simd_broadcast_ss(&pClearData[1]);
+    simdscalar valB       = _simd_broadcast_ss(&pClearData[2]);
+    simdscalar valA       = _simd_broadcast_ss(&pClearData[3]);
  
-    float *pfBuf = (float*)pHotTile->pBuffer;
+    float*   pfBuf      = (float*)pHotTile->pBuffer;
      uint32_t numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) //SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si +=
+                 SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM) // SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM); si++)
              {
                  _simd_store_ps(pfBuf, valR);
                  pfBuf += KNOB_SIMD_WIDTH;
@@ -328,20 +365,22 @@ void HotTileMgr::ClearColorHotTile(const HOTTILE* pHotTile)  // clear a macro ti
      }
  }
  
-void HotTileMgr::ClearDepthHotTile(const HOTTILE* pHotTile)  // clear a macro tile from float4 clear data.
+void HotTileMgr::ClearDepthHotTile(
+    const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  {
      // Load clear color into SIMD register...
-    float *pClearData = (float*)(pHotTile->clearData);
-    simdscalar valZ = _simd_broadcast_ss(&pClearData[0]);
+    float*     pClearData = (float*)(pHotTile->clearData);
+    simdscalar valZ       = _simd_broadcast_ss(&pClearData[0]);
  
-    float *pfBuf = (float*)pHotTile->pBuffer;
+    float*   pfBuf      = (float*)pHotTile->pBuffer;
      uint32_t numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM)
              {
                  _simd_store_ps(pfBuf, valZ);
                  pfBuf += KNOB_SIMD_WIDTH;
@@ -354,18 +393,19 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
  {
      // convert from F32 to U8.
      uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
-    //broadcast 32x into __m256i...
+    // broadcast 32x into __m256i...
      simdscalari valS = _simd_set1_epi8(clearVal);
  
-    simdscalari* pBuf = (simdscalari*)pHotTile->pBuffer;
-    uint32_t numSamples = pHotTile->numSamples;
+    simdscalari* pBuf       = (simdscalari*)pHotTile->pBuffer;
+    uint32_t     numSamples = pHotTile->numSamples;
  
      for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
      {
          for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
          {
              // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
-            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples); si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
+            for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
+                 si += SIMD_TILE_X_DIM * SIMD_TILE_Y_DIM * 4)
              {
                  _simd_store_si(pBuf, valS);
                  pBuf += 1;
@@ -383,9 +423,12 @@ void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
  /// to avoid unnecessary setup every triangle
  /// @todo support deferred clear
  /// @param pCreateInfo - pointer to creation info.
-void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID)
+void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
+                                    DRAW_CONTEXT* pDC,
+                                    uint32_t      workerId,
+                                    uint32_t      macroID)
  {
-    const API_STATE& state = GetApiState(pDC);
+    const API_STATE& state    = GetApiState(pDC);
      HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  
      uint32_t x, y;
@@ -396,17 +439,31 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
      uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
  
      // check RT if enabled
-    unsigned long rtSlot = 0;
-    uint32_t colorHottileEnableMask = state.colorHottileEnable;
+    unsigned long rtSlot                 = 0;
+    uint32_t      colorHottileEnableMask = state.colorHottileEnable;
      while (_BitScanForward(&rtSlot, colorHottileEnableMask))
      {
-        HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), true, numSamples);
+        HOTTILE* pHotTile =
+            GetHotTile(pContext,
+                       pDC,
+                       hWorkerPrivateData,
+                       macroID,
+                       (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+                       true,
+                       numSamples);
  
          if (pHotTile->state == HOTTILE_INVALID)
          {
              RDTSC_BEGIN(BELoadTiles, pDC->drawId);
              // invalid hottile before draw requires a load from surface before we can draw to it
-            pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_COLOR_HOT_TILE_FORMAT, (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot), x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+            pContext->pfnLoadTile(GetPrivateState(pDC),
+                                  hWorkerPrivateData,
+                                  KNOB_COLOR_HOT_TILE_FORMAT,
+                                  (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
+                                  x,
+                                  y,
+                                  pHotTile->renderTargetArrayIndex,
+                                  pHotTile->pBuffer);
              pHotTile->state = HOTTILE_DIRTY;
              RDTSC_END(BELoadTiles, 0);
          }
@@ -424,12 +481,20 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
      // check depth if enabled
      if (state.depthHottileEnable)
      {
-        HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
+        HOTTILE* pHotTile = GetHotTile(
+            pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
          if (pHotTile->state == HOTTILE_INVALID)
          {
              RDTSC_BEGIN(BELoadTiles, pDC->drawId);
              // invalid hottile before draw requires a load from surface before we can draw to it
-            pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_DEPTH_HOT_TILE_FORMAT, SWR_ATTACHMENT_DEPTH, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+            pContext->pfnLoadTile(GetPrivateState(pDC),
+                                  hWorkerPrivateData,
+                                  KNOB_DEPTH_HOT_TILE_FORMAT,
+                                  SWR_ATTACHMENT_DEPTH,
+                                  x,
+                                  y,
+                                  pHotTile->renderTargetArrayIndex,
+                                  pHotTile->pBuffer);
              pHotTile->state = HOTTILE_DIRTY;
              RDTSC_END(BELoadTiles, 0);
          }
@@ -446,12 +511,20 @@ void HotTileMgr::InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, ui
      // check stencil if enabled
      if (state.stencilHottileEnable)
      {
-        HOTTILE* pHotTile = GetHotTile(pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
+        HOTTILE* pHotTile = GetHotTile(
+            pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
          if (pHotTile->state == HOTTILE_INVALID)
          {
              RDTSC_BEGIN(BELoadTiles, pDC->drawId);
              // invalid hottile before draw requires a load from surface before we can draw to it
-            pContext->pfnLoadTile(GetPrivateState(pDC), hWorkerPrivateData, KNOB_STENCIL_HOT_TILE_FORMAT, SWR_ATTACHMENT_STENCIL, x, y, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
+            pContext->pfnLoadTile(GetPrivateState(pDC),
+                                  hWorkerPrivateData,
+                                  KNOB_STENCIL_HOT_TILE_FORMAT,
+                                  SWR_ATTACHMENT_STENCIL,
+                                  x,
+                                  y,
+                                  pHotTile->renderTargetArrayIndex,
+                                  pHotTile->pBuffer);
              pHotTile->state = HOTTILE_DIRTY;
              RDTSC_END(BELoadTiles, 0);
          }
diff --git a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h

index 8392db1b05f9d2c93bf0de9e078f8c7dbd873ed4..7173b0248f12deab63108cd83db93bdd21b550b9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tilemgr.h
@@ -1,31 +1,31 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tilemgr.h
-*
-* @brief Definitions for Macro Tile Manager which provides the facilities
-*        for threads to work on an macro tile.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tilemgr.h
+ *
+ * @brief Definitions for Macro Tile Manager which provides the facilities
+ *        for threads to work on a macro tile.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <set>
@@ -41,22 +41,16 @@
  //////////////////////////////////////////////////////////////////////////
  struct MacroTileQueue
  {
-    MacroTileQueue() { }
+    MacroTileQueue() {}
      ~MacroTileQueue() { destroy(); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Returns number of work items queued for this tile.
-    uint32_t getNumQueued()
-    {
-        return mFifo.getNumQueued();
-    }
+    uint32_t getNumQueued() { return mFifo.getNumQueued(); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Attempt to lock the work fifo. If already locked then return false.
-    bool tryLock()
-    {
-        return mFifo.tryLock();
-    }
+    bool tryLock() { return mFifo.tryLock(); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Clear fifo and unlock it.
@@ -68,10 +62,7 @@ struct MacroTileQueue
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Peek at work sitting at the front of the fifo.
-    BE_WORK* peek()
-    {
-        return mFifo.peek();
-    }
+    BE_WORK* peek() { return mFifo.peek(); }
  
      template <typename ArenaT>
      bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
@@ -81,22 +72,16 @@ struct MacroTileQueue
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Move to next work item
-    void dequeue()
-    {
-        mFifo.dequeue_noinc();
-    }
+    void dequeue() { mFifo.dequeue_noinc(); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Destroy fifo
-    void destroy()
-    {
-        mFifo.destroy();
-    }
+    void destroy() { mFifo.destroy(); }
  
      ///@todo This will all be private.
      uint32_t mWorkItemsFE = 0;
      uint32_t mWorkItemsBE = 0;
-    uint32_t mId = 0;
+    uint32_t mId          = 0;
  
  private:
      QUEUE<BE_WORK> mFifo;
@@ -111,7 +96,7 @@ public:
      MacroTileMgr(CachingArena& arena);
      ~MacroTileMgr()
      {
-        for (auto *pTile : mTiles)
+        for (auto* pTile : mTiles)
          {
              delete pTile;
          }
@@ -126,16 +111,13 @@ public:
      }
  
      INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
-    void markTileComplete(uint32_t id);
+    void                                 markTileComplete(uint32_t id);
  
-    INLINE bool isWorkComplete()
-    {
-        return mWorkItemsProduced == mWorkItemsConsumed;
-    }
+    INLINE bool isWorkComplete() { return mWorkItemsProduced == mWorkItemsConsumed; }
  
-    void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
+    void enqueue(uint32_t x, uint32_t y, BE_WORK* pWork);
  
-    static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
+    static INLINE void getTileIndices(uint32_t tileID, uint32_t& x, uint32_t& y)
      {
          // Morton / Z order of tiles
          x = pext_u32(tileID, 0x55555555);
@@ -149,17 +131,21 @@ public:
      }
  
  private:
-    CachingArena& mArena;
+    CachingArena&                mArena;
      std::vector<MacroTileQueue*> mTiles;
  
      // Any tile that has work queued to it is a dirty tile.
      std::vector<MacroTileQueue*> mDirtyTiles;
  
-    OSALIGNLINE(long) mWorkItemsProduced { 0 };
-    OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 };
+    OSALIGNLINE(long) mWorkItemsProduced{0};
+    OSALIGNLINE(volatile long) mWorkItemsConsumed{0};
  };
  
-typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
+typedef void (*PFN_DISPATCH)(DRAW_CONTEXT* pDC,
+                             uint32_t      workerId,
+                             uint32_t      threadGroupId,
+                             void*&        pSpillFillBuffer,
+                             void*&        pScratchSpace);
  
  //////////////////////////////////////////////////////////////////////////
  /// DispatchQueue - work queue for dispatch
@@ -175,23 +161,20 @@ public:
      {
          // The available and outstanding counts start with total tasks.
          // At the start there are N tasks available and outstanding.
-        // When both the available and outstanding counts have reached 0 then all work has completed.
-        // When a worker starts on a threadgroup then it decrements the available count.
+        // When both the available and outstanding counts have reached 0 then all work has
+        // completed. When a worker starts on a threadgroup then it decrements the available count.
          // When a worker completes a threadgroup then it decrements the outstanding count.
  
-        mTasksAvailable = totalTasks;
+        mTasksAvailable   = totalTasks;
          mTasksOutstanding = totalTasks;
  
-        mpTaskData = pTaskData;
+        mpTaskData   = pTaskData;
          mPfnDispatch = pfnDispatch;
      }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Returns number of tasks available for this dispatch.
-    uint32_t getNumQueued()
-    {
-        return (mTasksAvailable > 0) ? mTasksAvailable : 0;
-    }
+    uint32_t getNumQueued() { return (mTasksAvailable > 0) ? mTasksAvailable : 0; }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Atomically decrement the work available count. If the result
@@ -224,50 +207,49 @@ public:
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Work is complete once both the available/outstanding counts have reached 0.
-    bool isWorkComplete()
-    {
-        return ((mTasksAvailable <= 0) &&
-                (mTasksOutstanding <= 0));
-    }
+    bool isWorkComplete() { return ((mTasksAvailable <= 0) && (mTasksOutstanding <= 0)); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Return pointer to task data.
-    const void* GetTasksData()
-    {
-        return mpTaskData;
-    }
+    const void* GetTasksData() { return mpTaskData; }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Dispatches a unit of work
-    void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
+    void dispatch(DRAW_CONTEXT* pDC,
+                  uint32_t      workerId,
+                  uint32_t      threadGroupId,
+                  void*&        pSpillFillBuffer,
+                  void*&        pScratchSpace)
      {
          SWR_ASSERT(mPfnDispatch != nullptr);
          mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
      }
  
-    void* mpTaskData{ nullptr };        // The API thread will set this up and the callback task function will interpet this.
-    PFN_DISPATCH mPfnDispatch{ nullptr };      // Function to call per dispatch
+    void* mpTaskData{nullptr}; // The API thread will set this up and the callback task function
+                               // will interpret this.
+    PFN_DISPATCH mPfnDispatch{nullptr}; // Function to call per dispatch
  
-    OSALIGNLINE(volatile long) mTasksAvailable{ 0 };
-    OSALIGNLINE(volatile long) mTasksOutstanding{ 0 };
+    OSALIGNLINE(volatile long) mTasksAvailable{0};
+    OSALIGNLINE(volatile long) mTasksOutstanding{0};
  };
  
-
  enum HOTTILE_STATE
  {
-    HOTTILE_INVALID,        // tile is in unitialized state and should be loaded with surface contents before rendering
-    HOTTILE_CLEAR,          // tile should be cleared
-    HOTTILE_DIRTY,          // tile has been rendered to
-    HOTTILE_RESOLVED,       // tile has been stored to memory
+    HOTTILE_INVALID,  // tile is in uninitialized state and should be loaded with surface contents
+                      // before rendering
+    HOTTILE_CLEAR,    // tile should be cleared
+    HOTTILE_DIRTY,    // tile has been rendered to
+    HOTTILE_RESOLVED, // tile has been stored to memory
  };
  
  struct HOTTILE
  {
-    uint8_t *pBuffer;
+    uint8_t*      pBuffer;
      HOTTILE_STATE state;
-    DWORD clearData[4];                 // May need to change based on pfnClearTile implementation.  Reorder for alignment?
+    DWORD clearData[4]; // May need to change based on pfnClearTile implementation.  Reorder for
+                        // alignment?
      uint32_t numSamples;
-    uint32_t renderTargetArrayIndex;    // current render target array index loaded
+    uint32_t renderTargetArrayIndex; // current render target array index loaded
  };
  
  union HotTileSet
@@ -291,10 +273,13 @@ public:
          // cache hottile size
          for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
          {
-            mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
+            mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+                              FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
          }
-        mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
-        mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
+        mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+                                             FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
+        mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
+                                               FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
      }
  
      ~HotTileMgr()
@@ -311,12 +296,26 @@ public:
          }
      }
  
-    void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
-
-    HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, HANDLE hWorkerData, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
-        uint32_t renderTargetArrayIndex = 0);
-
-    HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
+    void InitializeHotTiles(SWR_CONTEXT*  pContext,
+                            DRAW_CONTEXT* pDC,
+                            uint32_t      workerId,
+                            uint32_t      macroID);
+
+    HOTTILE* GetHotTile(SWR_CONTEXT*                pContext,
+                        DRAW_CONTEXT*               pDC,
+                        HANDLE                      hWorkerData,
+                        uint32_t                    macroID,
+                        SWR_RENDERTARGET_ATTACHMENT attachment,
+                        bool                        create,
+                        uint32_t                    numSamples             = 1,
+                        uint32_t                    renderTargetArrayIndex = 0);
+
+    HOTTILE* GetHotTileNoLoad(SWR_CONTEXT*                pContext,
+                              DRAW_CONTEXT*               pDC,
+                              uint32_t                    macroID,
+                              SWR_RENDERTARGET_ATTACHMENT attachment,
+                              bool                        create,
+                              uint32_t                    numSamples = 1);
  
      static void ClearColorHotTile(const HOTTILE* pHotTile);
      static void ClearDepthHotTile(const HOTTILE* pHotTile);
@@ -324,14 +323,15 @@ public:
  
  private:
      HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
-    uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
+    uint32_t   mHotTileSize[SWR_NUM_ATTACHMENTS];
  
      void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
      {
          void* p = nullptr;
  #if defined(_WIN32)
          HANDLE hProcess = GetCurrentProcess();
-        p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
+        p               = VirtualAllocExNuma(
+            hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
  #else
          p = AlignedMalloc(size, align);
  #endif
@@ -351,4 +351,3 @@ private:
          }
      }
  };
-
diff --git a/src/gallium/drivers/swr/rasterizer/core/tileset.h b/src/gallium/drivers/swr/rasterizer/core/tileset.h

index 3eb4c5d1f00ecbc9469eea0349afeddb5a28802b..e28c84d789f07b65cbb142cc0692a61121ca592d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/tileset.h
+++ b/src/gallium/drivers/swr/rasterizer/core/tileset.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file tileset.h
-*
-* @brief Custom bitset class for managing locked tiles
-*
-******************************************************************************/
+ * Copyright (C) 2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file tileset.h
+ *
+ * @brief Custom bitset class for managing locked tiles
+ *
+ ******************************************************************************/
  #pragma once
  
  struct TileSet
@@ -65,16 +65,13 @@ struct TileSet
  
  private:
      static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
-    static const size_t BITS_OFFSET = BITS_PER_WORD - 1;
+    static const size_t BITS_OFFSET   = BITS_PER_WORD - 1;
  
-    size_t              m_size = 0;
-    size_t              m_maxSet = 0;
-    size_t*             m_bits = nullptr;
+    size_t  m_size   = 0;
+    size_t  m_maxSet = 0;
+    size_t* m_bits   = nullptr;
  
-    INLINE size_t& _get_word(size_t idx)
-    {
-        return m_bits[idx / BITS_PER_WORD];
-    }
+    INLINE size_t& _get_word(size_t idx) { return m_bits[idx / BITS_PER_WORD]; }
  
      void _grow(size_t idx)
      {
@@ -83,10 +80,10 @@ private:
              return;
          }
  
-        size_t new_size = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
-        size_t num_words = new_size / BITS_PER_WORD;
-        size_t* newBits = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);
-        size_t copy_words = 0;
+        size_t  new_size   = (1 + idx + BITS_OFFSET) & ~BITS_OFFSET;
+        size_t  num_words  = new_size / BITS_PER_WORD;
+        size_t* newBits    = (size_t*)AlignedMalloc(sizeof(size_t) * num_words, 64);
+        size_t  copy_words = 0;
  
          if (m_bits)
          {
diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h b/src/gallium/drivers/swr/rasterizer/core/utils.h

index 7769e05a6787cdec93c57026530bb39d717643c7..27c9c606d1790de0af1497e4d510682ec66ce4f3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file utils.h
-*
-* @brief Utilities used by SWR core.
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file utils.h
+ *
+ * @brief Utilities used by SWR core.
+ *
+ ******************************************************************************/
  #pragma once
  
  #include <string.h>
@@ -54,38 +54,44 @@ struct simd16BBox
  };
  #endif
  
-template<typename SIMD_T>
+template <typename SIMD_T>
  struct SIMDBBOX_T
  {
-    typename SIMD_T::Integer            ymin;
-    typename SIMD_T::Integer            ymax;
-    typename SIMD_T::Integer            xmin;
-    typename SIMD_T::Integer            xmax;
+    typename SIMD_T::Integer ymin;
+    typename SIMD_T::Integer ymax;
+    typename SIMD_T::Integer xmin;
+    typename SIMD_T::Integer xmax;
  };
  
  // helper function to unroll loops
-template<int Begin, int End, int Step = 1>
-struct UnrollerL {
-    template<typename Lambda>
-    INLINE static void step(Lambda& func) {
+template <int Begin, int End, int Step = 1>
+struct UnrollerL
+{
+    template <typename Lambda>
+    INLINE static void step(Lambda& func)
+    {
          func(Begin);
          UnrollerL<Begin + Step, End, Step>::step(func);
      }
  };
  
-template<int End, int Step>
-struct UnrollerL<End, End, Step> {
-    template<typename Lambda>
-    static void step(Lambda& func) {
+template <int End, int Step>
+struct UnrollerL<End, End, Step>
+{
+    template <typename Lambda>
+    static void step(Lambda& func)
+    {
      }
  };
  
  // helper function to unroll loops, with mask to skip specific iterations
-template<int Begin, int End, int Step = 1, int Mask = 0x7f>
-struct UnrollerLMask {
-    template<typename Lambda>
-    INLINE static void step(Lambda& func) {
-        if(Mask & (1 << Begin))
+template <int Begin, int End, int Step = 1, int Mask = 0x7f>
+struct UnrollerLMask
+{
+    template <typename Lambda>
+    INLINE static void step(Lambda& func)
+    {
+        if (Mask & (1 << Begin))
          {
              func(Begin);
          }
@@ -93,29 +99,31 @@ struct UnrollerLMask {
      }
  };
  
-template<int End, int Step, int Mask>
-struct UnrollerLMask<End, End, Step, Mask> {
-    template<typename Lambda>
-    static void step(Lambda& func) {
+template <int End, int Step, int Mask>
+struct UnrollerLMask<End, End, Step, Mask>
+{
+    template <typename Lambda>
+    static void step(Lambda& func)
+    {
      }
  };
  
  // general CRC compute
  INLINE
-uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
+uint32_t ComputeCRC(uint32_t crc, const void* pData, uint32_t size)
  {
  #if defined(_WIN64) || defined(__x86_64__)
-    uint32_t sizeInQwords = size / sizeof(uint64_t);
-    uint32_t sizeRemainderBytes = size % sizeof(uint64_t);
-    uint64_t* pDataWords = (uint64_t*)pData;
+    uint32_t  sizeInQwords       = size / sizeof(uint64_t);
+    uint32_t  sizeRemainderBytes = size % sizeof(uint64_t);
+    uint64_t* pDataWords         = (uint64_t*)pData;
      for (uint32_t i = 0; i < sizeInQwords; ++i)
      {
          crc = (uint32_t)_mm_crc32_u64(crc, *pDataWords++);
      }
  #else
-    uint32_t sizeInDwords = size / sizeof(uint32_t);
-    uint32_t sizeRemainderBytes = size % sizeof(uint32_t);
-    uint32_t* pDataWords = (uint32_t*)pData;
+    uint32_t  sizeInDwords       = size / sizeof(uint32_t);
+    uint32_t  sizeRemainderBytes = size % sizeof(uint32_t);
+    uint32_t* pDataWords         = (uint32_t*)pData;
      for (uint32_t i = 0; i < sizeInDwords; ++i)
      {
          crc = _mm_crc32_u32(crc, *pDataWords++);
@@ -135,8 +143,7 @@ uint32_t ComputeCRC(uint32_t crc, const void *pData, uint32_t size)
  /// Check specified bit within a data word
  //////////////////////////////////////////////////////////////////////////
  template <typename T>
-INLINE
-static bool CheckBit(T word, uint32_t bit)
+INLINE static bool CheckBit(T word, uint32_t bit)
  {
      return 0 != (word & (T(1) << bit));
  }
@@ -145,8 +152,7 @@ static bool CheckBit(T word, uint32_t bit)
  /// Add byte offset to any-type pointer
  //////////////////////////////////////////////////////////////////////////
  template <typename T>
-INLINE
-static T* PtrAdd(T* p, intptr_t offset)
+INLINE static T* PtrAdd(T* p, intptr_t offset)
  {
      intptr_t intp = reinterpret_cast<intptr_t>(p);
      return reinterpret_cast<T*>(intp + offset);
@@ -156,8 +162,7 @@ static T* PtrAdd(T* p, intptr_t offset)
  /// Is a power-of-2?
  //////////////////////////////////////////////////////////////////////////
  template <typename T>
-INLINE
-static bool IsPow2(T value)
+INLINE static bool IsPow2(T value)
  {
      return value == (value & (T(0) - value));
  }
@@ -167,8 +172,7 @@ static bool IsPow2(T value)
  /// Note: IsPow2(alignment) MUST be true
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1 AlignDownPow2(T1 value, T2 alignment)
+INLINE static T1 AlignDownPow2(T1 value, T2 alignment)
  {
      SWR_ASSERT(IsPow2(alignment));
      return value & ~T1(alignment - 1);
@@ -179,8 +183,7 @@ static T1 AlignDownPow2(T1 value, T2 alignment)
  /// Note: IsPow2(alignment) MUST be true
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1 AlignUpPow2(T1 value, T2 alignment)
+INLINE static T1 AlignUpPow2(T1 value, T2 alignment)
  {
      return AlignDownPow2(value + T1(alignment - 1), alignment);
  }
@@ -190,8 +193,7 @@ static T1 AlignUpPow2(T1 value, T2 alignment)
  /// Note: IsPow2(alignment) MUST be true
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1* AlignUpPow2(T1* value, T2 alignment)
+INLINE static T1* AlignUpPow2(T1* value, T2 alignment)
  {
      return reinterpret_cast<T1*>(
          AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
@@ -201,10 +203,12 @@ static T1* AlignUpPow2(T1* value, T2 alignment)
  /// Align down to specified alignment
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1 AlignDown(T1 value, T2 alignment)
+INLINE static T1 AlignDown(T1 value, T2 alignment)
  {
-    if (IsPow2(alignment)) { return AlignDownPow2(value, alignment); }
+    if (IsPow2(alignment))
+    {
+        return AlignDownPow2(value, alignment);
+    }
      return value - T1(value % alignment);
  }
  
@@ -212,8 +216,7 @@ static T1 AlignDown(T1 value, T2 alignment)
  /// Align down to specified alignment
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1* AlignDown(T1* value, T2 alignment)
+INLINE static T1* AlignDown(T1* value, T2 alignment)
  {
      return (T1*)AlignDown(uintptr_t(value), alignment);
  }
@@ -223,8 +226,7 @@ static T1* AlignDown(T1* value, T2 alignment)
  /// Note: IsPow2(alignment) MUST be true
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1 AlignUp(T1 value, T2 alignment)
+INLINE static T1 AlignUp(T1 value, T2 alignment)
  {
      return AlignDown(value + T1(alignment - 1), alignment);
  }
@@ -234,33 +236,31 @@ static T1 AlignUp(T1 value, T2 alignment)
  /// Note: IsPow2(alignment) MUST be true
  //////////////////////////////////////////////////////////////////////////
  template <typename T1, typename T2>
-INLINE
-static T1* AlignUp(T1* value, T2 alignment)
+INLINE static T1* AlignUp(T1* value, T2 alignment)
  {
      return AlignDown(PtrAdd(value, alignment - 1), alignment);
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// Helper structure used to access an array of elements that don't 
+/// Helper structure used to access an array of elements that don't
  /// correspond to a typical word size.
  //////////////////////////////////////////////////////////////////////////
-template<typename T, size_t BitsPerElementT, size_t ArrayLenT>
+template <typename T, size_t BitsPerElementT, size_t ArrayLenT>
  class BitsArray
  {
  private:
-    static const size_t BITS_PER_WORD = sizeof(size_t) * 8;
+    static const size_t BITS_PER_WORD     = sizeof(size_t) * 8;
      static const size_t ELEMENTS_PER_WORD = BITS_PER_WORD / BitsPerElementT;
-    static const size_t NUM_WORDS = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
-    static const size_t ELEMENT_MASK = (size_t(1) << BitsPerElementT) - 1;
+    static const size_t NUM_WORDS         = (ArrayLenT + ELEMENTS_PER_WORD - 1) / ELEMENTS_PER_WORD;
+    static const size_t ELEMENT_MASK      = (size_t(1) << BitsPerElementT) - 1;
  
      static_assert(ELEMENTS_PER_WORD * BitsPerElementT == BITS_PER_WORD,
-        "Element size must an integral fraction of pointer size");
+                  "Element size must an integral fraction of pointer size");
  
-    size_t              m_words[NUM_WORDS] = {};
+    size_t m_words[NUM_WORDS] = {};
  
  public:
-
-    T operator[] (size_t elementIndex) const
+    T operator[](size_t elementIndex) const
      {
          size_t word = m_words[elementIndex / ELEMENTS_PER_WORD];
          word >>= ((elementIndex % ELEMENTS_PER_WORD) * BitsPerElementT);
@@ -324,9 +324,11 @@ struct TemplateArgUnroller
          }
          if (TMax > TMin)
          {
-            return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(RangedArg<T, TMin, (T)(int(TMax)-1)>{iArg.val});
+            return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
+                RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val});
          }
-        SWR_ASSUME(false); return nullptr;
+        SWR_ASSUME(false);
+        return nullptr;
      }
      template <typename T, T TVal>
      static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg)
@@ -341,19 +343,23 @@ struct TemplateArgUnroller
      {
          if (iArg.val == TMax)
          {
-            return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(remainingArgs...);
+            return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TMax>>::GetFunc(
+                remainingArgs...);
          }
          if (TMax > TMin)
          {
-            return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
+            return TemplateArgUnroller<TermT, ArgsB...>::GetFunc(
+                RangedArg<T, TMin, (T)(int(TMax) - 1)>{iArg.val}, remainingArgs...);
          }
-        SWR_ASSUME(false); return nullptr;
+        SWR_ASSUME(false);
+        return nullptr;
      }
      template <typename T, T TVal, typename... TArgsT>
      static typename TermT::FuncType GetFunc(RangedArg<T, TVal, TVal> iArg, TArgsT... remainingArgs)
      {
          SWR_ASSERT(iArg.val == TVal);
-        return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(remainingArgs...);
+        return TemplateArgUnroller<TermT, ArgsB..., std::integral_constant<T, TVal>>::GetFunc(
+            remainingArgs...);
      }
  };
  
@@ -365,12 +371,13 @@ static INLINE std::string GetEnv(const std::string& variableName)
      std::string output;
  #if defined(_WIN32)
      DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
-    if (!valueSize) return output;
+    if (!valueSize)
+        return output;
      output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
      GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
  #else
-    char *env = getenv(variableName.c_str());
-    output = env ? env : "";
+    char* env = getenv(variableName.c_str());
+    output    = env ? env : "";
  #endif
  
      return output;
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp

index e9412b1b53c5328af96e54f90752457b8952025b..5cf527ecd89863b2e0b271b900387213e9431aeb 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file JitManager.cpp
-*
-* @brief Implementation if the Jit Manager.
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file JitManager.cpp
+ *
+ * @brief Implementation of the Jit Manager.
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  
  #include "JitManager.h"
@@ -59,21 +59,22 @@ using namespace SwrJit;
  //////////////////////////////////////////////////////////////////////////
  /// @brief Contructor for JitManager.
  /// @param simdWidth - SIMD width to be used in generated program.
-JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
-    : mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth), mArch(arch)
+JitManager::JitManager(uint32_t simdWidth, const char *arch, const char *core) :
+    mContext(), mBuilder(mContext), mIsModuleFinalized(true), mJitNumber(0), mVWidth(simdWidth),
+    mArch(arch)
  {
      InitializeNativeTarget();
      InitializeNativeTargetAsmPrinter();
      InitializeNativeTargetDisassembler();
  
  
-    TargetOptions    tOpts;
+    TargetOptions tOpts;
      tOpts.AllowFPOpFusion = FPOpFusion::Fast;
-    tOpts.NoInfsFPMath = false;
-    tOpts.NoNaNsFPMath = false;
+    tOpts.NoInfsFPMath    = false;
+    tOpts.NoNaNsFPMath    = false;
      tOpts.UnsafeFPMath = false;
  
-    //tOpts.PrintMachineCode    = true;
+    // tOpts.PrintMachineCode    = true;
  
      std::unique_ptr<Module> newModule(new Module("", mContext));
      mpCurrentModule = newModule.get();
@@ -81,10 +82,10 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
      StringRef hostCPUName;
  
      // force JIT to use the same CPU arch as the rest of swr
-    if(mArch.AVX512F())
+    if (mArch.AVX512F())
      {
  #if USE_SIMD16_SHADERS
-        if(mArch.AVX512ER())
+        if (mArch.AVX512ER())
          {
              hostCPUName = StringRef("knl");
          }
@@ -101,7 +102,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
              mVWidth = 8;
          }
      }
-    else if(mArch.AVX2())
+    else if (mArch.AVX2())
      {
          hostCPUName = StringRef("core-avx2");
          if (mVWidth == 0)
@@ -109,7 +110,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
              mVWidth = 8;
          }
      }
-    else if(mArch.AVX())
+    else if (mArch.AVX())
      {
          if (mArch.F16C())
          {
@@ -140,10 +141,10 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
  
      mpCurrentModule->setTargetTriple(sys::getProcessTriple());
      mpExec = EngineBuilder(std::move(newModule))
-        .setTargetOptions(tOpts)
-        .setOptLevel(optLevel)
-        .setMCPU(hostCPUName)
-        .create();
+                 .setTargetOptions(tOpts)
+                 .setOptLevel(optLevel)
+                 .setMCPU(hostCPUName)
+                 .create();
  
      if (KNOB_JIT_ENABLE_CACHE)
      {
@@ -162,7 +163,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
  #else
      // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
  #endif
-    std::vector<Type*> fsArgs;
+    std::vector<Type *> fsArgs;
  
      // llvm5 is picky and does not take a void * type
      fsArgs.push_back(PointerType::get(Gen_SWR_FETCH_CONTEXT(this), 0));
@@ -211,32 +212,44 @@ void JitManager::SetupNewModule()
  }
  
  
-DIType* JitManager::CreateDebugStructType(StructType* pType, const std::string& name, DIFile* pFile, uint32_t lineNum,
-    const std::vector<std::pair<std::string, uint32_t>>& members)
+DIType *
+JitManager::CreateDebugStructType(StructType *                                         pType,
+                                  const std::string &                                  name,
+                                  DIFile *                                             pFile,
+                                  uint32_t                                             lineNum,
+                                  const std::vector<std::pair<std::string, uint32_t>> &members)
  {
-    DIBuilder builder(*mpCurrentModule);
-    SmallVector<Metadata*, 8> ElemTypes;
-    DataLayout DL = DataLayout(mpCurrentModule);
-    uint32_t size = DL.getTypeAllocSizeInBits(pType);
-    uint32_t alignment = DL.getABITypeAlignment(pType);
-    DINode::DIFlags flags = DINode::DIFlags::FlagPublic;
-
-    DICompositeType* pDIStructTy = builder.createStructType(pFile, name, pFile, lineNum, size, alignment,
-        flags, nullptr, builder.getOrCreateArray(ElemTypes));
+    DIBuilder                  builder(*mpCurrentModule);
+    SmallVector<Metadata *, 8> ElemTypes;
+    DataLayout                 DL        = DataLayout(mpCurrentModule);
+    uint32_t                   size      = DL.getTypeAllocSizeInBits(pType);
+    uint32_t                   alignment = DL.getABITypeAlignment(pType);
+    DINode::DIFlags            flags     = DINode::DIFlags::FlagPublic;
+
+    DICompositeType *pDIStructTy = builder.createStructType(pFile,
+                                                            name,
+                                                            pFile,
+                                                            lineNum,
+                                                            size,
+                                                            alignment,
+                                                            flags,
+                                                            nullptr,
+                                                            builder.getOrCreateArray(ElemTypes));
  
      // Register mapping now to break loops (in case struct contains itself or pointers to itself)
      mDebugStructMap[pType] = pDIStructTy;
  
      uint32_t idx = 0;
-    for (auto& elem : pType->elements())
+    for (auto &elem : pType->elements())
      {
-        std::string name = members[idx].first;
-        uint32_t lineNum = members[idx].second;
-        size = DL.getTypeAllocSizeInBits(elem);
-        alignment = DL.getABITypeAlignment(elem);
-        uint32_t offset = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
-        llvm::DIType* pDebugTy = GetDebugType(elem);
-        ElemTypes.push_back(builder.createMemberType(pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
+        std::string name       = members[idx].first;
+        uint32_t    lineNum    = members[idx].second;
+        size                   = DL.getTypeAllocSizeInBits(elem);
+        alignment              = DL.getABITypeAlignment(elem);
+        uint32_t      offset   = DL.getStructLayout(pType)->getElementOffsetInBits(idx);
+        llvm::DIType *pDebugTy = GetDebugType(elem);
+        ElemTypes.push_back(builder.createMemberType(
+            pDIStructTy, name, pFile, lineNum, size, alignment, offset, flags, pDebugTy));
  
          idx++;
      }
@@ -245,54 +258,76 @@ DIType* JitManager::CreateDebugStructType(StructType* pType, const std::string&
      return pDIStructTy;
  }
  
-DIType* JitManager::GetDebugArrayType(Type* pTy)
+DIType *JitManager::GetDebugArrayType(Type *pTy)
  {
-    DIBuilder builder(*mpCurrentModule);
-    DataLayout DL = DataLayout(mpCurrentModule);
-    ArrayType* pArrayTy = cast<ArrayType>(pTy);
-    uint32_t size = DL.getTypeAllocSizeInBits(pArrayTy);
-    uint32_t alignment = DL.getABITypeAlignment(pArrayTy);
+    DIBuilder  builder(*mpCurrentModule);
+    DataLayout DL        = DataLayout(mpCurrentModule);
+    ArrayType *pArrayTy  = cast<ArrayType>(pTy);
+    uint32_t   size      = DL.getTypeAllocSizeInBits(pArrayTy);
+    uint32_t   alignment = DL.getABITypeAlignment(pArrayTy);
  
-    SmallVector<Metadata*, 8> Elems;
+    SmallVector<Metadata *, 8> Elems;
      Elems.push_back(builder.getOrCreateSubrange(0, pArrayTy->getNumElements()));
-    return builder.createArrayType(size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
+    return builder.createArrayType(
+        size, alignment, GetDebugType(pArrayTy->getElementType()), builder.getOrCreateArray(Elems));
  }
  
  // Create a DIType from llvm Type
-DIType* JitManager::GetDebugType(Type* pTy)
+DIType *JitManager::GetDebugType(Type *pTy)
  {
-    DIBuilder builder(*mpCurrentModule);
+    DIBuilder    builder(*mpCurrentModule);
      Type::TypeID id = pTy->getTypeID();
  
      switch (id)
      {
-    case Type::VoidTyID: return builder.createUnspecifiedType("void"); break;
-    case Type::HalfTyID: return builder.createBasicType("float16", 16, dwarf::DW_ATE_float); break;
-    case Type::FloatTyID: return builder.createBasicType("float", 32, dwarf::DW_ATE_float); break;
-    case Type::DoubleTyID: return builder.createBasicType("double", 64, dwarf::DW_ATE_float); break;
-    case Type::IntegerTyID: return GetDebugIntegerType(pTy); break;
-    case Type::StructTyID: return GetDebugStructType(pTy); break;
-    case Type::ArrayTyID: return GetDebugArrayType(pTy); break;
-    case Type::PointerTyID: return builder.createPointerType(GetDebugType(pTy->getPointerElementType()), 64, 64); break;
-    case Type::VectorTyID: return GetDebugVectorType(pTy); break;
-    case Type::FunctionTyID: return GetDebugFunctionType(pTy); break;
-    default: SWR_ASSERT(false, "Unimplemented llvm type");
+    case Type::VoidTyID:
+        return builder.createUnspecifiedType("void");
+        break;
+    case Type::HalfTyID:
+        return builder.createBasicType("float16", 16, dwarf::DW_ATE_float);
+        break;
+    case Type::FloatTyID:
+        return builder.createBasicType("float", 32, dwarf::DW_ATE_float);
+        break;
+    case Type::DoubleTyID:
+        return builder.createBasicType("double", 64, dwarf::DW_ATE_float);
+        break;
+    case Type::IntegerTyID:
+        return GetDebugIntegerType(pTy);
+        break;
+    case Type::StructTyID:
+        return GetDebugStructType(pTy);
+        break;
+    case Type::ArrayTyID:
+        return GetDebugArrayType(pTy);
+        break;
+    case Type::PointerTyID:
+        return builder.createPointerType(GetDebugType(pTy->getPointerElementType()), 64, 64);
+        break;
+    case Type::VectorTyID:
+        return GetDebugVectorType(pTy);
+        break;
+    case Type::FunctionTyID:
+        return GetDebugFunctionType(pTy);
+        break;
+    default:
+        SWR_ASSERT(false, "Unimplemented llvm type");
      }
      return nullptr;
  }
  
  // Create a DISubroutineType from an llvm FunctionType
-DIType* JitManager::GetDebugFunctionType(Type* pTy)
+DIType *JitManager::GetDebugFunctionType(Type *pTy)
  {
-    SmallVector<Metadata*, 8> ElemTypes;
-    FunctionType* pFuncTy = cast<FunctionType>(pTy);
-    DIBuilder builder(*mpCurrentModule);
+    SmallVector<Metadata *, 8> ElemTypes;
+    FunctionType *             pFuncTy = cast<FunctionType>(pTy);
+    DIBuilder                  builder(*mpCurrentModule);
  
      // Add result type
      ElemTypes.push_back(GetDebugType(pFuncTy->getReturnType()));
  
      // Add arguments
-    for (auto& param : pFuncTy->params())
+    for (auto &param : pFuncTy->params())
      {
          ElemTypes.push_back(GetDebugType(param));
      }
@@ -300,60 +335,74 @@ DIType* JitManager::GetDebugFunctionType(Type* pTy)
      return builder.createSubroutineType(builder.getOrCreateTypeArray(ElemTypes));
  }
  
-DIType* JitManager::GetDebugIntegerType(Type* pTy)
+DIType *JitManager::GetDebugIntegerType(Type *pTy)
  {
-    DIBuilder builder(*mpCurrentModule);
-    IntegerType* pIntTy = cast<IntegerType>(pTy);
+    DIBuilder    builder(*mpCurrentModule);
+    IntegerType *pIntTy = cast<IntegerType>(pTy);
      switch (pIntTy->getBitWidth())
      {
-    case 1: return builder.createBasicType("int1", 1, dwarf::DW_ATE_unsigned); break;
-    case 8: return builder.createBasicType("int8", 8, dwarf::DW_ATE_signed); break;
-    case 16: return builder.createBasicType("int16", 16, dwarf::DW_ATE_signed); break;
-    case 32: return builder.createBasicType("int", 32, dwarf::DW_ATE_signed); break;
-    case 64: return builder.createBasicType("int64", 64, dwarf::DW_ATE_signed); break;
-    case 128: return builder.createBasicType("int128", 128, dwarf::DW_ATE_signed); break;
-    default: SWR_ASSERT(false, "Unimplemented integer bit width");
+    case 1:
+        return builder.createBasicType("int1", 1, dwarf::DW_ATE_unsigned);
+        break;
+    case 8:
+        return builder.createBasicType("int8", 8, dwarf::DW_ATE_signed);
+        break;
+    case 16:
+        return builder.createBasicType("int16", 16, dwarf::DW_ATE_signed);
+        break;
+    case 32:
+        return builder.createBasicType("int", 32, dwarf::DW_ATE_signed);
+        break;
+    case 64:
+        return builder.createBasicType("int64", 64, dwarf::DW_ATE_signed);
+        break;
+    case 128:
+        return builder.createBasicType("int128", 128, dwarf::DW_ATE_signed);
+        break;
+    default:
+        SWR_ASSERT(false, "Unimplemented integer bit width");
      }
      return nullptr;
  }
  
-DIType* JitManager::GetDebugVectorType(Type* pTy)
+DIType *JitManager::GetDebugVectorType(Type *pTy)
  {
-    DIBuilder builder(*mpCurrentModule);
-    VectorType* pVecTy = cast<VectorType>(pTy);
-    DataLayout DL = DataLayout(mpCurrentModule);
-    uint32_t size = DL.getTypeAllocSizeInBits(pVecTy);
-    uint32_t alignment = DL.getABITypeAlignment(pVecTy);
-    SmallVector<Metadata*, 1> Elems;
+    DIBuilder                  builder(*mpCurrentModule);
+    VectorType *               pVecTy    = cast<VectorType>(pTy);
+    DataLayout                 DL        = DataLayout(mpCurrentModule);
+    uint32_t                   size      = DL.getTypeAllocSizeInBits(pVecTy);
+    uint32_t                   alignment = DL.getABITypeAlignment(pVecTy);
+    SmallVector<Metadata *, 1> Elems;
      Elems.push_back(builder.getOrCreateSubrange(0, pVecTy->getVectorNumElements()));
  
-    return builder.createVectorType(size, alignment, GetDebugType(pVecTy->getVectorElementType()), builder.getOrCreateArray(Elems));
-
+    return builder.createVectorType(size,
+                                    alignment,
+                                    GetDebugType(pVecTy->getVectorElementType()),
+                                    builder.getOrCreateArray(Elems));
  }
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Dump function x86 assembly to file.
  /// @note This should only be called after the module has been jitted to x86 and the
  ///       module will not be further accessed.
-void JitManager::DumpAsm(Function* pFunction, const char* fileName)
+void JitManager::DumpAsm(Function *pFunction, const char *fileName)
  {
      if (KNOB_DUMP_SHADER_IR)
      {
-
  #if defined(_WIN32)
          DWORD pid = GetCurrentProcessId();
-        char procname[MAX_PATH];
+        char  procname[MAX_PATH];
          GetModuleFileNameA(NULL, procname, MAX_PATH);
-        const char* pBaseName = strrchr(procname, '\\');
+        const char *      pBaseName = strrchr(procname, '\\');
          std::stringstream outDir;
          outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid << std::ends;
          CreateDirectoryPath(outDir.str().c_str());
  #endif
  
          std::error_code EC;
-        Module* pModule = pFunction->getParent();
-        const char *funcName = pFunction->getName().data();
-        char fName[256];
+        Module *        pModule  = pFunction->getParent();
+        const char *    funcName = pFunction->getName().data();
+        char            fName[256];
  #if defined(_WIN32)
          sprintf(fName, "%s\\%s.%s.asm", outDir.str().c_str(), funcName, fileName);
  #else
@@ -362,8 +411,8 @@ void JitManager::DumpAsm(Function* pFunction, const char* fileName)
  
          raw_fd_ostream filestream(fName, EC, llvm::sys::fs::F_None);
  
-        legacy::PassManager* pMPasses = new legacy::PassManager();
-        auto* pTarget = mpExec->getTargetMachine();
+        legacy::PassManager *pMPasses         = new legacy::PassManager();
+        auto *               pTarget          = mpExec->getTargetMachine();
          pTarget->Options.MCOptions.AsmVerbose = true;
          pTarget->addPassesToEmitFile(*pMPasses, filestream, TargetMachine::CGFT_AssemblyFile);
          pMPasses->run(*pModule);
@@ -376,9 +425,9 @@ std::string JitManager::GetOutputDir()
  {
  #if defined(_WIN32)
      DWORD pid = GetCurrentProcessId();
-    char procname[MAX_PATH];
+    char  procname[MAX_PATH];
      GetModuleFileNameA(NULL, procname, MAX_PATH);
-    const char* pBaseName = strrchr(procname, '\\');
+    const char *      pBaseName = strrchr(procname, '\\');
      std::stringstream outDir;
      outDir << JITTER_OUTPUT_DIR << pBaseName << "_" << pid;
      CreateDirectoryPath(outDir.str().c_str());
@@ -396,8 +445,8 @@ void JitManager::DumpToFile(Module *M, const char *fileName)
          std::string outDir = GetOutputDir();
  
          std::error_code EC;
-        const char *funcName = M->getName().data();
-        char fName[256];
+        const char *    funcName = M->getName().data();
+        char            fName[256];
  #if defined(_WIN32)
          sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
  #else
@@ -418,8 +467,8 @@ void JitManager::DumpToFile(Function *f, const char *fileName)
          std::string outDir = GetOutputDir();
  
          std::error_code EC;
-        const char *funcName = f->getName().data();
-        char fName[256];
+        const char *    funcName = f->getName().data();
+        char            fName[256];
  #if defined(_WIN32)
          sprintf(fName, "%s\\%s.%s.ll", outDir.c_str(), funcName, fileName);
  #else
@@ -436,34 +485,33 @@ void JitManager::DumpToFile(Function *f, const char *fileName)
          fd.flush();
  
          raw_fd_ostream fd_cfg(fName, EC, llvm::sys::fs::F_Text);
-        WriteGraph(fd_cfg, (const Function*)f);
+        WriteGraph(fd_cfg, (const Function *)f);
  
          fd_cfg.flush();
      }
  }
  
-extern "C"
-{
-    bool g_DllActive = true;
+extern "C" {
+bool g_DllActive = true;
  
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Create JIT context.
-    /// @param simdWidth - SIMD width to be used in generated program.
-    HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char* arch, const char* core)
-    {
-        return new JitManager(targetSimdWidth, arch, core);
-    }
+//////////////////////////////////////////////////////////////////////////
+/// @brief Create JIT context.
+/// @param targetSimdWidth - SIMD width to be used in generated program.
+HANDLE JITCALL JitCreateContext(uint32_t targetSimdWidth, const char *arch, const char *core)
+{
+    return new JitManager(targetSimdWidth, arch, core);
+}
  
-    //////////////////////////////////////////////////////////////////////////
-    /// @brief Destroy JIT context.
-    void JITCALL JitDestroyContext(HANDLE hJitContext)
+//////////////////////////////////////////////////////////////////////////
+/// @brief Destroy JIT context.
+void JITCALL JitDestroyContext(HANDLE hJitContext)
+{
+    if (g_DllActive)
      {
-        if (g_DllActive)
-        {
-            delete reinterpret_cast<JitManager*>(hJitContext);
-        }
+        delete reinterpret_cast<JitManager *>(hJitContext);
      }
  }
+}
  
  //////////////////////////////////////////////////////////////////////////
  /// JitCache
@@ -474,31 +522,29 @@ extern "C"
  //////////////////////////////////////////////////////////////////////////
  struct JitCacheFileHeader
  {
-    void Init(
-        uint32_t llCRC,
-        uint32_t objCRC,
-        const std::string& moduleID,
-        const std::string& cpu,
-        uint32_t optLevel,
-        uint64_t objSize)
+    void Init(uint32_t           llCRC,
+              uint32_t           objCRC,
+              const std::string &moduleID,
+              const std::string &cpu,
+              uint32_t           optLevel,
+              uint64_t           objSize)
      {
          m_objSize = objSize;
-        m_llCRC = llCRC;
-        m_objCRC = objCRC;
+        m_llCRC   = llCRC;
+        m_objCRC  = objCRC;
          strncpy(m_ModuleID, moduleID.c_str(), JC_STR_MAX_LEN - 1);
          m_ModuleID[JC_STR_MAX_LEN - 1] = 0;
          strncpy(m_Cpu, cpu.c_str(), JC_STR_MAX_LEN - 1);
          m_Cpu[JC_STR_MAX_LEN - 1] = 0;
-        m_optLevel = optLevel;
+        m_optLevel                = optLevel;
      }
  
  
-    bool IsValid(uint32_t llCRC, const std::string& moduleID, const std::string& cpu, uint32_t optLevel)
+    bool
+    IsValid(uint32_t llCRC, const std::string &moduleID, const std::string &cpu, uint32_t optLevel)
      {
-        if ((m_MagicNumber != JC_MAGIC_NUMBER) ||
-            (m_llCRC != llCRC) ||
-            (m_platformKey != JC_PLATFORM_KEY) ||
-            (m_optLevel != optLevel))
+        if ((m_MagicNumber != JC_MAGIC_NUMBER) || (m_llCRC != llCRC) ||
+            (m_platformKey != JC_PLATFORM_KEY) || (m_optLevel != optLevel))
          {
              return false;
          }
@@ -522,27 +568,25 @@ struct JitCacheFileHeader
      uint64_t GetObjectCRC() const { return m_objCRC; }
  
  private:
-    static const uint64_t   JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4;
-    static const size_t     JC_STR_MAX_LEN = 32;
-    static const uint32_t   JC_PLATFORM_KEY =
-        (LLVM_VERSION_MAJOR << 24)  |
-        (LLVM_VERSION_MINOR << 16)  |
-        (LLVM_VERSION_PATCH << 8)   |
-        ((sizeof(void*) > sizeof(uint32_t)) ? 1 : 0);
-
-    uint64_t m_MagicNumber = JC_MAGIC_NUMBER;
-    uint64_t m_objSize = 0;
-    uint32_t m_llCRC = 0;
-    uint32_t m_platformKey = JC_PLATFORM_KEY;
-    uint32_t m_objCRC = 0;
-    uint32_t m_optLevel = 0;
-    char m_ModuleID[JC_STR_MAX_LEN] = {};
-    char m_Cpu[JC_STR_MAX_LEN] = {};
+    static const uint64_t JC_MAGIC_NUMBER = 0xfedcba9876543211ULL + 4;
+    static const size_t   JC_STR_MAX_LEN  = 32;
+    static const uint32_t JC_PLATFORM_KEY = (LLVM_VERSION_MAJOR << 24) |
+                                            (LLVM_VERSION_MINOR << 16) | (LLVM_VERSION_PATCH << 8) |
+                                            ((sizeof(void *) > sizeof(uint32_t)) ? 1 : 0);
+
+    uint64_t m_MagicNumber              = JC_MAGIC_NUMBER;
+    uint64_t m_objSize                  = 0;
+    uint32_t m_llCRC                    = 0;
+    uint32_t m_platformKey              = JC_PLATFORM_KEY;
+    uint32_t m_objCRC                   = 0;
+    uint32_t m_optLevel                 = 0;
+    char     m_ModuleID[JC_STR_MAX_LEN] = {};
+    char     m_Cpu[JC_STR_MAX_LEN]      = {};
  };
  
-static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
+static inline uint32_t ComputeModuleCRC(const llvm::Module *M)
  {
-    std::string bitcodeBuffer;
+    std::string        bitcodeBuffer;
      raw_string_ostream bitcodeStream(bitcodeBuffer);
  
  #if LLVM_VERSION_MAJOR >= 7
@@ -550,7 +594,7 @@ static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
  #else
      llvm::WriteBitcodeToFile(M, bitcodeStream);
  #endif
-    //M->print(bitcodeStream, nullptr, false);
+    // M->print(bitcodeStream, nullptr, false);
  
      bitcodeStream.flush();
  
@@ -561,21 +605,24 @@ static inline uint32_t ComputeModuleCRC(const llvm::Module* M)
  JitCache::JitCache()
  {
  #if defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
-    if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0) {
+    if (strncmp(KNOB_JIT_CACHE_DIR.c_str(), "~/", 2) == 0)
+    {
          char *homedir;
-        if (!(homedir = getenv("HOME"))) {
+        if (!(homedir = getenv("HOME")))
+        {
              homedir = getpwuid(getuid())->pw_dir;
          }
          mCacheDir = homedir;
          mCacheDir += (KNOB_JIT_CACHE_DIR.c_str() + 1);
-    } else
+    }
+    else
  #endif
      {
          mCacheDir = KNOB_JIT_CACHE_DIR;
      }
  }
  
-int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
+int ExecUnhookedProcess(const std::string &CmdLine, std::string *pStdOut, std::string *pStdErr)
  {
      return ExecCmd(CmdLine, "", pStdOut, pStdErr);
  }
@@ -583,7 +630,7 @@ int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::s
  /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
  void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj)
  {
-    const std::string& moduleID = M->getModuleIdentifier();
+    const std::string &moduleID = M->getModuleIdentifier();
      if (!moduleID.length())
      {
          return;
@@ -605,7 +652,7 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
      objPath += JIT_OBJ_EXT;
  
      {
-        std::error_code err;
+        std::error_code      err;
          llvm::raw_fd_ostream fileObj(objPath.c_str(), err, llvm::sys::fs::F_None);
          fileObj << Obj.getBuffer();
          fileObj.flush();
@@ -613,14 +660,14 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
  
  
      {
-        std::error_code err;
+        std::error_code      err;
          llvm::raw_fd_ostream fileObj(filePath.c_str(), err, llvm::sys::fs::F_None);
  
          uint32_t objcrc = ComputeCRC(0, Obj.getBufferStart(), Obj.getBufferSize());
  
          header.Init(mCurrentModuleCRC, objcrc, moduleID, mCpu, mOptLevel, Obj.getBufferSize());
  
-        fileObj.write((const char*)&header, sizeof(header));
+        fileObj.write((const char *)&header, sizeof(header));
          fileObj.flush();
      }
  }
@@ -628,10 +675,10 @@ void JitCache::notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef
  /// Returns a pointer to a newly allocated MemoryBuffer that contains the
  /// object which corresponds with Module M, or 0 if an object is not
  /// available.
-std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
+std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module *M)
  {
-    const std::string& moduleID = M->getModuleIdentifier();
-    mCurrentModuleCRC = ComputeModuleCRC(M);
+    const std::string &moduleID = M->getModuleIdentifier();
+    mCurrentModuleCRC           = ComputeModuleCRC(M);
  
      if (!moduleID.length())
      {
@@ -649,8 +696,8 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
      llvm::SmallString<MAX_PATH> objFilePath = filePath;
      objFilePath += JIT_OBJ_EXT;
  
-    FILE* fpObjIn = nullptr;
-    FILE* fpIn = fopen(filePath.c_str(), "rb");
+    FILE *fpObjIn = nullptr;
+    FILE *fpIn    = fopen(filePath.c_str(), "rb");
      if (!fpIn)
      {
          return nullptr;
@@ -681,7 +728,7 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
  #else
          pBuf = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(size_t(header.GetObjectSize()));
  #endif
-        if (!fread(const_cast<char*>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
+        if (!fread(const_cast<char *>(pBuf->getBufferStart()), header.GetObjectSize(), 1, fpObjIn))
          {
              pBuf = nullptr;
              break;
@@ -694,8 +741,7 @@ std::unique_ptr<llvm::MemoryBuffer> JitCache::getObject(const llvm::Module* M)
              break;
          }
  
-    }
-    while (0);
+    } while (0);
  
      fclose(fpIn);
  
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h

index 152776a65135318d5f9ecb0cb8cbdf3d050247bc..a5b6af91f06436251829466c75e65a24e8c5ca84 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file JitManager.h
-*
-* @brief JitManager contains the LLVM data structures used for JIT generation
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file JitManager.h
+ *
+ * @brief JitManager contains the LLVM data structures used for JIT generation
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "jit_pch.hpp"
@@ -37,7 +37,7 @@
  /// JitInstructionSet
  /// @brief Subclass of InstructionSet that allows users to override
  /// the reporting of support for certain ISA features.  This allows capping
-/// the jitted code to a certain feature level, e.g. jit AVX level code on 
+/// the jitted code to a certain feature level, e.g. jit AVX level code on
  /// a platform that supports AVX2.
  //////////////////////////////////////////////////////////////////////////
  class JitInstructionSet : public InstructionSet
@@ -47,22 +47,22 @@ public:
      {
          std::transform(isaRequest.begin(), isaRequest.end(), isaRequest.begin(), ::tolower);
  
-        if(isaRequest == "avx")
+        if (isaRequest == "avx")
          {
-            bForceAVX = true;
-            bForceAVX2 = false;
+            bForceAVX    = true;
+            bForceAVX2   = false;
              bForceAVX512 = false;
          }
-        else if(isaRequest == "avx2")
+        else if (isaRequest == "avx2")
          {
-            bForceAVX = false;
-            bForceAVX2 = true;
+            bForceAVX    = false;
+            bForceAVX2   = true;
              bForceAVX512 = false;
          }
-        else if(isaRequest == "avx512")
+        else if (isaRequest == "avx512")
          {
-            bForceAVX = false;
-            bForceAVX2 = false;
+            bForceAVX    = false;
+            bForceAVX2   = false;
              bForceAVX512 = true;
          }
      };
@@ -73,19 +73,16 @@ public:
      bool BMI2(void) { return bForceAVX ? 0 : InstructionSet::BMI2(); }
  
  private:
-    bool bForceAVX = false;
-    bool bForceAVX2 = false;
-    bool bForceAVX512 = false;
+    bool        bForceAVX    = false;
+    bool        bForceAVX2   = false;
+    bool        bForceAVX512 = false;
      std::string isaRequest;
  };
  
-
-
  struct JitLLVMContext : llvm::LLVMContext
  {
  };
  
-
  //////////////////////////////////////////////////////////////////////////
  /// JitCache
  //////////////////////////////////////////////////////////////////////////
@@ -97,18 +94,15 @@ public:
      JitCache();
      virtual ~JitCache() {}
  
-    void Init(
-        JitManager* pJitMgr,
-        const llvm::StringRef& cpu,
-        llvm::CodeGenOpt::Level level)
+    void Init(JitManager* pJitMgr, const llvm::StringRef& cpu, llvm::CodeGenOpt::Level level)
      {
-        mCpu = cpu.str();
-        mpJitMgr = pJitMgr;
+        mCpu      = cpu.str();
+        mpJitMgr  = pJitMgr;
          mOptLevel = level;
      }
  
      /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
-    void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;
+    void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) override;
  
      /// Returns a pointer to a newly allocated MemoryBuffer that contains the
      /// object which corresponds with Module M, or 0 if an object is not
@@ -116,11 +110,11 @@ public:
      std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M) override;
  
  private:
-    std::string mCpu;
+    std::string                 mCpu;
      llvm::SmallString<MAX_PATH> mCacheDir;
-    uint32_t mCurrentModuleCRC = 0;
-    JitManager* mpJitMgr = nullptr;
-    llvm::CodeGenOpt::Level mOptLevel = llvm::CodeGenOpt::None;
+    uint32_t                    mCurrentModuleCRC = 0;
+    JitManager*                 mpJitMgr          = nullptr;
+    llvm::CodeGenOpt::Level     mOptLevel         = llvm::CodeGenOpt::None;
  };
  
  //////////////////////////////////////////////////////////////////////////
@@ -131,33 +125,33 @@ struct JitManager
      JitManager(uint32_t w, const char* arch, const char* core);
      ~JitManager(){};
  
-    JitLLVMContext          mContext;   ///< LLVM compiler
-    llvm::IRBuilder<>       mBuilder;   ///< LLVM IR Builder
-    llvm::ExecutionEngine*  mpExec;
-    JitCache                mCache;
+    JitLLVMContext         mContext; ///< LLVM compiler
+    llvm::IRBuilder<>      mBuilder; ///< LLVM IR Builder
+    llvm::ExecutionEngine* mpExec;
+    JitCache               mCache;
  
      // Need to be rebuilt after a JIT and before building new IR
-    llvm::Module*           mpCurrentModule;
-    bool                    mIsModuleFinalized;
-    uint32_t                mJitNumber;
+    llvm::Module* mpCurrentModule;
+    bool          mIsModuleFinalized;
+    uint32_t      mJitNumber;
  
-    uint32_t                mVWidth;
+    uint32_t mVWidth;
  
-    bool                    mUsingAVX512 = false;
+    bool mUsingAVX512 = false;
  
      // fetch shader types
-    llvm::FunctionType*     mFetchShaderTy;
+    llvm::FunctionType* mFetchShaderTy;
  
-    JitInstructionSet       mArch;
+    JitInstructionSet mArch;
  
      // Debugging support
      std::unordered_map<llvm::StructType*, llvm::DIType*> mDebugStructMap;
  
      void SetupNewModule();
  
-    void DumpAsm(llvm::Function* pFunction, const char* fileName);
-    static void DumpToFile(llvm::Function *f, const char *fileName);
-    static void DumpToFile(llvm::Module *M, const char *fileName);
+    void               DumpAsm(llvm::Function* pFunction, const char* fileName);
+    static void        DumpToFile(llvm::Function* f, const char* fileName);
+    static void        DumpToFile(llvm::Module* M, const char* fileName);
      static std::string GetOutputDir();
  
      // Debugging support methods
@@ -177,6 +171,10 @@ struct JitManager
          return mDebugStructMap[pStructTy];
      }
  
-    llvm::DIType* CreateDebugStructType(llvm::StructType* pType, const std::string& name, llvm::DIFile* pFile, uint32_t lineNum,
-        const std::vector<std::pair<std::string, uint32_t>>& members);
+    llvm::DIType*
+    CreateDebugStructType(llvm::StructType*                                    pType,
+                          const std::string&                                   name,
+                          llvm::DIFile*                                        pFile,
+                          uint32_t                                             lineNum,
+                          const std::vector<std::pair<std::string, uint32_t>>& members);
  };
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp

index 20f2e42eec91215bfb71779d71fa2147fcac8a99..f89c502db7d789d5ab68c25c48af4454394b8631 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend_jit.cpp
-*
-* @brief Implementation of the blend jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend_jit.cpp
+ *
+ * @brief Implementation of the blend jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder.h"
  #include "jit_api.h"
@@ -47,8 +47,13 @@ struct BlendJit : public Builder
  {
      BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){};
  
-    template<bool Color, bool Alpha>
-    void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
+    template <bool Color, bool Alpha>
+    void GenerateBlendFactor(SWR_BLEND_FACTOR factor,
+                             Value*           constColor[4],
+                             Value*           src[4],
+                             Value*           src1[4],
+                             Value*           dst[4],
+                             Value*           result[4])
      {
          Value* out[4];
  
@@ -77,7 +82,7 @@ struct BlendJit : public Builder
              break;
          case BLENDFACTOR_SRC_ALPHA_SATURATE:
              out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
-            out[3] = VIMMED1(1.0f);
+            out[3]                   = VIMMED1(1.0f);
              break;
          case BLENDFACTOR_CONST_COLOR:
              out[0] = constColor[0];
@@ -158,7 +163,7 @@ struct BlendJit : public Builder
      void Clamp(SWR_FORMAT format, Value* src[4])
      {
          const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-        SWR_TYPE type = info.type[0];
+        SWR_TYPE               type = info.type[0];
  
          switch (type)
          {
@@ -179,7 +184,8 @@ struct BlendJit : public Builder
              src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
              break;
  
-        case SWR_TYPE_UNKNOWN: SWR_INVALID("Unsupport format type: %d", type);
+        case SWR_TYPE_UNKNOWN:
+            SWR_INVALID("Unsupport format type: %d", type);
          }
      }
  
@@ -187,7 +193,7 @@ struct BlendJit : public Builder
      {
          const SWR_FORMAT_INFO& info = GetFormatInfo(format);
  
-        bool valid[] = { false, false, false, false };
+        bool valid[] = {false, false, false, false};
          for (uint32_t c = 0; c < info.numComps; ++c)
          {
              valid[info.swizzle[c]] = true;
@@ -210,7 +216,8 @@ struct BlendJit : public Builder
          {
              if (info.type[c] == SWR_TYPE_UNUSED)
              {
-                src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
+                src[info.swizzle[c]] =
+                    BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
              }
          }
      }
@@ -223,22 +230,28 @@ struct BlendJit : public Builder
              if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED)
              {
                  uint32_t swizComp = info.swizzle[c];
-                float factor = (float)((1 << info.bpc[c]) - 1);
+                float    factor   = (float)((1 << info.bpc[c]) - 1);
                  switch (info.type[c])
                  {
                  case SWR_TYPE_UNORM:
                      src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f));
                      src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO));
-                    src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor));
+                    src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f / factor));
                      break;
-                default: SWR_INVALID("Unsupported format type: %d", info.type[c]);
+                default:
+                    SWR_INVALID("Unsupported format type: %d", info.type[c]);
                  }
              }
          }
      }
  
-    template<bool Color, bool Alpha>
-    void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
+    template <bool Color, bool Alpha>
+    void BlendFunc(SWR_BLEND_OP blendOp,
+                   Value*       src[4],
+                   Value*       srcFactor[4],
+                   Value*       dst[4],
+                   Value*       dstFactor[4],
+                   Value*       result[4])
      {
          Value* out[4];
          Value* srcBlend[4];
@@ -308,7 +321,7 @@ struct BlendJit : public Builder
      void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
      {
          // Op: (s == PS output, d = RT contents)
-        switch(logicOp)
+        switch (logicOp)
          {
          case LOGICOP_CLEAR:
              result[0] = VIMMED1(0);
@@ -443,32 +456,49 @@ struct BlendJit : public Builder
          }
      }
  
-    void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
+    void
+    AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
      {
          // load uint32_t reference
-        Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
-        
+        Value* pRef = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_alphaTestReference}));
+
          // load alpha
-        Value* pAlpha = LOAD(ppAlpha, { 0, 0 });
+        Value* pAlpha = LOAD(ppAlpha, {0, 0});
  
          Value* pTest = nullptr;
          if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
          {
              // convert float alpha to unorm8
              Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
-            pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
+            pAlphaU8        = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
  
              // compare
              switch (state.alphaTestFunction)
              {
-            case ZFUNC_ALWAYS:  pTest = VIMMED1(true); break;
-            case ZFUNC_NEVER:   pTest = VIMMED1(false); break;
-            case ZFUNC_LT:      pTest = ICMP_ULT(pAlphaU8, pRef); break;
-            case ZFUNC_EQ:      pTest = ICMP_EQ(pAlphaU8, pRef); break;
-            case ZFUNC_LE:      pTest = ICMP_ULE(pAlphaU8, pRef); break;
-            case ZFUNC_GT:      pTest = ICMP_UGT(pAlphaU8, pRef); break;
-            case ZFUNC_NE:      pTest = ICMP_NE(pAlphaU8, pRef); break;
-            case ZFUNC_GE:      pTest = ICMP_UGE(pAlphaU8, pRef); break;
+            case ZFUNC_ALWAYS:
+                pTest = VIMMED1(true);
+                break;
+            case ZFUNC_NEVER:
+                pTest = VIMMED1(false);
+                break;
+            case ZFUNC_LT:
+                pTest = ICMP_ULT(pAlphaU8, pRef);
+                break;
+            case ZFUNC_EQ:
+                pTest = ICMP_EQ(pAlphaU8, pRef);
+                break;
+            case ZFUNC_LE:
+                pTest = ICMP_ULE(pAlphaU8, pRef);
+                break;
+            case ZFUNC_GT:
+                pTest = ICMP_UGT(pAlphaU8, pRef);
+                break;
+            case ZFUNC_NE:
+                pTest = ICMP_NE(pAlphaU8, pRef);
+                break;
+            case ZFUNC_GE:
+                pTest = ICMP_UGE(pAlphaU8, pRef);
+                break;
              default:
                  SWR_INVALID("Invalid alpha test function");
                  break;
@@ -482,14 +512,30 @@ struct BlendJit : public Builder
              // compare
              switch (state.alphaTestFunction)
              {
-            case ZFUNC_ALWAYS:  pTest = VIMMED1(true); break;
-            case ZFUNC_NEVER:   pTest = VIMMED1(false); break;
-            case ZFUNC_LT:      pTest = FCMP_OLT(pAlpha, pRef); break;
-            case ZFUNC_EQ:      pTest = FCMP_OEQ(pAlpha, pRef); break;
-            case ZFUNC_LE:      pTest = FCMP_OLE(pAlpha, pRef); break;
-            case ZFUNC_GT:      pTest = FCMP_OGT(pAlpha, pRef); break;
-            case ZFUNC_NE:      pTest = FCMP_ONE(pAlpha, pRef); break;
-            case ZFUNC_GE:      pTest = FCMP_OGE(pAlpha, pRef); break;
+            case ZFUNC_ALWAYS:
+                pTest = VIMMED1(true);
+                break;
+            case ZFUNC_NEVER:
+                pTest = VIMMED1(false);
+                break;
+            case ZFUNC_LT:
+                pTest = FCMP_OLT(pAlpha, pRef);
+                break;
+            case ZFUNC_EQ:
+                pTest = FCMP_OEQ(pAlpha, pRef);
+                break;
+            case ZFUNC_LE:
+                pTest = FCMP_OLE(pAlpha, pRef);
+                break;
+            case ZFUNC_GT:
+                pTest = FCMP_OGT(pAlpha, pRef);
+                break;
+            case ZFUNC_NE:
+                pTest = FCMP_ONE(pAlpha, pRef);
+                break;
+            case ZFUNC_GE:
+                pTest = FCMP_OGE(pAlpha, pRef);
+                break;
              default:
                  SWR_INVALID("Invalid alpha test function");
                  break;
@@ -514,22 +560,24 @@ struct BlendJit : public Builder
  
      Function* Create(const BLEND_COMPILE_STATE& state)
      {
-        std::stringstream fnName("BLND_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+        std::stringstream fnName("BLND_",
+                                 std::ios_base::in | std::ios_base::out | std::ios_base::ate);
          fnName << ComputeCRC(0, &state, sizeof(state));
  
          // blend function signature
-        //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
+        // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
  
          std::vector<Type*> args{
              PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT*
          };
  
-        //std::vector<Type*> args{
+        // std::vector<Type*> args{
          //    PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0), // SWR_BLEND_CONTEXT*
          //};
  
-        FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
-        Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+        FunctionType* fTy       = FunctionType::get(IRB()->getVoidTy(), args, false);
+        Function*     blendFunc = Function::Create(
+            fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
          blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());
  
          BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
@@ -537,29 +585,30 @@ struct BlendJit : public Builder
          IRB()->SetInsertPoint(entry);
  
          // arguments
-        auto argitr = blendFunc->arg_begin();
+        auto   argitr        = blendFunc->arg_begin();
          Value* pBlendContext = &*argitr++;
          pBlendContext->setName("pBlendContext");
-        Value* pBlendState = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pBlendState });
+        Value* pBlendState = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pBlendState});
          pBlendState->setName("pBlendState");
-        Value* pSrc = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src });
+        Value* pSrc = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src});
          pSrc->setName("src");
-        Value* pSrc1 = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src1 });
+        Value* pSrc1 = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src1});
          pSrc1->setName("src1");
-        Value* pSrc0Alpha = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_src0alpha });
+        Value* pSrc0Alpha = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_src0alpha});
          pSrc0Alpha->setName("src0alpha");
-        Value* sampleNum = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_sampleNum });
+        Value* sampleNum = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_sampleNum});
          sampleNum->setName("sampleNum");
-        Value* pDst = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pDst });
+        Value* pDst = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pDst});
          pDst->setName("pDst");
-        Value* pResult = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_result });
+        Value* pResult = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_result});
          pResult->setName("result");
-        Value* ppoMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_oMask });
+        Value* ppoMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_oMask});
          ppoMask->setName("ppoMask");
-        Value* ppMask = LOAD(pBlendContext, { 0, SWR_BLEND_CONTEXT_pMask });
+        Value* ppMask = LOAD(pBlendContext, {0, SWR_BLEND_CONTEXT_pMask});
          ppMask->setName("pMask");
  
-        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
+        static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT,
+                      "Unsupported hot tile format");
          Value* dst[4];
          Value* constantColor[4];
          Value* src[4];
@@ -568,44 +617,44 @@ struct BlendJit : public Builder
          for (uint32_t i = 0; i < 4; ++i)
          {
              // load hot tile
-            dst[i] = LOAD(pDst, { 0, i });
+            dst[i] = LOAD(pDst, {0, i});
  
              // load constant color
-            constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i }));
-        
+            constantColor[i] = VBROADCAST(LOAD(pBlendState, {0, SWR_BLEND_STATE_constantColor, i}));
+
              // load src
-            src[i] = LOAD(pSrc, { 0, i });
+            src[i] = LOAD(pSrc, {0, i});
  
              // load src1
-            src1[i] = LOAD(pSrc1, { 0, i });
+            src1[i] = LOAD(pSrc1, {0, i});
          }
          Value* currentSampleMask = VIMMED1(-1);
          if (state.desc.alphaToCoverageEnable)
          {
-            Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
-            uint32_t bits = (1 << state.desc.numSamples) - 1;
-            currentSampleMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
-            currentSampleMask = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty);
+            Value*   pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
+            uint32_t bits        = (1 << state.desc.numSamples) - 1;
+            currentSampleMask    = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
+            currentSampleMask    = FP_TO_SI(FADD(currentSampleMask, VIMMED1(0.5f)), mSimdInt32Ty);
          }
  
          // alpha test
          if (state.desc.alphaTestEnable)
          {
              // Gather for archrast stats
-            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+            STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
              AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
          }
          else
          {
              // Gather for archrast stats
-            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaTested });
+            STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaTested});
          }
  
          // color blend
          if (state.blendState.blendEnable)
          {
              // Gather for archrast stats
-            STORE(C(1), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+            STORE(C(1), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
  
              // clamp sources
              Clamp(state.format, src);
@@ -635,40 +684,57 @@ struct BlendJit : public Builder
              Value* dstFactor[4];
              if (state.desc.independentAlphaBlendEnable)
              {
-                GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
-                GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);
-
-                GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
-                GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);
-
-                BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
-                BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
+                GenerateBlendFactor<true, false>(
+                    state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
+                GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor,
+                                                 constantColor,
+                                                 src,
+                                                 src1,
+                                                 dst,
+                                                 srcFactor);
+
+                GenerateBlendFactor<true, false>(
+                    state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
+                GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor,
+                                                 constantColor,
+                                                 src,
+                                                 src1,
+                                                 dst,
+                                                 dstFactor);
+
+                BlendFunc<true, false>(
+                    state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+                BlendFunc<false, true>(
+                    state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
              }
              else
              {
-                GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
-                GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
+                GenerateBlendFactor<true, true>(
+                    state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
+                GenerateBlendFactor<true, true>(
+                    state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
  
-                BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
+                BlendFunc<true, true>(
+                    state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
              }
  
              // store results out
              for (uint32_t i = 0; i < 4; ++i)
              {
-                STORE(result[i], pResult, { 0, i });
+                STORE(result[i], pResult, {0, i});
              }
          }
          else
          {
              // Gather for archrast stats
-            STORE(C(0), pBlendContext, { 0, SWR_BLEND_CONTEXT_isAlphaBlended });
+            STORE(C(0), pBlendContext, {0, SWR_BLEND_CONTEXT_isAlphaBlended});
          }
-        
-        if(state.blendState.logicOpEnable)
+
+        if (state.blendState.logicOpEnable)
          {
              const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
-            Value* vMask[4];
-            float scale[4];
+            Value*                 vMask[4];
+            float                  scale[4];
  
              if (!state.blendState.blendEnable)
              {
@@ -676,7 +742,7 @@ struct BlendJit : public Builder
                  Clamp(state.format, dst);
              }
  
-            for(uint32_t i = 0; i < 4; i++)
+            for (uint32_t i = 0; i < 4; i++)
              {
                  if (info.type[i] == SWR_TYPE_UNUSED)
                  {
@@ -713,20 +779,12 @@ struct BlendJit : public Builder
                      dst[i] = BITCAST(dst[i], mSimdInt32Ty);
                      break;
                  case SWR_TYPE_SNORM:
-                    src[i] = FP_TO_SI(
-                        FMUL(src[i], VIMMED1(scale[i])),
-                        mSimdInt32Ty);
-                    dst[i] = FP_TO_SI(
-                        FMUL(dst[i], VIMMED1(scale[i])),
-                        mSimdInt32Ty);
+                    src[i] = FP_TO_SI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
+                    dst[i] = FP_TO_SI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
                      break;
                  case SWR_TYPE_UNORM:
-                    src[i] = FP_TO_UI(
-                        FMUL(src[i], VIMMED1(scale[i])),
-                        mSimdInt32Ty);
-                    dst[i] = FP_TO_UI(
-                        FMUL(dst[i], VIMMED1(scale[i])),
-                        mSimdInt32Ty);
+                    src[i] = FP_TO_UI(FMUL(src[i], VIMMED1(scale[i])), mSimdInt32Ty);
+                    dst[i] = FP_TO_UI(FMUL(dst[i], VIMMED1(scale[i])), mSimdInt32Ty);
                      break;
                  }
              }
@@ -734,7 +792,7 @@ struct BlendJit : public Builder
              LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);
  
              // store results out
-            for(uint32_t i = 0; i < 4; ++i)
+            for (uint32_t i = 0; i < 4; ++i)
              {
                  if (info.type[i] == SWR_TYPE_UNUSED)
                  {
@@ -761,12 +819,10 @@ struct BlendJit : public Builder
                  case SWR_TYPE_SNORM:
                      result[i] = SHL(result[i], C(32 - info.bpc[i]));
                      result[i] = ASHR(result[i], C(32 - info.bpc[i]));
-                    result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty),
-                                     VIMMED1(1.0f / scale[i]));
+                    result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
                      break;
                  case SWR_TYPE_UNORM:
-                    result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty),
-                                     VIMMED1(1.0f / scale[i]));
+                    result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty), VIMMED1(1.0f / scale[i]));
                      break;
                  }
  
@@ -774,27 +830,27 @@ struct BlendJit : public Builder
              }
          }
  
-        if(state.desc.oMaskEnable)
+        if (state.desc.oMaskEnable)
          {
              assert(!(state.desc.alphaToCoverageEnable));
              // load current mask
-            Value* oMask = LOAD(ppoMask);
+            Value* oMask      = LOAD(ppoMask);
              currentSampleMask = AND(oMask, currentSampleMask);
          }
  
-        if(state.desc.sampleMaskEnable)
+        if (state.desc.sampleMaskEnable)
          {
-            Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask});
+            Value* sampleMask = LOAD(pBlendState, {0, SWR_BLEND_STATE_sampleMask});
              currentSampleMask = AND(VBROADCAST(sampleMask), currentSampleMask);
          }
  
-        if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
-           state.desc.oMaskEnable)
+        if (state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
+            state.desc.oMaskEnable)
          {
              // load coverage mask and mask off any lanes with no samples
-            Value* pMask = LOAD(ppMask);
+            Value* pMask        = LOAD(ppMask);
              Value* sampleMasked = SHL(C(1), sampleNum);
-            currentSampleMask = AND(currentSampleMask, VBROADCAST(sampleMasked));
+            currentSampleMask   = AND(currentSampleMask, VBROADCAST(sampleMasked));
              currentSampleMask = S_EXT(ICMP_UGT(currentSampleMask, VBROADCAST(C(0))), mSimdInt32Ty);
              Value* outputMask = AND(pMask, currentSampleMask);
              // store new mask
@@ -836,11 +892,12 @@ struct BlendJit : public Builder
  /// @return PFN_FETCH_FUNC - pointer to fetch code
  PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
  {
-    const llvm::Function *func = (const llvm::Function*)hFunc;
-    JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-    PFN_BLEND_JIT_FUNC pfnBlend;
+    const llvm::Function* func    = (const llvm::Function*)hFunc;
+    JitManager*           pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+    PFN_BLEND_JIT_FUNC    pfnBlend;
      pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
-    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
+    // add new IR to the module
      pJitMgr->mIsModuleFinalized = true;
  
      return pfnBlend;
@@ -850,14 +907,15 @@ PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
  /// @brief JIT compiles blend shader
  /// @param hJitMgr - JitManager handle
  /// @param state   - blend state to build function from
-extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
+extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE                     hJitMgr,
+                                                      const BLEND_COMPILE_STATE& state)
  {
      JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
  
      pJitMgr->SetupNewModule();
  
      BlendJit theJit(pJitMgr);
-    HANDLE hFunc = theJit.Create(state);
+    HANDLE   hFunc = theJit.Create(state);
  
      return JitBlendFunc(hJitMgr, hFunc);
  }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h

index ddb7374d406d0f915b6ea7e11fd0eae88f7215e9..3e78054ecedb8bf0f08086764202151c948112d9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file blend_jit.h
-*
-* @brief Definition of the blend jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file blend_jit.h
+ *
+ * @brief Definition of the blend jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/formats.h"
@@ -34,15 +34,15 @@
  
  struct RENDER_TARGET_BLEND_COMPILE_STATE
  {
-    bool blendEnable;
-    bool logicOpEnable;
+    bool             blendEnable;
+    bool             logicOpEnable;
      SWR_BLEND_FACTOR sourceAlphaBlendFactor;
      SWR_BLEND_FACTOR destAlphaBlendFactor;
      SWR_BLEND_FACTOR sourceBlendFactor;
      SWR_BLEND_FACTOR destBlendFactor;
-    SWR_BLEND_OP colorBlendFunc;
-    SWR_BLEND_OP alphaBlendFunc;
-    SWR_LOGIC_OP logicOpFunc;
+    SWR_BLEND_OP     colorBlendFunc;
+    SWR_BLEND_OP     alphaBlendFunc;
+    SWR_LOGIC_OP     logicOpFunc;
  };
  
  enum ALPHA_TEST_FORMAT
@@ -60,14 +60,14 @@ struct BLEND_DESC
      {
          struct
          {
-            uint32_t            alphaTestEnable: 1;
-            uint32_t            independentAlphaBlendEnable: 1;
-            uint32_t            alphaToCoverageEnable: 1;
-            uint32_t            oMaskEnable:1;
-            uint32_t            inputCoverageEnable:1;
-            uint32_t            sampleMaskEnable:1;
-            uint32_t            numSamples:5;
-            uint32_t            _reserved : 21;
+            uint32_t alphaTestEnable : 1;
+            uint32_t independentAlphaBlendEnable : 1;
+            uint32_t alphaToCoverageEnable : 1;
+            uint32_t oMaskEnable : 1;
+            uint32_t inputCoverageEnable : 1;
+            uint32_t sampleMaskEnable : 1;
+            uint32_t numSamples : 5;
+            uint32_t _reserved : 21;
          };
          uint32_t bits;
      };
@@ -78,11 +78,11 @@ struct BLEND_DESC
  //////////////////////////////////////////////////////////////////////////
  struct BLEND_COMPILE_STATE
  {
-    SWR_FORMAT format;          // format of render target being blended
+    SWR_FORMAT                        format; // format of render target being blended
      RENDER_TARGET_BLEND_COMPILE_STATE blendState;
-    BLEND_DESC desc;
+    BLEND_DESC                        desc;
  
-    SWR_ZFUNCTION alphaTestFunction;
+    SWR_ZFUNCTION     alphaTestFunction;
      ALPHA_TEST_FORMAT alphaTestFormat;
  
      bool operator==(const BLEND_COMPILE_STATE& other) const
@@ -95,18 +95,18 @@ struct BLEND_COMPILE_STATE
      {
          if (!desc.alphaTestEnable)
          {
-            alphaTestFormat = (ALPHA_TEST_FORMAT)0;
+            alphaTestFormat   = (ALPHA_TEST_FORMAT)0;
              alphaTestFunction = (SWR_ZFUNCTION)0;
          }
  
          if (!blendState.blendEnable)
          {
              blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.sourceBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.destBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.colorBlendFunc = (SWR_BLEND_OP)0;
-            blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
+            blendState.destAlphaBlendFactor   = (SWR_BLEND_FACTOR)0;
+            blendState.sourceBlendFactor      = (SWR_BLEND_FACTOR)0;
+            blendState.destBlendFactor        = (SWR_BLEND_FACTOR)0;
+            blendState.colorBlendFunc         = (SWR_BLEND_OP)0;
+            blendState.alphaBlendFunc         = (SWR_BLEND_OP)0;
          }
  
          if (!blendState.logicOpEnable)
@@ -122,8 +122,8 @@ struct BLEND_COMPILE_STATE
          if (!desc.independentAlphaBlendEnable)
          {
              blendState.sourceAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.destAlphaBlendFactor = (SWR_BLEND_FACTOR)0;
-            blendState.alphaBlendFunc = (SWR_BLEND_OP)0;
+            blendState.destAlphaBlendFactor   = (SWR_BLEND_FACTOR)0;
+            blendState.alphaBlendFunc         = (SWR_BLEND_OP)0;
          }
      }
  };
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp

index 4b06aaa3ab17f1248926e720e49bb4e937c949f4..ef95e0103f8e0881013cc21a2b74aec761d43fe0 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -38,7 +38,7 @@ namespace SwrJit
      //////////////////////////////////////////////////////////////////////////
      /// @brief Contructor for Builder.
      /// @param pJitMgr - JitManager which contains modules, function passes, etc.
-    Builder::Builder(JitManager *pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr)
+    Builder::Builder(JitManager* pJitMgr) : mpJitMgr(pJitMgr), mpPrivateContext(nullptr)
      {
          mVWidth   = pJitMgr->mVWidth;
          mVWidth16 = 16;
@@ -79,7 +79,7 @@ namespace SwrJit
  
          mSimd32Int8Ty = VectorType::get(mInt8Ty, 32);
  
-        if (sizeof(uint32_t *) == 4)
+        if (sizeof(uint32_t*) == 4)
          {
              mIntPtrTy       = mInt32Ty;
              mSimdIntPtrTy   = mSimdInt32Ty;
@@ -87,7 +87,7 @@ namespace SwrJit
          }
          else
          {
-            SWR_ASSERT(sizeof(uint32_t *) == 8);
+            SWR_ASSERT(sizeof(uint32_t*) == 8);
  
              mIntPtrTy       = mInt64Ty;
              mSimdIntPtrTy   = mSimdInt64Ty;
@@ -111,38 +111,38 @@ namespace SwrJit
      }
  
      /// @brief Mark this alloca as temporary to avoid hoisting later on
-    void Builder::SetTempAlloca(Value *inst)
+    void Builder::SetTempAlloca(Value* inst)
      {
-        AllocaInst *pAlloca = dyn_cast<AllocaInst>(inst);
+        AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
          SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
-        MDNode *N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca"));
+        MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, "is_temp_alloca"));
          pAlloca->setMetadata("is_temp_alloca", N);
      }
  
-    bool Builder::IsTempAlloca(Value *inst)
+    bool Builder::IsTempAlloca(Value* inst)
      {
-        AllocaInst *pAlloca = dyn_cast<AllocaInst>(inst);
+        AllocaInst* pAlloca = dyn_cast<AllocaInst>(inst);
          SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
  
          return (pAlloca->getMetadata("is_temp_alloca") != nullptr);
      }
  
      // Returns true if able to find a call instruction to mark
-    bool Builder::SetNamedMetaDataOnCallInstr(Instruction *inst, StringRef mdName)
+    bool Builder::SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
      {
-        CallInst *pCallInstr = dyn_cast<CallInst>(inst);
+        CallInst* pCallInstr = dyn_cast<CallInst>(inst);
          if (pCallInstr)
          {
-            MDNode *N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, mdName));
+            MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, mdName));
              pCallInstr->setMetadata(mdName, N);
              return true;
          }
          else
          {
              // Follow use def chain back up
-            for (Use &u : inst->operands())
+            for (Use& u : inst->operands())
              {
-                Instruction *srcInst = dyn_cast<Instruction>(u.get());
+                Instruction* srcInst = dyn_cast<Instruction>(u.get());
                  if (srcInst)
                  {
                      if (SetNamedMetaDataOnCallInstr(srcInst, mdName))
@@ -156,10 +156,9 @@ namespace SwrJit
          return false;
      }
  
-    bool Builder::HasNamedMetaDataOnCallInstr(Instruction *inst,
-                                              StringRef    mdName)
+    bool Builder::HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName)
      {
-        CallInst *pCallInstr = dyn_cast<CallInst>(inst);
+        CallInst* pCallInstr = dyn_cast<CallInst>(inst);
  
          if (!pCallInstr)
          {
@@ -171,7 +170,7 @@ namespace SwrJit
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Packetizes the type. Assumes SOA conversion.
-    Type *Builder::GetVectorType(Type *pType)
+    Type* Builder::GetVectorType(Type* pType)
      {
          if (pType->isVectorTy())
          {
@@ -182,24 +181,24 @@ namespace SwrJit
          if (pType->isArrayTy())
          {
              uint32_t arraySize     = pType->getArrayNumElements();
-            Type *   pArrayType    = pType->getArrayElementType();
-            Type *   pVecArrayType = GetVectorType(pArrayType);
-            Type *   pVecType      = ArrayType::get(pVecArrayType, arraySize);
+            Type*    pArrayType    = pType->getArrayElementType();
+            Type*    pVecArrayType = GetVectorType(pArrayType);
+            Type*    pVecType      = ArrayType::get(pVecArrayType, arraySize);
              return pVecType;
          }
  
          // {float,int} should packetize to {<8 x float>, <8 x int>}
          if (pType->isAggregateType())
          {
-            uint32_t               numElems = pType->getStructNumElements();
-            SmallVector<Type *, 8> vecTypes;
+            uint32_t              numElems = pType->getStructNumElements();
+            SmallVector<Type*, 8> vecTypes;
              for (uint32_t i = 0; i < numElems; ++i)
              {
-                Type *pElemType    = pType->getStructElementType(i);
-                Type *pVecElemType = GetVectorType(pElemType);
+                Type* pElemType    = pType->getStructElementType(i);
+                Type* pVecElemType = GetVectorType(pElemType);
                  vecTypes.push_back(pVecElemType);
              }
-            Type *pVecType = StructType::get(JM()->mContext, vecTypes);
+            Type* pVecType = StructType::get(JM()->mContext, vecTypes);
              return pVecType;
          }
  
@@ -211,7 +210,7 @@ namespace SwrJit
          }
  
          // <ty> should packetize to <8 x <ty>>
-        Type *vecType = VectorType::get(pType, JM()->mVWidth);
+        Type* vecType = VectorType::get(pType, JM()->mVWidth);
          return vecType;
      }
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h b/src/gallium/drivers/swr/rasterizer/jitter/builder.h

index c49d07e056cb3b3e56a2cffc56f7383267f1da48..a047f2a065fef42ca5f6991386d8bca9d3202e18 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file builder.h
-* 
-* @brief Includes all the builder related functionality
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder.h
+ *
+ * @brief Includes all the builder related functionality
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "JitManager.h"
@@ -37,98 +37,99 @@ namespace SwrJit
      ///@todo Move this to better place
      enum SHADER_STATS_COUNTER_TYPE
      {
-        STATS_INST_EXECUTED = 0,
-        STATS_SAMPLE_EXECUTED = 1,
-        STATS_SAMPLE_L_EXECUTED = 2,
-        STATS_SAMPLE_B_EXECUTED = 3,
-        STATS_SAMPLE_C_EXECUTED = 4,
-        STATS_SAMPLE_C_LZ_EXECUTED = 5,
-        STATS_SAMPLE_C_D_EXECUTED = 6,
-        STATS_LOD_EXECUTED = 7,
-        STATS_GATHER4_EXECUTED = 8,
-        STATS_GATHER4_C_EXECUTED = 9,
-        STATS_GATHER4_C_PO_EXECUTED = 10,
+        STATS_INST_EXECUTED           = 0,
+        STATS_SAMPLE_EXECUTED         = 1,
+        STATS_SAMPLE_L_EXECUTED       = 2,
+        STATS_SAMPLE_B_EXECUTED       = 3,
+        STATS_SAMPLE_C_EXECUTED       = 4,
+        STATS_SAMPLE_C_LZ_EXECUTED    = 5,
+        STATS_SAMPLE_C_D_EXECUTED     = 6,
+        STATS_LOD_EXECUTED            = 7,
+        STATS_GATHER4_EXECUTED        = 8,
+        STATS_GATHER4_C_EXECUTED      = 9,
+        STATS_GATHER4_C_PO_EXECUTED   = 10,
          STATS_GATHER4_C_PO_C_EXECUTED = 11,
-        STATS_LOAD_RAW_UAV = 12,
-        STATS_LOAD_RAW_RESOURCE = 13,
-        STATS_STORE_RAW_UAV = 14,
-        STATS_STORE_TGSM = 15,
-        STATS_DISCARD = 16,
-        STATS_BARRIER = 17,
+        STATS_LOAD_RAW_UAV            = 12,
+        STATS_LOAD_RAW_RESOURCE       = 13,
+        STATS_STORE_RAW_UAV           = 14,
+        STATS_STORE_TGSM              = 15,
+        STATS_DISCARD                 = 16,
+        STATS_BARRIER                 = 17,
      };
  
      using namespace llvm;
      struct Builder
      {
-        Builder(JitManager *pJitMgr);
+        Builder(JitManager* pJitMgr);
          virtual ~Builder() {}
  
-        IRBuilder<> *IRB() { return mpIRBuilder; };
-        JitManager *JM() { return mpJitMgr; }
+        IRBuilder<>* IRB() { return mpIRBuilder; };
+        JitManager*  JM() { return mpJitMgr; }
  
-        JitManager *mpJitMgr;
-        IRBuilder<> *mpIRBuilder;
+        JitManager*  mpJitMgr;
+        IRBuilder<>* mpIRBuilder;
  
-        uint32_t             mVWidth;   // vector width target simd
-        uint32_t             mVWidth16; // vector width simd16
+        uint32_t mVWidth;   // vector width target simd
+        uint32_t mVWidth16; // vector width simd16
  
          // Built in types: scalar
  
-        Type*                mVoidTy;
-        Type*                mInt1Ty;
-        Type*                mInt8Ty;
-        Type*                mInt16Ty;
-        Type*                mInt32Ty;
-        Type*                mInt64Ty;
-        Type*                mIntPtrTy;
-        Type*                mFP16Ty;
-        Type*                mFP32Ty;
-        Type*                mFP32PtrTy;
-        Type*                mDoubleTy;
-        Type*                mInt8PtrTy;
-        Type*                mInt16PtrTy;
-        Type*                mInt32PtrTy;
-
-        Type*                mSimd4FP64Ty;
+        Type* mVoidTy;
+        Type* mInt1Ty;
+        Type* mInt8Ty;
+        Type* mInt16Ty;
+        Type* mInt32Ty;
+        Type* mInt64Ty;
+        Type* mIntPtrTy;
+        Type* mFP16Ty;
+        Type* mFP32Ty;
+        Type* mFP32PtrTy;
+        Type* mDoubleTy;
+        Type* mInt8PtrTy;
+        Type* mInt16PtrTy;
+        Type* mInt32PtrTy;
+
+        Type* mSimd4FP64Ty;
  
          // Built in types: target SIMD
  
-        Type*                mSimdFP16Ty;
-        Type*                mSimdFP32Ty;
-        Type*                mSimdInt1Ty;
-        Type*                mSimdInt16Ty;
-        Type*                mSimdInt32Ty;
-        Type*                mSimdInt64Ty;
-        Type*                mSimdIntPtrTy;
-        Type*                mSimdVectorTy;
-        Type*                mSimdVectorTRTy;
-        Type*                mSimdVectorIntTy;
+        Type* mSimdFP16Ty;
+        Type* mSimdFP32Ty;
+        Type* mSimdInt1Ty;
+        Type* mSimdInt16Ty;
+        Type* mSimdInt32Ty;
+        Type* mSimdInt64Ty;
+        Type* mSimdIntPtrTy;
+        Type* mSimdVectorTy;
+        Type* mSimdVectorTRTy;
+        Type* mSimdVectorIntTy;
  
          // Built in types: simd16
  
-        Type*                mSimd16FP16Ty;
-        Type*                mSimd16FP32Ty;
-        Type*                mSimd16Int1Ty;
-        Type*                mSimd16Int16Ty;
-        Type*                mSimd16Int32Ty;
-        Type*                mSimd16Int64Ty;
-        Type*                mSimd16IntPtrTy;
-        Type*                mSimd16VectorTy;
-        Type*                mSimd16VectorTRTy;
-
-        Type*                mSimd32Int8Ty;
-
-        void SetTargetWidth(uint32_t width);
-        void SetTempAlloca(Value* inst);
-        bool IsTempAlloca(Value* inst);
-        bool SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
-        bool HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
+        Type* mSimd16FP16Ty;
+        Type* mSimd16FP32Ty;
+        Type* mSimd16Int1Ty;
+        Type* mSimd16Int16Ty;
+        Type* mSimd16Int32Ty;
+        Type* mSimd16Int64Ty;
+        Type* mSimd16IntPtrTy;
+        Type* mSimd16VectorTy;
+        Type* mSimd16VectorTRTy;
+
+        Type* mSimd32Int8Ty;
+
+        void  SetTargetWidth(uint32_t width);
+        void  SetTempAlloca(Value* inst);
+        bool  IsTempAlloca(Value* inst);
+        bool  SetNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
+        bool  HasNamedMetaDataOnCallInstr(Instruction* inst, StringRef mdName);
          Type* GetVectorType(Type* pType);
-        void SetMetadata(StringRef s, uint32_t val)
+        void  SetMetadata(StringRef s, uint32_t val)
          {
-            llvm::NamedMDNode *metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s);
-            Constant* cval = mpIRBuilder->getInt32(val);
-            llvm::MDNode *mdNode = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(), llvm::ConstantAsMetadata::get(cval));
+            llvm::NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getOrInsertNamedMetadata(s);
+            Constant*          cval     = mpIRBuilder->getInt32(val);
+            llvm::MDNode*      mdNode   = llvm::MDNode::get(mpJitMgr->mpCurrentModule->getContext(),
+                                                     llvm::ConstantAsMetadata::get(cval));
              if (metaData->getNumOperands())
              {
                  metaData->setOperand(0, mdNode);
@@ -143,8 +144,8 @@ namespace SwrJit
              NamedMDNode* metaData = mpJitMgr->mpCurrentModule->getNamedMetadata(s);
              if (metaData)
              {
-                MDNode* mdNode = metaData->getOperand(0);
-                Metadata* val = mdNode->getOperand(0);
+                MDNode*   mdNode = metaData->getOperand(0);
+                Metadata* val    = mdNode->getOperand(0);
                  return mdconst::dyn_extract<ConstantInt>(val)->getZExtValue();
              }
              else
@@ -161,17 +162,15 @@ namespace SwrJit
  #include "builder_mem.h"
  
      protected:
-
-        void SetPrivateContext(Value* pPrivateContext) 
-        { 
-            mpPrivateContext = pPrivateContext; 
+        void SetPrivateContext(Value* pPrivateContext)
+        {
+            mpPrivateContext = pPrivateContext;
              NotifyPrivateContextSet();
          }
-        virtual void NotifyPrivateContextSet() {}
+        virtual void  NotifyPrivateContextSet() {}
          inline Value* GetPrivateContext() { return mpPrivateContext; }
  
-    private: 
+    private:
          Value* mpPrivateContext;
-
      };
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp

index 3013bc53d7fa9e556e1a470b517093ca4b1c3e3d..3f4b090cfc8d32523d58ebb0448005e1bd7cef34 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_gfx_mem.cpp
-*
-* @brief Definition of the gfx mem builder
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_gfx_mem.cpp
+ *
+ * @brief Definition of the gfx mem builder
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder.h"
  #include "common/rdtsc_buckets.h"
@@ -37,12 +37,11 @@ namespace SwrJit
  {
      using namespace llvm;
  
-    BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) :
-        Builder(pJitMgr)
+    BuilderGfxMem::BuilderGfxMem(JitManager *pJitMgr) : Builder(pJitMgr)
      {
-        mpTranslationFuncTy = nullptr;
+        mpTranslationFuncTy     = nullptr;
          mpfnTranslateGfxAddress = nullptr;
-        mpParamSimDC = nullptr;
+        mpParamSimDC            = nullptr;
  
      }
  
@@ -50,9 +49,10 @@ namespace SwrJit
      {
      }
  
-    void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage)
+    void BuilderGfxMem::AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage)
      {
-        SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL), "Internal memory should not be gfxptr_t.");
+        SWR_ASSERT(!(ptr->getType() == mInt64Ty && usage == MEM_CLIENT_INTERNAL),
+                   "Internal memory should not be gfxptr_t.");
      }
  
  
@@ -64,16 +64,20 @@ namespace SwrJit
      /// @param vIndices - SIMD wide value of VB byte offsets
      /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
      /// @param scale - value to scale indices by
-    Value* BuilderGfxMem::GATHERPS(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask,
-                                   uint8_t scale, JIT_MEM_CLIENT usage)
-    {
-        // address may be coming in as 64bit int now so get the pointer
+    Value *BuilderGfxMem::GATHERPS(Value *        vSrc,
+                                   Value *        pBase,
+                                   Value *        vIndices,
+                                   Value *        vMask,
+                                   uint8_t        scale,
+                                   JIT_MEM_CLIENT usage)
+    {
+       // address may be coming in as 64bit int now so get the pointer
          if (pBase->getType() == mInt64Ty)
          {
              pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
          }
  
-        Value* vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
+        Value *vGather = Builder::GATHERPS(vSrc, pBase, vIndices, vMask, scale);
          return vGather;
      }
  
@@ -85,8 +89,12 @@ namespace SwrJit
      /// @param vIndices - SIMD wide value of VB byte offsets
      /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
      /// @param scale - value to scale indices by
-    Value* BuilderGfxMem::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask,
-                                   uint8_t scale, JIT_MEM_CLIENT usage)
+    Value *BuilderGfxMem::GATHERDD(Value *        vSrc,
+                                   Value *        pBase,
+                                   Value *        vIndices,
+                                   Value *        vMask,
+                                   uint8_t        scale,
+                                   JIT_MEM_CLIENT usage)
      {
  
          // address may be coming in as 64bit int now so get the pointer
@@ -95,41 +103,42 @@ namespace SwrJit
              pBase = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
          }
  
-        Value* vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
+        Value *vGather = Builder::GATHERDD(vSrc, pBase, vIndices, vMask, scale);
          return vGather;
      }
  
  
-    Value* BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
+    Value *BuilderGfxMem::OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset)
      {
          return ADD(base, offset);
      }
  
-    Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
+    Value *BuilderGfxMem::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
      {
          Ptr = TranslationHelper(Ptr, Ty);
          return Builder::GEP(Ptr, Idx, nullptr, Name);
      }
  
-    Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
+    Value *BuilderGfxMem::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
      {
          Ptr = TranslationHelper(Ptr, Ty);
          return Builder::GEP(Ty, Ptr, Idx, Name);
      }
  
-    Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
+    Value *BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty)
      {
          Ptr = TranslationHelper(Ptr, Ty);
          return Builder::GEP(Ptr, indexList);
      }
  
-    Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
+    Value *
+    BuilderGfxMem::GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
      {
          Ptr = TranslationHelper(Ptr, Ty);
          return Builder::GEP(Ptr, indexList);
      }
  
-    Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty)
+    Value *BuilderGfxMem::TranslationHelper(Value *Ptr, Type *Ty)
      {
          SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr),
                     "Access of GFX pointers must have non-null type specified.");
@@ -144,7 +153,7 @@ namespace SwrJit
          return Ptr;
      }
  
-    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
      {
          AssertGFXMemoryParams(Ptr, usage);
  
@@ -152,7 +161,7 @@ namespace SwrJit
          return Builder::LOAD(Ptr, Name);
      }
  
-    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst *BuilderGfxMem::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
      {
          AssertGFXMemoryParams(Ptr, usage);
  
@@ -160,7 +169,8 @@ namespace SwrJit
          return Builder::LOAD(Ptr, Name);
      }
  
-    LoadInst* BuilderGfxMem::LOAD(Value* Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst *BuilderGfxMem::LOAD(
+        Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
      {
          AssertGFXMemoryParams(Ptr, usage);
  
@@ -168,7 +178,11 @@ namespace SwrJit
          return Builder::LOAD(Ptr, isVolatile, Name);
      }
  
-    LoadInst* BuilderGfxMem::LOAD(Value* BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst *BuilderGfxMem::LOAD(Value *                                BasePtr,
+                                  const std::initializer_list<uint32_t> &offset,
+                                  const llvm::Twine &                    name,
+                                  Type *                                 Ty,
+                                  JIT_MEM_CLIENT                         usage)
      {
          AssertGFXMemoryParams(BasePtr, usage);
  
@@ -176,10 +190,10 @@ namespace SwrJit
          if (BasePtr->getType() == mInt64Ty)
          {
              SWR_ASSERT(Ty);
-            BasePtr = INT_TO_PTR(BasePtr, Ty, name);
+            BasePtr          = INT_TO_PTR(BasePtr, Ty, name);
              bNeedTranslation = true;
          }
-        std::vector<Value*> valIndices;
+        std::vector<Value *> valIndices;
          for (auto i : offset)
          {
              valIndices.push_back(C(i));
@@ -193,7 +207,13 @@ namespace SwrJit
          return LOAD(BasePtr, name, Ty, usage);
      }
  
-    CallInst* BuilderGfxMem::MASKED_LOAD(Value* Ptr, unsigned Align, Value* Mask, Value* PassThru, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+    CallInst *BuilderGfxMem::MASKED_LOAD(Value *        Ptr,
+                                         unsigned       Align,
+                                         Value *        Mask,
+                                         Value *        PassThru,
+                                         const Twine &  Name,
+                                         Type *         Ty,
+                                         JIT_MEM_CLIENT usage)
      {
          AssertGFXMemoryParams(Ptr, usage);
  
@@ -201,7 +221,10 @@ namespace SwrJit
          return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage);
      }
  
-    Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress, Type* PtrTy, const Twine &Name, JIT_MEM_CLIENT /* usage */)
+    Value *BuilderGfxMem::TranslateGfxAddress(Value *      xpGfxAddress,
+                                              Type *       PtrTy,
+                                              const Twine &Name,
+                                              JIT_MEM_CLIENT /* usage */)
      {
          if (PtrTy == nullptr)
          {
@@ -209,4 +232,4 @@ namespace SwrJit
          }
          return INT_TO_PTR(xpGfxAddress, PtrTy, Name);
      }
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h

index 00817b2b52bd67cd007de1cea80c3564b7b23d43..ab6f78ee81768709d47e5d4bd7e2c5afb92d40ba 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_gfx_mem.h
-*
-* @brief Definition of the builder to support different translation types for gfx memory access
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_gfx_mem.h
+ *
+ * @brief Definition of the builder to support different translation types for gfx memory access
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "builder.h"
@@ -38,28 +38,67 @@ namespace SwrJit
      class BuilderGfxMem : public Builder
      {
      public:
-        BuilderGfxMem(JitManager* pJitMgr);
+        BuilderGfxMem(JitManager *pJitMgr);
          virtual ~BuilderGfxMem() {}
  
          virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
          virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
-        virtual Value *GEP(Value* Ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
-        virtual Value *GEP(Value* Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
-
-        virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-        virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-        virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-        virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-        virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-        virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-        virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-
-        Value* TranslateGfxAddress(Value* xpGfxAddress, Type* PtrTy = nullptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+        virtual Value *
+        GEP(Value *Ptr, const std::initializer_list<Value *> &indexList, Type *Ty = nullptr);
+        virtual Value *
+        GEP(Value *Ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
+
+        virtual LoadInst *LOAD(Value *        Ptr,
+                               const char *   Name,
+                               Type *         Ty    = nullptr,
+                               JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+        virtual LoadInst *LOAD(Value *        Ptr,
+                               const Twine &  Name  = "",
+                               Type *         Ty    = nullptr,
+                               JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+        virtual LoadInst *LOAD(Value *        Ptr,
+                               bool           isVolatile,
+                               const Twine &  Name  = "",
+                               Type *         Ty    = nullptr,
+                               JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+        virtual LoadInst *LOAD(Value *                                BasePtr,
+                               const std::initializer_list<uint32_t> &offset,
+                               const llvm::Twine &                    Name  = "",
+                               Type *                                 Ty    = nullptr,
+                               JIT_MEM_CLIENT                         usage = MEM_CLIENT_INTERNAL);
+
+        virtual CallInst *MASKED_LOAD(Value *        Ptr,
+                                      unsigned       Align,
+                                      Value *        Mask,
+                                      Value *        PassThru = nullptr,
+                                      const Twine &  Name     = "",
+                                      Type *         Ty       = nullptr,
+                                      JIT_MEM_CLIENT usage    = MEM_CLIENT_INTERNAL);
+
+        virtual Value *GATHERPS(Value *        src,
+                                Value *        pBase,
+                                Value *        indices,
+                                Value *        mask,
+                                uint8_t        scale = 1,
+                                JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+        virtual Value *GATHERDD(Value *        src,
+                                Value *        pBase,
+                                Value *        indices,
+                                Value *        mask,
+                                uint8_t        scale = 1,
+                                JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+
+        Value *TranslateGfxAddress(Value *        xpGfxAddress,
+                                   Type *         PtrTy = nullptr,
+                                   const Twine &  Name  = "",
+                                   JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
          template <typename T>
-        Value* TranslateGfxAddress(Value* xpGfxBaseAddress, const std::initializer_list<T> &offset, Type* PtrTy = nullptr, const Twine &Name = "", JIT_MEM_CLIENT usage = GFX_MEM_CLIENT_SHADER)
+        Value *TranslateGfxAddress(Value *                         xpGfxBaseAddress,
+                                   const std::initializer_list<T> &offset,
+                                   Type *                          PtrTy = nullptr,
+                                   const Twine &                   Name  = "",
+                                   JIT_MEM_CLIENT                  usage = GFX_MEM_CLIENT_SHADER)
          {
              AssertGFXMemoryParams(xpGfxBaseAddress, usage);
              SWR_ASSERT(xpGfxBaseAddress->getType()->isPointerTy() == false);
@@ -69,31 +108,29 @@ namespace SwrJit
                  PtrTy = mInt8PtrTy;
              }
  
-            Value* ptr = INT_TO_PTR(xpGfxBaseAddress, PtrTy);
-            ptr = GEP(ptr, offset);
+            Value *ptr = INT_TO_PTR(xpGfxBaseAddress, PtrTy);
+            ptr        = GEP(ptr, offset);
              return TranslateGfxAddress(PTR_TO_INT(ptr, mInt64Ty), PtrTy, Name, usage);
          }
  
  
      protected:
+        void AssertGFXMemoryParams(Value *ptr, Builder::JIT_MEM_CLIENT usage);
  
-        void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
-            
          virtual void NotifyPrivateContextSet();
  
-        virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
+        virtual Value *OFFSET_TO_NEXT_COMPONENT(Value *base, Constant *offset);
  
-        Value* TranslationHelper(Value *Ptr, Type *Ty);
+        Value *TranslationHelper(Value *Ptr, Type *Ty);
  
-        FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; }
-        Value* GetTranslationFunction() { return mpfnTranslateGfxAddress; }
-        Value* GetParamSimDC() { return mpParamSimDC; }
+        FunctionType *GetTranslationFunctionType() { return mpTranslationFuncTy; }
+        Value *       GetTranslationFunction() { return mpfnTranslateGfxAddress; }
+        Value *       GetParamSimDC() { return mpParamSimDC; }
  
  
      private:
-
-        FunctionType* mpTranslationFuncTy;
-        Value* mpfnTranslateGfxAddress;
-        Value* mpParamSimDC;
+        FunctionType *mpTranslationFuncTy;
+        Value *       mpfnTranslateGfxAddress;
+        Value *       mpParamSimDC;
      };
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h

index 92867ec9836aafa7fe68bd6d43269801148c9a6b..02aa6f97cdfe3a679771ae94e023ebd3a36227c9 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_math.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file builder_math.h
-* 
-* @brief math/alu builder functions
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_math.h
+ *
+ * @brief math/alu builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  Value* VLOG2PS(Value* src);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp

index 77c2095ea9c313bad0b8d7ccb7dfc897042b63df..94489f1c7fd2e98ad6e2530c4e8da7638269e4d0 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.cpp
-*
-* @brief Implementation for miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.cpp
+ *
+ * @brief Implementation for miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder.h"
  #include "common/rdtsc_buckets.h"
@@ -37,20 +37,22 @@ namespace SwrJit
  {
      void Builder::AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage)
      {
-        SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX access.  Requires translation through BuilderGfxMem.");
+        SWR_ASSERT(
+            ptr->getType() != mInt64Ty,
+            "Address appears to be GFX access.  Requires translation through BuilderGfxMem.");
      }
  
-    Value *Builder::GEP(Value *Ptr, Value *Idx, Type *Ty, const Twine &Name)
+    Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name)
      {
          return IRB()->CreateGEP(Ptr, Idx, Name);
      }
  
-    Value *Builder::GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name)
+    Value* Builder::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name)
      {
          return IRB()->CreateGEP(Ty, Ptr, Idx, Name);
      }
  
-    Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty)
+    Value* Builder::GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty)
      {
          std::vector<Value*> indices;
          for (auto i : indexList)
@@ -58,7 +60,7 @@ namespace SwrJit
          return GEPA(ptr, indices);
      }
  
-    Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty)
+    Value* Builder::GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty)
      {
          std::vector<Value*> indices;
          for (auto i : indexList)
@@ -66,17 +68,17 @@ namespace SwrJit
          return GEPA(ptr, indices);
      }
  
-    Value *Builder::GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+    Value* Builder::GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
      {
          return IRB()->CreateGEP(Ptr, IdxList, Name);
      }
  
-    Value *Builder::GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name)
+    Value* Builder::GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name)
      {
          return IRB()->CreateGEP(Ty, Ptr, IdxList, Name);
      }
  
-    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
+    Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList)
      {
          std::vector<Value*> indices;
          for (auto i : indexList)
@@ -84,7 +86,7 @@ namespace SwrJit
          return IN_BOUNDS_GEP(ptr, indices);
      }
  
-    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
+    Value* Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList)
      {
          std::vector<Value*> indices;
          for (auto i : indexList)
@@ -92,31 +94,36 @@ namespace SwrJit
          return IN_BOUNDS_GEP(ptr, indices);
      }
  
-    LoadInst* Builder::LOAD(Value *Ptr, const char *Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Value* Ptr, const char* Name, Type* Ty, JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(Ptr, usage);
          return IRB()->CreateLoad(Ptr, Name);
      }
  
-    LoadInst* Builder::LOAD(Value *Ptr, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Value* Ptr, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(Ptr, usage);
          return IRB()->CreateLoad(Ptr, Name);
      }
  
-    LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine &Name, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Type* Ty, Value* Ptr, const Twine& Name, JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(Ptr, usage);
          return IRB()->CreateLoad(Ty, Ptr, Name);
      }
  
-    LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine &Name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst*
+    Builder::LOAD(Value* Ptr, bool isVolatile, const Twine& Name, Type* Ty, JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(Ptr, usage);
          return IRB()->CreateLoad(Ptr, isVolatile, Name);
      }
  
-    LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name, Type *Ty, JIT_MEM_CLIENT usage)
+    LoadInst* Builder::LOAD(Value*                                 basePtr,
+                            const std::initializer_list<uint32_t>& indices,
+                            const llvm::Twine&                     name,
+                            Type*                                  Ty,
+                            JIT_MEM_CLIENT                         usage)
      {
          std::vector<Value*> valIndices;
          for (auto i : indices)
@@ -124,7 +131,9 @@ namespace SwrJit
          return Builder::LOAD(GEPA(basePtr, valIndices), name);
      }
  
-    LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
+    LoadInst* Builder::LOADV(Value*                               basePtr,
+                             const std::initializer_list<Value*>& indices,
+                             const llvm::Twine&                   name)
      {
          std::vector<Value*> valIndices;
          for (auto i : indices)
@@ -132,7 +141,8 @@ namespace SwrJit
          return LOAD(GEPA(basePtr, valIndices), name);
      }
  
-    StoreInst *Builder::STORE(Value *val, Value *basePtr, const std::initializer_list<uint32_t> &indices)
+    StoreInst*
+    Builder::STORE(Value* val, Value* basePtr, const std::initializer_list<uint32_t>& indices)
      {
          std::vector<Value*> valIndices;
          for (auto i : indices)
@@ -140,7 +150,8 @@ namespace SwrJit
          return STORE(val, GEPA(basePtr, valIndices));
      }
  
-    StoreInst *Builder::STOREV(Value *val, Value *basePtr, const std::initializer_list<Value*> &indices)
+    StoreInst*
+    Builder::STOREV(Value* val, Value* basePtr, const std::initializer_list<Value*>& indices)
      {
          std::vector<Value*> valIndices;
          for (auto i : indices)
@@ -148,27 +159,35 @@ namespace SwrJit
          return STORE(val, GEPA(basePtr, valIndices));
      }
  
-    Value* Builder::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset)
+    Value* Builder::OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset)
      {
          return GEP(base, offset);
      }
  
-    Value* Builder::MEM_ADD(Value* i32Incr, Value* basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
+    Value* Builder::MEM_ADD(Value*                                 i32Incr,
+                            Value*                                 basePtr,
+                            const std::initializer_list<uint32_t>& indices,
+                            const llvm::Twine&                     name)
      {
-        Value* i32Value = LOAD(GEP(basePtr, indices), name);
+        Value* i32Value  = LOAD(GEP(basePtr, indices), name);
          Value* i32Result = ADD(i32Value, i32Incr);
          return STORE(i32Result, GEP(basePtr, indices));
      }
  
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Generate a masked gather operation in LLVM IR.  If not  
+    /// @brief Generate a masked gather operation in LLVM IR.  If not
      /// supported on the underlying platform, emulate it with loads
      /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
      /// @param pBase - Int8* base VB address pointer value
      /// @param vIndices - SIMD wide value of VB byte offsets
      /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
      /// @param scale - value to scale indices by
-    Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
+    Value* Builder::GATHERPS(Value*         vSrc,
+                             Value*         pBase,
+                             Value*         vIndices,
+                             Value*         vMask,
+                             uint8_t        scale,
+                             JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(pBase, usage);
  
@@ -176,14 +195,19 @@ namespace SwrJit
      }
  
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Generate a masked gather operation in LLVM IR.  If not  
+    /// @brief Generate a masked gather operation in LLVM IR.  If not
      /// supported on the underlying platform, emulate it with loads
      /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
      /// @param pBase - Int8* base VB address pointer value
      /// @param vIndices - SIMD wide value of VB byte offsets
      /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
      /// @param scale - value to scale indices by
-    Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
+    Value* Builder::GATHERDD(Value*         vSrc,
+                             Value*         pBase,
+                             Value*         vIndices,
+                             Value*         vMask,
+                             uint8_t        scale,
+                             JIT_MEM_CLIENT usage)
      {
          AssertMemoryUsageParams(pBase, usage);
  
@@ -198,7 +222,8 @@ namespace SwrJit
      /// @param vIndices - SIMD wide value of VB byte offsets
      /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
      /// @param scale - value to scale indices by
-    Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
+    Value*
+    Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
      {
          return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
      }
@@ -213,10 +238,15 @@ namespace SwrJit
          return MASKED_GATHER(pVecSrcPtr, 4, pVecMask, pVecPassthru);
      }
  
-    void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
-        Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+    void Builder::Gather4(const SWR_FORMAT format,
+                          Value*           pSrcBase,
+                          Value*           byteOffsets,
+                          Value*           mask,
+                          Value*           vGatherComponents[],
+                          bool             bPackedOutput,
+                          JIT_MEM_CLIENT   usage)
      {
-        const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+        const SWR_FORMAT_INFO& info = GetFormatInfo(format);
          if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
          {
              GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput, usage);
@@ -227,8 +257,13 @@ namespace SwrJit
          }
      }
  
-    void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-        Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+    void Builder::GATHER4PS(const SWR_FORMAT_INFO& info,
+                            Value*                 pSrcBase,
+                            Value*                 byteOffsets,
+                            Value*                 vMask,
+                            Value*                 vGatherComponents[],
+                            bool                   bPackedOutput,
+                            JIT_MEM_CLIENT         usage)
      {
          switch (info.bpp / info.numComps)
          {
@@ -253,10 +288,11 @@ namespace SwrJit
                  // offset base to the next components(zw) in the vertex to gather
                  pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
  
-                vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+                vGatherResult[1] =
+                    GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
                  // e.g. result of second 8x32bit integer gather for 16bit components
                  // 256i - 0    1    2    3    4    5    6    7
-                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
+                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
                  //
              }
              else
@@ -281,7 +317,8 @@ namespace SwrJit
                  uint32_t swizzleIndex = info.swizzle[i];
  
                  // Gather a SIMD of components
-                vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
+                vGatherComponents[swizzleIndex] = GATHERPS(
+                    vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
  
                  // offset base to the next component to gather
                  pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
@@ -294,18 +331,24 @@ namespace SwrJit
          }
      }
  
-    void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-        Value* vMask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage)
+    void Builder::GATHER4DD(const SWR_FORMAT_INFO& info,
+                            Value*                 pSrcBase,
+                            Value*                 byteOffsets,
+                            Value*                 vMask,
+                            Value*                 vGatherComponents[],
+                            bool                   bPackedOutput,
+                            JIT_MEM_CLIENT         usage)
      {
          switch (info.bpp / info.numComps)
          {
          case 8:
          {
              Value* vGatherMaskedVal = VIMMED1((int32_t)0);
-            Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+            Value* vGatherResult =
+                GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
              // e.g. result of an 8x32bit integer gather for 8bit components
              // 256i - 0    1    2    3    4    5    6    7
-            //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw 
+            //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
  
              Shuffle8bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
          }
@@ -331,10 +374,11 @@ namespace SwrJit
                  // offset base to the next components(zw) in the vertex to gather
                  pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
  
-                vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
+                vGatherResult[1] =
+                    GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask, 1, usage);
                  // e.g. result of second 8x32bit integer gather for 16bit components
                  // 256i - 0    1    2    3    4    5    6    7
-                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
+                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
                  //
              }
              else
@@ -344,7 +388,6 @@ namespace SwrJit
  
              // Shuffle gathered components into place, each row is a component
              Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
-
          }
          break;
          case 32:
@@ -360,7 +403,8 @@ namespace SwrJit
                  uint32_t swizzleIndex = info.swizzle[i];
  
                  // Gather a SIMD of components
-                vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
+                vGatherComponents[swizzleIndex] = GATHERDD(
+                    vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask, 1, usage);
  
                  // offset base to the next component to gather
                  pSrcBase = OFFSET_TO_NEXT_COMPONENT(pSrcBase, C((intptr_t)4));
@@ -373,29 +417,35 @@ namespace SwrJit
          }
      }
  
-    void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
+    void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
+                                      Value*                 vGatherInput[2],
+                                      Value*                 vGatherOutput[4],
+                                      bool                   bPackedOutput)
      {
          // cast types
          Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
-        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+        Type* v32x8Ty   = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
  
-                                                               // input could either be float or int vector; do shuffle work in int
+        // input could either be float or int vector; do shuffle work in int
          vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
          vGatherInput[1] = BITCAST(vGatherInput[1], mSimdInt32Ty);
  
          if (bPackedOutput)
          {
-            Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
-
-                                                                                                         // shuffle mask
-            Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
-                0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
-            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
+            Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+                                              mVWidth / 4); // vwidth is units of 32 bits
+
+            // shuffle mask
+            Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+                                         0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+            Value* vShufResult =
+                BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
              // after pshufb: group components together in each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
  
-            Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+            Value* vi128XY =
+                BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
              // after PERMD: move and pack xy components into each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
@@ -404,8 +454,10 @@ namespace SwrJit
              Value* vi128ZW = nullptr;
              if (info.numComps > 2)
              {
-                Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
-                vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+                Value* vShufResult =
+                    BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
+                vi128ZW =
+                    BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
              }
  
              for (uint32_t i = 0; i < 4; i++)
@@ -425,23 +477,23 @@ namespace SwrJit
                  // if x or y, use vi128XY permute result, else use vi128ZW
                  Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
  
-                // extract packed component 128 bit lanes 
+                // extract packed component 128 bit lanes
                  vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
              }
-
          }
          else
          {
              // pshufb masks for each component
              Value* vConstMask[2];
              // x/z shuffle mask
-            vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
-                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+            vConstMask[0] = C<char>({
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+            });
  
              // y/w shuffle mask
-            vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
-                2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
-
+            vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+                                     2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
  
              // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
              // apply defaults
@@ -459,32 +511,41 @@ namespace SwrJit
                  // if x or y, use vi128XY permute result, else use vi128ZW
                  uint32_t selectedGather = (i < 2) ? 0 : 1;
  
-                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
+                vGatherOutput[swizzleIndex] =
+                    BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty),
+                                   vConstMask[selectedMask]),
+                            vGatherTy);
                  // after pshufb mask for x channel; z uses the same shuffle from the second gather
                  // 256i - 0    1    2    3    4    5    6    7
-                //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00 
+                //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
              }
          }
      }
  
-    void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
+    void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
+                                     Value*                 vGatherInput,
+                                     Value*                 vGatherOutput[],
+                                     bool                   bPackedOutput)
      {
          // cast types
          Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
-        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+        Type* v32x8Ty   = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
  
          if (bPackedOutput)
          {
-            Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
-                                                                                                      // shuffle mask
-            Value* vConstMask = C<char>({ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
-                0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 });
-            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
+            Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+                                           mVWidth / 4); // vwidth is units of 32 bits
+                                                         // shuffle mask
+            Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+                                         0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
+            Value* vShufResult =
+                BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
              // after pshufb: group components together in each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
  
-            Value* vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
+            Value* vi128XY =
+                BITCAST(VPERMD(vShufResult, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty);
              // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
@@ -493,10 +554,12 @@ namespace SwrJit
              Value* vi128ZW = nullptr;
              if (info.numComps > 2)
              {
-                vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
+                vi128ZW =
+                    BITCAST(VPERMD(vShufResult, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty);
              }
  
-            // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+            // sign extend all enabled components. If we have a fill vVertexElements, output to
+            // current simdvertex
              for (uint32_t i = 0; i < 4; i++)
              {
                  uint32_t swizzleIndex = info.swizzle[i];
@@ -519,7 +582,8 @@ namespace SwrJit
              }
          }
          // else zero extend
-        else {
+        else
+        {
              // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
              // apply defaults
              for (uint32_t i = 0; i < 4; ++i)
@@ -527,7 +591,8 @@ namespace SwrJit
                  vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
              }
  
-            for (uint32_t i = 0; i < info.numComps; i++) {
+            for (uint32_t i = 0; i < info.numComps; i++)
+            {
                  uint32_t swizzleIndex = info.swizzle[i];
  
                  // pshufb masks for each component
@@ -536,45 +601,53 @@ namespace SwrJit
                  {
                  case 0:
                      // x shuffle mask
-                    vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
-                        0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
+                    vConstMask =
+                        C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
+                                 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
                      break;
                  case 1:
                      // y shuffle mask
-                    vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
-                        1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
+                    vConstMask =
+                        C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
+                                 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
                      break;
                  case 2:
                      // z shuffle mask
-                    vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
-                        2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
+                    vConstMask =
+                        C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
+                                 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
                      break;
                  case 3:
                      // w shuffle mask
-                    vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
-                        3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
+                    vConstMask =
+                        C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
+                                 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
                      break;
                  default:
                      vConstMask = nullptr;
                      break;
                  }
  
-                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
+                vGatherOutput[swizzleIndex] =
+                    BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
                  // after pshufb for x channel
                  // 256i - 0    1    2    3    4    5    6    7
-                //        x000 x000 x000 x000 x000 x000 x000 x000 
+                //        x000 x000 x000 x000 x000 x000 x000 x000
              }
          }
      }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief emulates a scatter operation.
-    /// @param pDst - pointer to destination 
+    /// @param pDst - pointer to destination
      /// @param vSrc - vector of src data to scatter
      /// @param vOffsets - vector of byte offsets from pDst
      /// @param vMask - mask of valid lanes
-    void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask)
+    void Builder::SCATTERPS(
+        Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, JIT_MEM_CLIENT usage)
      {
+        AssertMemoryUsageParams(pDst, usage);
+
          /* Scatter algorithm
  
          while(Index = BitScanForward(mask))
@@ -586,25 +659,25 @@ namespace SwrJit
          */
  
          BasicBlock* pCurBB = IRB()->GetInsertBlock();
-        Function* pFunc = pCurBB->getParent();
-        Type* pSrcTy = vSrc->getType()->getVectorElementType();
+        Function*   pFunc  = pCurBB->getParent();
+        Type*       pSrcTy = vSrc->getType()->getVectorElementType();
  
          // Store vectors on stack
          if (pScatterStackSrc == nullptr)
          {
              // Save off stack allocations and reuse per scatter. Significantly reduces stack
              // requirements for shaders with a lot of scatters.
-            pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
+            pScatterStackSrc     = CreateEntryAlloca(pFunc, mSimdInt64Ty);
              pScatterStackOffsets = CreateEntryAlloca(pFunc, mSimdInt32Ty);
          }
  
-        Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
+        Value* pSrcArrayPtr     = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
          Value* pOffsetsArrayPtr = pScatterStackOffsets;
          STORE(vSrc, pSrcArrayPtr);
          STORE(vOffsets, pOffsetsArrayPtr);
  
          // Cast to pointers for random access
-        pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
+        pSrcArrayPtr     = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
          pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));
  
          Value* pMask = VMOVMSK(vMask);
@@ -643,18 +716,18 @@ namespace SwrJit
          // Add loop basic block contents
          IRB()->SetInsertPoint(pLoop);
          PHINode* pIndexPhi = PHI(mInt32Ty, 2);
-        PHINode* pMaskPhi = PHI(mInt32Ty, 2);
+        PHINode* pMaskPhi  = PHI(mInt32Ty, 2);
  
          pIndexPhi->addIncoming(pIndex, pCurBB);
          pMaskPhi->addIncoming(pMask, pCurBB);
  
          // Extract elements for this index
-        Value* pSrcElem = LOADV(pSrcArrayPtr, { pIndexPhi });
-        Value* pOffsetElem = LOADV(pOffsetsArrayPtr, { pIndexPhi });
+        Value* pSrcElem    = LOADV(pSrcArrayPtr, {pIndexPhi});
+        Value* pOffsetElem = LOADV(pOffsetsArrayPtr, {pIndexPhi});
  
          // GEP to this offset in dst
          Value* pCurDst = GEP(pDst, pOffsetElem, mInt8PtrTy);
-        pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
+        pCurDst        = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
          STORE(pSrcElem, pCurDst);
  
          // Update the mask
@@ -673,4 +746,4 @@ namespace SwrJit
          // Move builder to beginning of post loop
          IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
      }
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h

index 3823a136bb8642c09b44c3db8a42fee41bff9b6b..15def96cb76c9e7775cc5972406cae62b37a6eee 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -1,36 +1,35 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file builder_misc.h
-*
-* @brief miscellaneous builder functions
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_mem.h
+ *
+ * @brief memory-access builder functions (GEP, load/store, gather/scatter)
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  public:
-
  typedef enum _JIT_MEM_CLIENT
  {
      MEM_CLIENT_INTERNAL,
@@ -40,62 +39,119 @@ typedef enum _JIT_MEM_CLIENT
  } JIT_MEM_CLIENT;
  
  protected:
-
-virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant *offset);
-void AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
+virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset);
+void           AssertMemoryUsageParams(Value* ptr, JIT_MEM_CLIENT usage);
  
  public:
-
-virtual Value *GEP(Value *Ptr, Value *Idx, Type *Ty = nullptr, const Twine &Name = "");
-virtual Value *GEP(Type *Ty, Value *Ptr, Value *Idx, const Twine &Name = "");
-virtual Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList, Type *Ty = nullptr);
-virtual Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList, Type *Ty = nullptr);
-
-Value *GEPA(Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
-Value *GEPA(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList, const Twine &Name = "");
-
-Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
-Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
-
-virtual LoadInst* LOAD(Value *Ptr, const char *Name, Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Type *Ty, Value *Ptr, const Twine &Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *Ptr, bool isVolatile, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-virtual LoadInst* LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual CallInst* MASKED_LOAD(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru = nullptr, const Twine &Name = "", Type *Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL)
+virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = "");
+virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = "");
+virtual Value* GEP(Value* ptr, const std::initializer_list<Value*>& indexList, Type* Ty = nullptr);
+virtual Value*
+GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList, Type* Ty = nullptr);
+
+Value* GEPA(Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
+Value* GEPA(Type* Ty, Value* Ptr, ArrayRef<Value*> IdxList, const Twine& Name = "");
+
+Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*>& indexList);
+Value* IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t>& indexList);
+
+virtual LoadInst*
+                  LOAD(Value* Ptr, const char* Name, Type* Ty = nullptr, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value*         Ptr,
+                       const Twine&   Name  = "",
+                       Type*          Ty    = nullptr,
+                       JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst*
+                  LOAD(Type* Ty, Value* Ptr, const Twine& Name = "", JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value*         Ptr,
+                       bool           isVolatile,
+                       const Twine&   Name  = "",
+                       Type*          Ty    = nullptr,
+                       JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+virtual LoadInst* LOAD(Value*                                 BasePtr,
+                       const std::initializer_list<uint32_t>& offset,
+                       const llvm::Twine&                     Name  = "",
+                       Type*                                  Ty    = nullptr,
+                       JIT_MEM_CLIENT                         usage = MEM_CLIENT_INTERNAL);
+
+virtual CallInst* MASKED_LOAD(Value*         Ptr,
+                              unsigned       Align,
+                              Value*         Mask,
+                              Value*         PassThru = nullptr,
+                              const Twine&   Name     = "",
+                              Type*          Ty       = nullptr,
+                              JIT_MEM_CLIENT usage    = MEM_CLIENT_INTERNAL)
  {
      return IRB()->CreateMaskedLoad(Ptr, Align, Mask, PassThru, Name);
  }
  
-LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
-StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
-StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*> &offset);
-
-Value* MEM_ADD(Value* i32Incr, Value* basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name = "");
-
-void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
-    Value* mask, Value* vGatherComponents[], bool bPackedOutput, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
-
-Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-
-Value *GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
-
-void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
-
-void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
-void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Value* vGatherOutput[], bool bPackedOutput);
+LoadInst*
+           LOADV(Value* BasePtr, const std::initializer_list<Value*>& offset, const llvm::Twine& name = "");
+StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list<uint32_t>& offset);
+StoreInst* STOREV(Value* Val, Value* BasePtr, const std::initializer_list<Value*>& offset);
+
+Value* MEM_ADD(Value*                                 i32Incr,
+               Value*                                 basePtr,
+               const std::initializer_list<uint32_t>& indices,
+               const llvm::Twine&                     name = "");
+
+void Gather4(const SWR_FORMAT format,
+             Value*           pSrcBase,
+             Value*           byteOffsets,
+             Value*           mask,
+             Value*           vGatherComponents[],
+             bool             bPackedOutput,
+             JIT_MEM_CLIENT   usage = MEM_CLIENT_INTERNAL);
+
+virtual Value* GATHERPS(Value*         src,
+                        Value*         pBase,
+                        Value*         indices,
+                        Value*         mask,
+                        uint8_t        scale = 1,
+                        JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void GATHER4PS(const SWR_FORMAT_INFO& info,
+               Value*                 pSrcBase,
+               Value*                 byteOffsets,
+               Value*                 mask,
+               Value*                 vGatherComponents[],
+               bool                   bPackedOutput,
+               JIT_MEM_CLIENT         usage = MEM_CLIENT_INTERNAL);
+
+virtual Value* GATHERDD(Value*         src,
+                        Value*         pBase,
+                        Value*         indices,
+                        Value*         mask,
+                        uint8_t        scale = 1,
+                        JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void GATHER4DD(const SWR_FORMAT_INFO& info,
+               Value*                 pSrcBase,
+               Value*                 byteOffsets,
+               Value*                 mask,
+               Value*                 vGatherComponents[],
+               bool                   bPackedOutput,
+               JIT_MEM_CLIENT         usage = MEM_CLIENT_INTERNAL);
+
+Value* GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
+
+Value* GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* pVecPassthru);
+
+virtual void SCATTERPS(Value*         pDst,
+                       Value*         vSrc,
+                       Value*         vOffsets,
+                       Value*         vMask,
+                       JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
+
+void Shuffle8bpcGather4(const SWR_FORMAT_INFO& info,
+                        Value*                 vGatherInput,
+                        Value*                 vGatherOutput[],
+                        bool                   bPackedOutput);
+void Shuffle16bpcGather4(const SWR_FORMAT_INFO& info,
+                         Value*                 vGatherInput[],
+                         Value*                 vGatherOutput[],
+                         bool                   bPackedOutput);
  
  // Static stack allocations for scatter operations
-Value* pScatterStackSrc{ nullptr };
-Value* pScatterStackOffsets{ nullptr };
+Value* pScatterStackSrc{nullptr};
+Value* pScatterStackOffsets{nullptr};
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp

index 231fa94d00c5036c062d967dedd9e7b7b5b8baff..4116dad4430c234558ce9d3ec8f413281ff3177d 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file builder_misc.cpp
-* 
-* @brief Implementation for miscellaneous builder functions
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.cpp
+ *
+ * @brief Implementation for miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder.h"
  #include "common/rdtsc_buckets.h"
@@ -50,25 +50,25 @@ namespace SwrJit
  
          // Extract the sign, exponent, and mantissa
          uint32_t uf = *(uint32_t*)&val;
-        sign = (uf & 0x80000000) >> 31;
-        exp = (uf & 0x7F800000) >> 23;
-        mant = uf & 0x007FFFFF;
+        sign        = (uf & 0x80000000) >> 31;
+        exp         = (uf & 0x7F800000) >> 23;
+        mant        = uf & 0x007FFFFF;
  
          // Check for out of range
          if (std::isnan(val))
          {
-            exp = 0x1F;
+            exp  = 0x1F;
              mant = 0x200;
-            sign = 1;                     // set the sign bit for NANs
+            sign = 1; // set the sign bit for NANs
          }
          else if (std::isinf(val))
          {
-            exp = 0x1f;
+            exp  = 0x1f;
              mant = 0x0;
          }
          else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
          {
-            exp = 0x1E;
+            exp  = 0x1E;
              mant = 0x3FF;
          }
          else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
@@ -76,12 +76,12 @@ namespace SwrJit
              mant |= 0x00800000;
              for (; exp <= 0x70; mant >>= 1, exp++)
                  ;
-            exp = 0;
+            exp  = 0;
              mant = mant >> 13;
          }
          else if (exp < 0x66) // Too small to represent -> Zero
          {
-            exp = 0;
+            exp  = 0;
              mant = 0;
          }
          else
@@ -89,7 +89,7 @@ namespace SwrJit
              // Saves bits that will be shifted off for rounding
              roundBits = mant & 0x1FFFu;
              // convert exponent and mantissa to 16 bit format
-            exp = exp - 0x70;
+            exp  = exp - 0x70;
              mant = mant >> 13;
  
              // Essentially RTZ, but round up if off by only 1 lsb
@@ -129,7 +129,7 @@ namespace SwrJit
          {
              uint32_t sign = (val & 0x8000) << 16;
              uint32_t mant = (val & 0x3ff) << 13;
-            uint32_t exp = (val >> 10) & 0x1f;
+            uint32_t exp  = (val >> 10) & 0x1f;
              if ((exp == 0) && (mant != 0)) // Adjust exponent and mantissa for denormals
              {
                  mant <<= 1;
@@ -140,139 +140,94 @@ namespace SwrJit
                  }
                  mant &= (0x3ff << 13);
              }
-            exp = ((exp - 15 + 127) & 0xff) << 23;
+            exp    = ((exp - 15 + 127) & 0xff) << 23;
              result = sign | exp | mant;
          }
  
          return *(float*)&result;
      }
  
-    Constant *Builder::C(bool i)
-    {
-        return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0));
-    }
+    Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); }
  
-    Constant *Builder::C(char i)
-    {
-        return ConstantInt::get(IRB()->getInt8Ty(), i);
-    }
+    Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
  
-    Constant *Builder::C(uint8_t i)
-    {
-        return ConstantInt::get(IRB()->getInt8Ty(), i);
-    }
+    Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
  
-    Constant *Builder::C(int i)
-    {
-        return ConstantInt::get(IRB()->getInt32Ty(), i);
-    }
+    Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
  
-    Constant *Builder::C(int64_t i)
-    {
-        return ConstantInt::get(IRB()->getInt64Ty(), i);
-    }
+    Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
  
-    Constant *Builder::C(uint16_t i)
-    {
-        return ConstantInt::get(mInt16Ty,i);
-    }
+    Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); }
  
-    Constant *Builder::C(uint32_t i)
-    {
-        return ConstantInt::get(IRB()->getInt32Ty(), i);
-    }
+    Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
  
-    Constant *Builder::C(uint64_t i)
-    {
-        return ConstantInt::get(IRB()->getInt64Ty(), i);
-    }
+    Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
  
-    Constant *Builder::C(float i)
-    {
-        return ConstantFP::get(IRB()->getFloatTy(), i);
-    }
+    Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); }
  
-    Constant *Builder::PRED(bool pred)
+    Constant* Builder::PRED(bool pred)
      {
          return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
      }
  
-    Value *Builder::VIMMED1(int i)
+    Value* Builder::VIMMED1(int i)
      {
          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VIMMED1_16(int i)
+    Value* Builder::VIMMED1_16(int i)
      {
          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VIMMED1(uint32_t i)
+    Value* Builder::VIMMED1(uint32_t i)
      {
          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VIMMED1_16(uint32_t i)
+    Value* Builder::VIMMED1_16(uint32_t i)
      {
          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VIMMED1(float i)
+    Value* Builder::VIMMED1(float i)
      {
          return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
      }
  
-    Value *Builder::VIMMED1_16(float i)
+    Value* Builder::VIMMED1_16(float i)
      {
          return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
      }
  
-    Value *Builder::VIMMED1(bool i)
+    Value* Builder::VIMMED1(bool i)
      {
          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VIMMED1_16(bool i)
+    Value* Builder::VIMMED1_16(bool i)
      {
          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
      }
  
-    Value *Builder::VUNDEF_IPTR()
-    {
-        return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth));
-    }
+    Value* Builder::VUNDEF_IPTR() { return UndefValue::get(VectorType::get(mInt32PtrTy, mVWidth)); }
  
-    Value *Builder::VUNDEF(Type* t)
-    {
-        return UndefValue::get(VectorType::get(t, mVWidth));
-    }
+    Value* Builder::VUNDEF(Type* t) { return UndefValue::get(VectorType::get(t, mVWidth)); }
  
-    Value *Builder::VUNDEF_I()
-    {
-        return UndefValue::get(VectorType::get(mInt32Ty, mVWidth));
-    }
+    Value* Builder::VUNDEF_I() { return UndefValue::get(VectorType::get(mInt32Ty, mVWidth)); }
  
-    Value *Builder::VUNDEF_I_16()
-    {
-        return UndefValue::get(VectorType::get(mInt32Ty, mVWidth16));
-    }
+    Value* Builder::VUNDEF_I_16() { return UndefValue::get(VectorType::get(mInt32Ty, mVWidth16)); }
  
-    Value *Builder::VUNDEF_F()
-    {
-        return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
-    }
+    Value* Builder::VUNDEF_F() { return UndefValue::get(VectorType::get(mFP32Ty, mVWidth)); }
  
-    Value *Builder::VUNDEF_F_16()
-    {
-        return UndefValue::get(VectorType::get(mFP32Ty, mVWidth16));
-    }
+    Value* Builder::VUNDEF_F_16() { return UndefValue::get(VectorType::get(mFP32Ty, mVWidth16)); }
  
-    Value *Builder::VUNDEF(Type *ty, uint32_t size)
+    Value* Builder::VUNDEF(Type* ty, uint32_t size)
      {
          return UndefValue::get(VectorType::get(ty, size));
      }
  
-    Value *Builder::VBROADCAST(Value *src, const llvm::Twine& name)
+    Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name)
      {
          // check if src is already a vector
          if (src->getType()->isVectorTy())
@@ -283,7 +238,7 @@ namespace SwrJit
          return VECTOR_SPLAT(mVWidth, src, name);
      }
  
-    Value *Builder::VBROADCAST_16(Value *src)
+    Value* Builder::VBROADCAST_16(Value* src)
      {
          // check if src is already a vector
          if (src->getType()->isVectorTy())
@@ -297,18 +252,20 @@ namespace SwrJit
      uint32_t Builder::IMMED(Value* v)
      {
          SWR_ASSERT(isa<ConstantInt>(v));
-        ConstantInt *pValConst = cast<ConstantInt>(v);
+        ConstantInt* pValConst = cast<ConstantInt>(v);
          return pValConst->getZExtValue();
      }
  
      int32_t Builder::S_IMMED(Value* v)
      {
          SWR_ASSERT(isa<ConstantInt>(v));
-        ConstantInt *pValConst = cast<ConstantInt>(v);
+        ConstantInt* pValConst = cast<ConstantInt>(v);
          return pValConst->getSExtValue();
      }
  
-    CallInst *Builder::CALL(Value *Callee, const std::initializer_list<Value*> &argsList, const llvm::Twine& name)
+    CallInst* Builder::CALL(Value*                               Callee,
+                            const std::initializer_list<Value*>& argsList,
+                            const llvm::Twine&                   name)
      {
          std::vector<Value*> args;
          for (auto arg : argsList)
@@ -316,14 +273,14 @@ namespace SwrJit
          return CALLA(Callee, args, name);
      }
  
-    CallInst *Builder::CALL(Value *Callee, Value* arg)
+    CallInst* Builder::CALL(Value* Callee, Value* arg)
      {
          std::vector<Value*> args;
          args.push_back(arg);
          return CALLA(Callee, args);
      }
  
-    CallInst *Builder::CALL2(Value *Callee, Value* arg1, Value* arg2)
+    CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2)
      {
          std::vector<Value*> args;
          args.push_back(arg1);
@@ -331,7 +288,7 @@ namespace SwrJit
          return CALLA(Callee, args);
      }
  
-    CallInst *Builder::CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3)
+    CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3)
      {
          std::vector<Value*> args;
          args.push_back(arg1);
@@ -340,15 +297,15 @@ namespace SwrJit
          return CALLA(Callee, args);
      }
  
-    Value *Builder::VRCP(Value *va, const llvm::Twine& name)
+    Value* Builder::VRCP(Value* va, const llvm::Twine& name)
      {
-        return FDIV(VIMMED1(1.0f), va, name);  // 1 / a
+        return FDIV(VIMMED1(1.0f), va, name); // 1 / a
      }
  
-    Value *Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY)
+    Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY)
      {
          Value* vOut = FMADDPS(vA, vX, vC);
-        vOut = FMADDPS(vB, vY, vOut);
+        vOut        = FMADDPS(vB, vY, vOut);
          return vOut;
      }
  
@@ -362,7 +319,8 @@ namespace SwrJit
      ///   result from a GEP, printing out the pointer to memory
      /// @param printStr - constant string to print, which includes format specifiers
      /// @param printArgs - initializer list of Value*'s to print to std out
-    CallInst *Builder::PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs)
+    CallInst* Builder::PRINT(const std::string&                   printStr,
+                             const std::initializer_list<Value*>& printArgs)
      {
          // push the arguments to CallPrint into a vector
          std::vector<Value*> printCallArgs;
@@ -370,15 +328,15 @@ namespace SwrJit
          printCallArgs.resize(1);
  
          // search through the format string for special processing
-        size_t pos = 0;
+        size_t      pos = 0;
          std::string tempStr(printStr);
-        pos = tempStr.find('%', pos);
+        pos    = tempStr.find('%', pos);
          auto v = printArgs.begin();
  
          while ((pos != std::string::npos) && (v != printArgs.end()))
          {
-            Value* pArg = *v;
-            Type* pType = pArg->getType();
+            Value* pArg  = *v;
+            Type*  pType = pArg->getType();
  
              if (pType->isVectorTy())
              {
@@ -386,7 +344,7 @@ namespace SwrJit
  
                  if (toupper(tempStr[pos + 1]) == 'X')
                  {
-                    tempStr[pos] = '0';
+                    tempStr[pos]     = '0';
                      tempStr[pos + 1] = 'x';
                      tempStr.insert(pos + 2, "%08X ");
                      pos += 7;
@@ -410,9 +368,11 @@ namespace SwrJit
                      {
                          tempStr.insert(pos, std::string("%f "));
                          pos += 3;
-                        printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
+                        printCallArgs.push_back(
+                            FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
                      }
-                    printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
+                    printCallArgs.push_back(
+                        FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
                  }
                  else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
                  {
@@ -421,9 +381,11 @@ namespace SwrJit
                      {
                          tempStr.insert(pos, std::string("%d "));
                          pos += 3;
-                        printCallArgs.push_back(S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+                        printCallArgs.push_back(
+                            S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
                      }
-                    printCallArgs.push_back(S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+                    printCallArgs.push_back(
+                        S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
                  }
                  else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy()))
                  {
@@ -432,9 +394,11 @@ namespace SwrJit
                      {
                          tempStr.insert(pos, std::string("%d "));
                          pos += 3;
-                        printCallArgs.push_back(Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+                        printCallArgs.push_back(
+                            Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
                      }
-                    printCallArgs.push_back(Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
+                    printCallArgs.push_back(
+                        Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
                  }
              }
              else
@@ -464,89 +428,82 @@ namespace SwrJit
          }
  
          // create global variable constant string
-        Constant *constString = ConstantDataArray::getString(JM()->mContext,tempStr,true);
-        GlobalVariable *gvPtr = new GlobalVariable(constString->getType(),true,GlobalValue::InternalLinkage,constString,"printStr");
+        Constant*       constString = ConstantDataArray::getString(JM()->mContext, tempStr, true);
+        GlobalVariable* gvPtr       = new GlobalVariable(
+            constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr");
          JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);
  
          // get a pointer to the first character in the constant string array
-        std::vector<Constant*> geplist{C(0),C(0)};
-        Constant *strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr,geplist,false);
+        std::vector<Constant*> geplist{C(0), C(0)};
+        Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false);
  
          // insert the pointer to the format string in the argument vector
          printCallArgs[0] = strGEP;
  
          // get pointer to CallPrint function and insert decl into the module if needed
          std::vector<Type*> args;
-        args.push_back(PointerType::get(mInt8Ty,0));
-        FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext),args,true);
-        Function *callPrintFn = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
+        args.push_back(PointerType::get(mInt8Ty, 0));
+        FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true);
+        Function*     callPrintFn =
+            cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
  
          // if we haven't yet added the symbol to the symbol table
-        if((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
+        if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
          {
-            sys::DynamicLibrary::AddSymbol("CallPrint", (void *)&CallPrint);
+            sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint);
          }
  
          // insert a call to CallPrint
-        return CALLA(callPrintFn,printCallArgs);
+        return CALLA(callPrintFn, printCallArgs);
      }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief Wrapper around PRINT with initializer list.
-    CallInst* Builder::PRINT(const std::string &printStr)
-    {
-        return PRINT(printStr, {});
-    }
+    CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); }
  
-    Value *Builder::EXTRACT_16(Value *x, uint32_t imm)
+    Value* Builder::EXTRACT_16(Value* x, uint32_t imm)
      {
          if (imm == 0)
          {
-            return VSHUFFLE(x, UndefValue::get(x->getType()), { 0, 1, 2, 3, 4, 5, 6, 7 });
+            return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
          }
          else
          {
-            return VSHUFFLE(x, UndefValue::get(x->getType()), { 8, 9, 10, 11, 12, 13, 14, 15 });
+            return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
          }
      }
  
-    Value *Builder::JOIN_16(Value *a, Value *b)
+    Value* Builder::JOIN_16(Value* a, Value* b)
      {
-        return VSHUFFLE(a, b, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+        return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
      }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
-    Value *Builder::MASK(Value *vmask)
+    Value* Builder::MASK(Value* vmask)
      {
-        Value *src = BITCAST(vmask, mSimdInt32Ty);
+        Value* src = BITCAST(vmask, mSimdInt32Ty);
          return ICMP_SLT(src, VIMMED1(0));
      }
  
-    Value *Builder::MASK_16(Value *vmask)
+    Value* Builder::MASK_16(Value* vmask)
      {
-        Value *src = BITCAST(vmask, mSimd16Int32Ty);
+        Value* src = BITCAST(vmask, mSimd16Int32Ty);
          return ICMP_SLT(src, VIMMED1_16(0));
      }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
-    Value *Builder::VMASK(Value *mask)
-    {
-        return S_EXT(mask, mSimdInt32Ty);
-    }
+    Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); }
  
-    Value *Builder::VMASK_16(Value *mask)
-    {
-        return S_EXT(mask, mSimd16Int32Ty);
-    }
+    Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); }
  
      /// @brief Convert <Nxi1> llvm mask to integer
-    Value *Builder::VMOVMSK(Value* mask)
+    Value* Builder::VMOVMSK(Value* mask)
      {
          SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
          uint32_t numLanes = mask->getType()->getVectorNumElements();
-        Value* i32Result;
+        Value*   i32Result;
          if (numLanes == 8)
          {
              i32Result = BITCAST(mask, mInt8Ty);
@@ -564,18 +521,18 @@ namespace SwrJit
      }
  
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Generate a VPSHUFB operation in LLVM IR.  If not  
+    /// @brief Generate a VPSHUFB operation in LLVM IR.  If not
      /// supported on the underlying platform, emulate it
      /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
      /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
-    /// Byte masks in lower 128 lane of b selects 8 bit values from lower 
-    /// 128bits of a, and vice versa for the upper lanes.  If the mask 
+    /// Byte masks in lower 128 lane of b selects 8 bit values from lower
+    /// 128bits of a, and vice versa for the upper lanes.  If the mask
      /// value is negative, '0' is inserted.
-    Value *Builder::PSHUFB(Value* a, Value* b)
+    Value* Builder::PSHUFB(Value* a, Value* b)
      {
          Value* res;
          // use avx2 pshufb instruction if available
-        if(JM()->mArch.AVX2())
+        if (JM()->mArch.AVX2())
          {
              res = VPSHUFB(a, b);
          }
@@ -589,22 +546,26 @@ namespace SwrJit
  
              // insert an 8 bit value from the high and low lanes of a per loop iteration
              numElms /= 2;
-            for(uint32_t i = 0; i < numElms; i++)
+            for (uint32_t i = 0; i < numElms; i++)
              {
-                ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
+                ConstantInt* cLow128b  = cast<ConstantInt>(cB->getAggregateElement(i));
                  ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
  
                  // extract values from constant mask
-                char valLow128bLane =  (char)(cLow128b->getSExtValue());
+                char valLow128bLane  = (char)(cLow128b->getSExtValue());
                  char valHigh128bLane = (char)(cHigh128b->getSExtValue());
  
                  Value* insertValLow128b;
                  Value* insertValHigh128b;
  
                  // if the mask value is negative, insert a '0' in the respective output position
-                // otherwise, lookup the value at mask position (bits 3..0 of the respective mask byte) in a and insert in output vector
-                insertValLow128b = (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
-                insertValHigh128b = (valHigh128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
+                // otherwise, lookup the value at mask position (bits 3..0 of the respective mask
+                // byte) in a and insert in output vector
+                insertValLow128b =
+                    (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
+                insertValHigh128b = (valHigh128bLane < 0)
+                                        ? C((char)0)
+                                        : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
  
                  vShuf = VINSERT(vShuf, insertValLow128b, i);
                  vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
@@ -615,11 +576,11 @@ namespace SwrJit
      }
  
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32 
+    /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
      /// bits)in LLVM IR.  If not supported on the underlying platform, emulate it
-    /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values.  Only 
+    /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values.  Only
      /// lower 8 values are used.
-    Value *Builder::PMOVSXBD(Value* a)
+    Value* Builder::PMOVSXBD(Value* a)
      {
          // VPMOVSXBD output type
          Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
@@ -628,10 +589,10 @@ namespace SwrJit
      }
  
      //////////////////////////////////////////////////////////////////////////
-    /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32 
+    /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
      /// bits)in LLVM IR.  If not supported on the underlying platform, emulate it
      /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
-    Value *Builder::PMOVSXWD(Value* a)
+    Value* Builder::PMOVSXWD(Value* a)
      {
          // VPMOVSXWD output type
          Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
@@ -643,7 +604,7 @@ namespace SwrJit
      /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
      /// in LLVM IR.  If not supported on the underlying platform, emulate it
      /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
-    Value *Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
+    Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
      {
          if (JM()->mArch.F16C())
          {
@@ -651,20 +612,22 @@ namespace SwrJit
          }
          else
          {
-            FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty);
-            Function* pCvtPh2Ps = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
+            FunctionType* pFuncTy   = FunctionType::get(mFP32Ty, mInt16Ty);
+            Function*     pCvtPh2Ps = cast<Function>(
+                JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
  
              if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr)
              {
-                sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)&ConvertFloat16ToFloat32);
+                sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32",
+                                               (void*)&ConvertFloat16ToFloat32);
              }
  
              Value* pResult = UndefValue::get(mSimdFP32Ty);
              for (uint32_t i = 0; i < mVWidth; ++i)
              {
-                Value* pSrc = VEXTRACT(a, C(i));
+                Value* pSrc  = VEXTRACT(a, C(i));
                  Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
-                pResult = VINSERT(pResult, pConv, C(i));
+                pResult      = VINSERT(pResult, pConv, C(i));
              }
  
              pResult->setName(name);
@@ -676,7 +639,7 @@ namespace SwrJit
      /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
      /// in LLVM IR.  If not supported on the underlying platform, emulate it
      /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
-    Value *Builder::CVTPS2PH(Value* a, Value* rounding)
+    Value* Builder::CVTPS2PH(Value* a, Value* rounding)
      {
          if (JM()->mArch.F16C())
          {
@@ -685,45 +648,47 @@ namespace SwrJit
          else
          {
              // call scalar C function for now
-            FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
-            Function* pCvtPs2Ph = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
+            FunctionType* pFuncTy   = FunctionType::get(mInt16Ty, mFP32Ty);
+            Function*     pCvtPs2Ph = cast<Function>(
+                JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
  
              if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
              {
-                sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", (void *)&ConvertFloat32ToFloat16);
+                sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16",
+                                               (void*)&ConvertFloat32ToFloat16);
              }
  
              Value* pResult = UndefValue::get(mSimdInt16Ty);
              for (uint32_t i = 0; i < mVWidth; ++i)
              {
-                Value* pSrc = VEXTRACT(a, C(i));
+                Value* pSrc  = VEXTRACT(a, C(i));
                  Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
-                pResult = VINSERT(pResult, pConv, C(i));
+                pResult      = VINSERT(pResult, pConv, C(i));
              }
  
              return pResult;
          }
      }
  
-    Value *Builder::PMAXSD(Value* a, Value* b)
+    Value* Builder::PMAXSD(Value* a, Value* b)
      {
          Value* cmp = ICMP_SGT(a, b);
          return SELECT(cmp, a, b);
      }
  
-    Value *Builder::PMINSD(Value* a, Value* b)
+    Value* Builder::PMINSD(Value* a, Value* b)
      {
          Value* cmp = ICMP_SLT(a, b);
          return SELECT(cmp, a, b);
      }
  
-    Value *Builder::PMAXUD(Value* a, Value* b)
+    Value* Builder::PMAXUD(Value* a, Value* b)
      {
          Value* cmp = ICMP_UGT(a, b);
          return SELECT(cmp, a, b);
      }
  
-    Value *Builder::PMINUD(Value* a, Value* b)
+    Value* Builder::PMINUD(Value* a, Value* b)
      {
          Value* cmp = ICMP_ULT(a, b);
          return SELECT(cmp, a, b);
@@ -733,65 +698,65 @@ namespace SwrJit
      Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
      {
          auto saveIP = IRB()->saveIP();
-        IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
-                              pFunc->getEntryBlock().begin());
+        IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
          Value* pAlloca = ALLOCA(pType);
-        if (saveIP.isSet()) IRB()->restoreIP(saveIP);
+        if (saveIP.isSet())
+            IRB()->restoreIP(saveIP);
          return pAlloca;
      }
  
      Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
      {
          auto saveIP = IRB()->saveIP();
-        IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
-            pFunc->getEntryBlock().begin());
+        IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
          Value* pAlloca = ALLOCA(pType, pArraySize);
-        if (saveIP.isSet()) IRB()->restoreIP(saveIP);
+        if (saveIP.isSet())
+            IRB()->restoreIP(saveIP);
          return pAlloca;
      }
  
      Value* Builder::VABSPS(Value* a)
      {
-        Value* asInt = BITCAST(a, mSimdInt32Ty);
+        Value* asInt  = BITCAST(a, mSimdInt32Ty);
          Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
          return result;
      }
  
-    Value *Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
+    Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
      {
-        Value *lowCmp = ICMP_SLT(src, low);
-        Value *ret = SELECT(lowCmp, low, src);
+        Value* lowCmp = ICMP_SLT(src, low);
+        Value* ret    = SELECT(lowCmp, low, src);
  
-        Value *highCmp = ICMP_SGT(ret, high);
-        ret = SELECT(highCmp, high, ret, name);
+        Value* highCmp = ICMP_SGT(ret, high);
+        ret            = SELECT(highCmp, high, ret, name);
  
          return ret;
      }
  
-    Value *Builder::FCLAMP(Value* src, Value* low, Value* high)
+    Value* Builder::FCLAMP(Value* src, Value* low, Value* high)
      {
-        Value *lowCmp = FCMP_OLT(src, low);
-        Value *ret = SELECT(lowCmp, low, src);
+        Value* lowCmp = FCMP_OLT(src, low);
+        Value* ret    = SELECT(lowCmp, low, src);
  
-        Value *highCmp = FCMP_OGT(ret, high);
-        ret = SELECT(highCmp, high, ret);
+        Value* highCmp = FCMP_OGT(ret, high);
+        ret            = SELECT(highCmp, high, ret);
  
          return ret;
      }
  
-    Value *Builder::FCLAMP(Value* src, float low, float high)
+    Value* Builder::FCLAMP(Value* src, float low, float high)
      {
          Value* result = VMAXPS(src, VIMMED1(low));
-        result = VMINPS(result, VIMMED1(high));
+        result        = VMINPS(result, VIMMED1(high));
  
          return result;
      }
  
-    Value *Builder::FMADDPS(Value* a, Value* b, Value* c)
+    Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
      {
          Value* vOut;
          // use FMADs if available
-        if(JM()->mArch.AVX2())
+        if (JM()->mArch.AVX2())
          {
              vOut = VFMADDPS(a, b, c);
          }
@@ -804,39 +769,40 @@ namespace SwrJit
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief pop count on vector mask (e.g. <8 x i1>)
-    Value* Builder::VPOPCNT(Value* a)
-    {
-        return POPCNT(VMOVMSK(a));
-    }
+    Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
  
      //////////////////////////////////////////////////////////////////////////
      /// @brief C functions called by LLVM IR
      //////////////////////////////////////////////////////////////////////////
  
-    Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
+    Value* Builder::VEXTRACTI128(Value* a, Constant* imm8)
      {
-        bool flag = !imm8->isZeroValue();
-        SmallVector<Constant*,8> idx;
-        for (unsigned i = 0; i < mVWidth / 2; i++) {
+        bool                      flag = !imm8->isZeroValue();
+        SmallVector<Constant*, 8> idx;
+        for (unsigned i = 0; i < mVWidth / 2; i++)
+        {
              idx.push_back(C(flag ? i + mVWidth / 2 : i));
          }
          return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
      }
  
-    Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
+    Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
      {
-        bool flag = !imm8->isZeroValue();
-        SmallVector<Constant*,8> idx;
-        for (unsigned i = 0; i < mVWidth; i++) {
+        bool                      flag = !imm8->isZeroValue();
+        SmallVector<Constant*, 8> idx;
+        for (unsigned i = 0; i < mVWidth; i++)
+        {
              idx.push_back(C(i));
          }
-        Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
+        Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
  
-        SmallVector<Constant*,8> idx2;
-        for (unsigned i = 0; i < mVWidth / 2; i++) {
+        SmallVector<Constant*, 8> idx2;
+        for (unsigned i = 0; i < mVWidth / 2; i++)
+        {
              idx2.push_back(C(flag ? i : i + mVWidth));
          }
-        for (unsigned i = mVWidth / 2; i < mVWidth; i++) {
+        for (unsigned i = mVWidth / 2; i < mVWidth; i++)
+        {
              idx2.push_back(C(flag ? i + mVWidth / 2 : i));
          }
          return VSHUFFLE(a, inter, ConstantVector::get(idx2));
@@ -845,45 +811,51 @@ namespace SwrJit
      // rdtsc buckets macros
      void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
      {
-        // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
-        // buckets framework when single threaded
+        // @todo due to an issue with thread local storage propagation in llvm, we can only safely
+        // call into buckets framework when single threaded
          if (KNOB_SINGLE_THREADED)
          {
              std::vector<Type*> args{
-                PointerType::get(mInt32Ty, 0),   // pBucketMgr
-                mInt32Ty                        // id
+                PointerType::get(mInt32Ty, 0), // pBucketMgr
+                mInt32Ty                       // id
              };
  
              FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
-            Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
-            if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == nullptr)
+            Function*     pFunc   = cast<Function>(
+                JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
+            if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") ==
+                nullptr)
              {
-                sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", (void*)&BucketManager_StartBucket);
+                sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket",
+                                               (void*)&BucketManager_StartBucket);
              }
  
-            CALL(pFunc, { pBucketMgr, pId });
+            CALL(pFunc, {pBucketMgr, pId});
          }
      }
  
      void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
      {
-        // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
-        // buckets framework when single threaded
+        // @todo due to an issue with thread local storage propagation in llvm, we can only safely
+        // call into buckets framework when single threaded
          if (KNOB_SINGLE_THREADED)
          {
              std::vector<Type*> args{
-                PointerType::get(mInt32Ty, 0),   // pBucketMgr
-                mInt32Ty                        // id
+                PointerType::get(mInt32Ty, 0), // pBucketMgr
+                mInt32Ty                       // id
              };
  
              FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
-            Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
-            if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == nullptr)
+            Function*     pFunc   = cast<Function>(
+                JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
+            if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") ==
+                nullptr)
              {
-                sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", (void*)&BucketManager_StopBucket);
+                sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket",
+                                               (void*)&BucketManager_StopBucket);
              }
  
-            CALL(pFunc, { pBucketMgr, pId });
+            CALL(pFunc, {pBucketMgr, pId});
          }
      }
  
@@ -892,14 +864,14 @@ namespace SwrJit
          if (pType->isStructTy())
          {
              uint32_t numElems = pType->getStructNumElements();
-            Type* pElemTy = pType->getStructElementType(0);
+            Type*    pElemTy  = pType->getStructElementType(0);
              return numElems * GetTypeSize(pElemTy);
          }
  
          if (pType->isArrayTy())
          {
              uint32_t numElems = pType->getArrayNumElements();
-            Type* pElemTy = pType->getArrayElementType();
+            Type*    pElemTy  = pType->getArrayElementType();
              return numElems * GetTypeSize(pElemTy);
          }
  
@@ -927,4 +899,4 @@ namespace SwrJit
          SWR_ASSERT(false, "Unimplemented type.");
          return 0;
      }
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h

index d7732ef8c2ae6311850853ca5ba56d984f5959e3..f8701f9ba84a64227143c914ea29d7ffe7f753bf 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -1,156 +1,164 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-* 
-* @file builder_misc.h
-* 
-* @brief miscellaneous builder functions
-* 
-* Notes:
-* 
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file builder_misc.h
+ *
+ * @brief miscellaneous builder functions
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
-Constant *C(bool i);
-Constant *C(char i);
-Constant *C(uint8_t i);
-Constant *C(int i);
-Constant *C(int64_t i);
-Constant *C(uint64_t i);
-Constant *C(uint16_t i);
-Constant *C(uint32_t i);
-Constant *C(float i);
-
-template<typename Ty>
-Constant *C(const std::initializer_list<Ty> &constList)
+Constant* C(bool i);
+Constant* C(char i);
+Constant* C(uint8_t i);
+Constant* C(int i);
+Constant* C(int64_t i);
+Constant* C(uint64_t i);
+Constant* C(uint16_t i);
+Constant* C(uint32_t i);
+Constant* C(float i);
+
+template <typename Ty>
+Constant* C(const std::initializer_list<Ty>& constList)
  {
      std::vector<Constant*> vConsts;
-    for(auto i : constList) {
-
+    for (auto i : constList)
+    {
          vConsts.push_back(C((Ty)i));
      }
      return ConstantVector::get(vConsts);
  }
  
-template<typename Ty>
-Constant *CA(LLVMContext& ctx, ArrayRef<Ty> constList)
+template <typename Ty>
+Constant* CA(LLVMContext& ctx, ArrayRef<Ty> constList)
  {
      return ConstantDataArray::get(ctx, constList);
  }
  
-template<typename Ty>
-Constant *CInc(uint32_t base, uint32_t count)
+template <typename Ty>
+Constant* CInc(uint32_t base, uint32_t count)
  {
      std::vector<Constant*> vConsts;
  
-    for(uint32_t i = 0; i < count; i++) {
+    for (uint32_t i = 0; i < count; i++)
+    {
          vConsts.push_back(C((Ty)base));
          base++;
      }
      return ConstantVector::get(vConsts);
  }
  
-Constant *PRED(bool pred);
+Constant* PRED(bool pred);
  
-Value *VIMMED1(int i);
-Value *VIMMED1_16(int i);
+Value* VIMMED1(int i);
+Value* VIMMED1_16(int i);
  
-Value *VIMMED1(uint32_t i);
-Value *VIMMED1_16(uint32_t i);
+Value* VIMMED1(uint32_t i);
+Value* VIMMED1_16(uint32_t i);
  
-Value *VIMMED1(float i);
-Value *VIMMED1_16(float i);
+Value* VIMMED1(float i);
+Value* VIMMED1_16(float i);
  
-Value *VIMMED1(bool i);
-Value *VIMMED1_16(bool i);
+Value* VIMMED1(bool i);
+Value* VIMMED1_16(bool i);
  
-Value *VUNDEF(Type* t);
+Value* VUNDEF(Type* t);
  
-Value *VUNDEF_F();
-Value *VUNDEF_F_16();
+Value* VUNDEF_F();
+Value* VUNDEF_F_16();
  
-Value *VUNDEF_I();
-Value *VUNDEF_I_16();
+Value* VUNDEF_I();
+Value* VUNDEF_I_16();
  
-Value *VUNDEF(Type* ty, uint32_t size);
+Value* VUNDEF(Type* ty, uint32_t size);
  
-Value *VUNDEF_IPTR();
+Value* VUNDEF_IPTR();
  
-Value *VBROADCAST(Value *src, const llvm::Twine& name = "");
-Value *VBROADCAST_16(Value *src);
+Value* VBROADCAST(Value* src, const llvm::Twine& name = "");
+Value* VBROADCAST_16(Value* src);
  
-Value *VRCP(Value *va, const llvm::Twine& name = "");
-Value *VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY);
+Value* VRCP(Value* va, const llvm::Twine& name = "");
+Value* VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY);
  
  uint32_t IMMED(Value* i);
-int32_t S_IMMED(Value* i);
+int32_t  S_IMMED(Value* i);
  
-CallInst *CALL(Value *Callee, const std::initializer_list<Value*> &args, const llvm::Twine& name = "");
-CallInst *CALL(Value *Callee) { return CALLA(Callee); }
-CallInst *CALL(Value *Callee, Value* arg);
-CallInst *CALL2(Value *Callee, Value* arg1, Value* arg2);
-CallInst *CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3);
+CallInst*
+          CALL(Value* Callee, const std::initializer_list<Value*>& args, const llvm::Twine& name = "");
+CallInst* CALL(Value* Callee)
+{
+    return CALLA(Callee);
+}
+CallInst* CALL(Value* Callee, Value* arg);
+CallInst* CALL2(Value* Callee, Value* arg1, Value* arg2);
+CallInst* CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3);
  
-Value *MASK(Value *vmask);
-Value *MASK_16(Value *vmask);
+Value* MASK(Value* vmask);
+Value* MASK_16(Value* vmask);
  
-Value *VMASK(Value *mask);
-Value *VMASK_16(Value *mask);
+Value* VMASK(Value* mask);
+Value* VMASK_16(Value* mask);
  
-Value *VMOVMSK(Value *mask);
+Value* VMOVMSK(Value* mask);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief functions that build IR to call x86 intrinsics directly, or
  /// emulate them with other instructions if not available on the host
  //////////////////////////////////////////////////////////////////////////
  
-Value *EXTRACT_16(Value *x, uint32_t imm);
-Value *JOIN_16(Value *a, Value *b);
+Value* EXTRACT_16(Value* x, uint32_t imm);
+Value* JOIN_16(Value* a, Value* b);
  
-Value *PSHUFB(Value* a, Value* b);
-Value *PMOVSXBD(Value* a);
-Value *PMOVSXWD(Value* a);
-Value *CVTPH2PS(Value* a, const llvm::Twine& name = "");
-Value *CVTPS2PH(Value* a, Value* rounding);
-Value *PMAXSD(Value* a, Value* b);
-Value *PMINSD(Value* a, Value* b);
-Value *PMAXUD(Value* a, Value* b);
-Value *PMINUD(Value* a, Value* b);
-Value *VABSPS(Value* a);
-Value *FMADDPS(Value* a, Value* b, Value* c);
+Value* PSHUFB(Value* a, Value* b);
+Value* PMOVSXBD(Value* a);
+Value* PMOVSXWD(Value* a);
+Value* CVTPH2PS(Value* a, const llvm::Twine& name = "");
+Value* CVTPS2PH(Value* a, Value* rounding);
+Value* PMAXSD(Value* a, Value* b);
+Value* PMINSD(Value* a, Value* b);
+Value* PMAXUD(Value* a, Value* b);
+Value* PMINUD(Value* a, Value* b);
+Value* VABSPS(Value* a);
+Value* FMADDPS(Value* a, Value* b, Value* c);
  
-Value *ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name = "");
-Value *FCLAMP(Value* src, Value* low, Value* high);
-Value *FCLAMP(Value* src, float low, float high);
+Value* ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name = "");
+Value* FCLAMP(Value* src, Value* low, Value* high);
+Value* FCLAMP(Value* src, float low, float high);
  
-CallInst *PRINT(const std::string &printStr);
-CallInst *PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs);
+CallInst* PRINT(const std::string& printStr);
+CallInst* PRINT(const std::string& printStr, const std::initializer_list<Value*>& printArgs);
  
  Value* VPOPCNT(Value* a);
  
-Value* INT3() { return DEBUGTRAP(); }
+Value* INT3()
+{
+    return DEBUGTRAP();
+}
  
  
-Value *VEXTRACTI128(Value* a, Constant* imm8);
-Value *VINSERTI128(Value* a, Value* b, Constant* imm8);
+Value* VEXTRACTI128(Value* a, Constant* imm8);
+Value* VINSERTI128(Value* a, Value* b, Constant* imm8);
  
  // rdtsc buckets macros
  void RDTSC_START(Value* pBucketMgr, Value* pId);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp

index 0abcd1a8d765d47dfc9c6d6656cf35460fc26542..b4d326ebdcc2a34ad98d236cdd0073559e6d50ef 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fetch_jit.cpp
-*
-* @brief Implementation of the fetch jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fetch_jit.cpp
+ *
+ * @brief Implementation of the fetch jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder_gfx_mem.h"
  #include "jit_api.h"
@@ -54,42 +54,64 @@ enum ConversionType
  //////////////////////////////////////////////////////////////////////////
  struct FetchJit : public BuilderGfxMem
  {
-    FetchJit(JitManager* pJitMgr) :
-        BuilderGfxMem(pJitMgr)
-    {}
+    FetchJit(JitManager* pJitMgr) : BuilderGfxMem(pJitMgr) {}
  
      Function* Create(const FETCH_COMPILE_STATE& fetchState);
  
      Value* GetSimdValid32bitIndices(Value* vIndices, Value* pLastIndex);
      Value* GetSimdValid16bitIndices(Value* vIndices, Value* pLastIndex);
      Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
-    template<typename T> Value* GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex);
+    template <typename T>
+    Value* GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex);
  
      // package up Shuffle*bpcGatherd args into a tuple for convenience
-    typedef std::tuple<Value*&, Value*, const Instruction::CastOps, const ConversionType,
-        uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4],
-        const uint32_t(&)[4]> Shuffle8bpcArgs;
-
-    void Shuffle8bpcGatherd16(Shuffle8bpcArgs &args);
-    void Shuffle8bpcGatherd(Shuffle8bpcArgs &args);
-
-    typedef std::tuple<Value*(&)[2], Value*, const Instruction::CastOps, const ConversionType,
-        uint32_t&, uint32_t&, const ComponentEnable, const ComponentControl(&)[4], Value*(&)[4]> Shuffle16bpcArgs;
-
-    void Shuffle16bpcGather16(Shuffle16bpcArgs &args);
-    void Shuffle16bpcGather(Shuffle16bpcArgs &args);
-
-    void StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4]);
-
-    Value *GenerateCompCtrlVector(const ComponentControl ctrl);
-
-    void JitGatherVertices(const FETCH_COMPILE_STATE &fetchState, Value* streams, Value* vIndices, Value* pVtxOut);
+    typedef std::tuple<Value*&,
+                       Value*,
+                       const Instruction::CastOps,
+                       const ConversionType,
+                       uint32_t&,
+                       uint32_t&,
+                       const ComponentEnable,
+                       const ComponentControl (&)[4],
+                       Value* (&)[4],
+                       const uint32_t (&)[4]>
+        Shuffle8bpcArgs;
+
+    void Shuffle8bpcGatherd16(Shuffle8bpcArgs& args);
+    void Shuffle8bpcGatherd(Shuffle8bpcArgs& args);
+
+    typedef std::tuple<Value* (&)[2],
+                       Value*,
+                       const Instruction::CastOps,
+                       const ConversionType,
+                       uint32_t&,
+                       uint32_t&,
+                       const ComponentEnable,
+                       const ComponentControl (&)[4],
+                       Value* (&)[4]>
+        Shuffle16bpcArgs;
+
+    void Shuffle16bpcGather16(Shuffle16bpcArgs& args);
+    void Shuffle16bpcGather(Shuffle16bpcArgs& args);
+
+    void StoreVertexElements(Value*         pVtxOut,
+                             const uint32_t outputElt,
+                             const uint32_t numEltsToStore,
+                             Value* (&vVertexElements)[4]);
+
+    Value* GenerateCompCtrlVector(const ComponentControl ctrl);
+
+    void JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
+                           Value*                     streams,
+                           Value*                     vIndices,
+                           Value*                     pVtxOut);
  
      bool IsOddFormat(SWR_FORMAT format);
      bool IsUniformFormat(SWR_FORMAT format);
      void UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4]);
-    void CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
-    void ConvertFormat(SWR_FORMAT format, Value *texels[4]);
+    void CreateGatherOddFormats(
+        SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4]);
+    void ConvertFormat(SWR_FORMAT format, Value* texels[4]);
  
      Value* mpWorkerData;
      Value* mpFetchInfo;
@@ -100,25 +122,29 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
      std::stringstream fnName("FCH_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
      fnName << ComputeCRC(0, &fetchState, sizeof(fetchState));
  
-    Function*    fetch = Function::Create(JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
-    BasicBlock*    entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
+    Function* fetch = Function::Create(
+        JM()->mFetchShaderTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+    BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", fetch);
  
      fetch->getParent()->setModuleIdentifier(fetch->getName());
  
      IRB()->SetInsertPoint(entry);
  
-    auto    argitr = fetch->arg_begin();
+    auto argitr = fetch->arg_begin();
  
      // Fetch shader arguments
-    Value* privateContext = &*argitr; ++argitr;
+    Value* privateContext = &*argitr;
+    ++argitr;
      privateContext->setName("privateContext");
      SetPrivateContext(privateContext);
  
-    mpWorkerData = &*argitr; ++argitr;
+    mpWorkerData = &*argitr;
+    ++argitr;
      mpWorkerData->setName("pWorkerData");
-    mpFetchInfo = &*argitr; ++argitr;
+    mpFetchInfo = &*argitr;
+    ++argitr;
      mpFetchInfo->setName("fetchInfo");
-    Value*    pVtxOut = &*argitr;
+    Value* pVtxOut = &*argitr;
      pVtxOut->setName("vtxOutput");
  
      uint32_t baseWidth = mVWidth;
@@ -133,71 +159,77 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
      pVtxOut = BITCAST(pVtxOut, PointerType::get(mSimdFP32Ty, 0));
  
      // SWR_FETCH_CONTEXT::pStreams
-    Value*    streams = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_pStreams});
+    Value* streams = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_pStreams});
      streams->setName("pStreams");
  
      // SWR_FETCH_CONTEXT::pIndices
-    Value*    indices = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpIndices});
+    Value* indices = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpIndices});
      indices->setName("pIndices");
  
      // SWR_FETCH_CONTEXT::pLastIndex
-    Value*    pLastIndex = LOAD(mpFetchInfo,{0, SWR_FETCH_CONTEXT_xpLastIndex});
+    Value* pLastIndex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_xpLastIndex});
      pLastIndex->setName("pLastIndex");
  
      Value* vIndices;
-    switch(fetchState.indexType)
+    switch (fetchState.indexType)
      {
-        case R8_UINT:
-            indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
-            if(fetchState.bDisableIndexOOBCheck)
-            {
-                vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
-                vIndices = Z_EXT(vIndices, mSimdInt32Ty);
-            }
-            else
-            {
-                vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
-            }
-            break;
-        case R16_UINT: 
-            if(fetchState.bDisableIndexOOBCheck)
-            {
-                vIndices = LOAD(BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)), {(uint32_t)0});
-                vIndices = Z_EXT(vIndices, mSimdInt32Ty);
-            }
-            else
-            {
-                vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
-            }
-            break;
-        case R32_UINT:
-            (fetchState.bDisableIndexOOBCheck) ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
-                                               : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
-            break; // incoming type is already 32bit int
-        default:
-            SWR_INVALID("Unsupported index type");
-            vIndices = nullptr;
-            break;
+    case R8_UINT:
+        indices = BITCAST(indices, Type::getInt8PtrTy(JM()->mContext, 0));
+        if (fetchState.bDisableIndexOOBCheck)
+        {
+            vIndices = LOAD(
+                BITCAST(indices, PointerType::get(VectorType::get(mInt8Ty, mpJitMgr->mVWidth), 0)),
+                {(uint32_t)0});
+            vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+        }
+        else
+        {
+            vIndices = GetSimdValid8bitIndices(indices, pLastIndex);
+        }
+        break;
+    case R16_UINT:
+        if (fetchState.bDisableIndexOOBCheck)
+        {
+            vIndices = LOAD(
+                BITCAST(indices, PointerType::get(VectorType::get(mInt16Ty, mpJitMgr->mVWidth), 0)),
+                {(uint32_t)0});
+            vIndices = Z_EXT(vIndices, mSimdInt32Ty);
+        }
+        else
+        {
+            vIndices = GetSimdValid16bitIndices(indices, pLastIndex);
+        }
+        break;
+    case R32_UINT:
+        (fetchState.bDisableIndexOOBCheck)
+            ? vIndices = LOAD(indices, "", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH)
+            : vIndices = GetSimdValid32bitIndices(indices, pLastIndex);
+        break; // incoming type is already 32bit int
+    default:
+        SWR_INVALID("Unsupported index type");
+        vIndices = nullptr;
+        break;
      }
  
-    if(fetchState.bForceSequentialAccessEnable)
+    if (fetchState.bForceSequentialAccessEnable)
      {
-        Value* pOffsets = mVWidth == 8 ? C({ 0, 1, 2, 3, 4, 5, 6, 7 }) : 
-            C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+        Value* pOffsets = mVWidth == 8 ? C({0, 1, 2, 3, 4, 5, 6, 7})
+                                       : C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
  
          // VertexData buffers are accessed sequentially, the index is equal to the vertex number
-        vIndices = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
+        vIndices = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
          vIndices = ADD(vIndices, pOffsets);
      }
  
      Value* vVertexId = vIndices;
      if (fetchState.bVertexIDOffsetEnable)
      {
-        // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally correct
-        Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_BaseVertex }));
-        Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_StartVertex }));
-        vVertexId = ADD(vIndices, vBaseVertex);
-        vVertexId = ADD(vVertexId, vStartVertex);
+        // Assuming one of baseVertex or startVertex is 0, so adding both should be functionally
+        // correct
+        Value* vBaseVertex  = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
+        Value* vStartVertex = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex}));
+        vVertexId           = ADD(vIndices, vBaseVertex);
+        vVertexId           = ADD(vVertexId, vStartVertex);
      }
  
      // store out vertex IDs
@@ -206,30 +238,30 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
          // store out in simd8 halves until core supports 16-wide natively
          auto vVertexIdLo = EXTRACT_16(vVertexId, 0);
          auto vVertexIdHi = EXTRACT_16(vVertexId, 1);
-        STORE(vVertexIdLo, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
-        STORE(vVertexIdHi, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 }));
+        STORE(vVertexIdLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
+        STORE(vVertexIdHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2}));
      }
      else if (mVWidth == 8)
      {
-        STORE(vVertexId, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID }));
+        STORE(vVertexId, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID}));
      }
  
      // store out cut mask if enabled
      if (fetchState.bEnableCutIndex)
      {
          Value* vCutIndex = VIMMED1(fetchState.cutIndex);
-        Value* cutMask = VMASK(ICMP_EQ(vIndices, vCutIndex));
-        
+        Value* cutMask   = VMASK(ICMP_EQ(vIndices, vCutIndex));
+
          if (mVWidth == 16)
          {
              auto cutMaskLo = EXTRACT_16(cutMask, 0);
              auto cutMaskHi = EXTRACT_16(cutMask, 1);
-            STORE(cutMaskLo, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
-            STORE(cutMaskHi, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask2 }));
+            STORE(cutMaskLo, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
+            STORE(cutMaskHi, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask2}));
          }
          else if (mVWidth == 8)
          {
-            STORE(cutMask, GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CutMask }));
+            STORE(cutMask, GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CutMask}));
          }
      }
  
@@ -279,7 +311,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
  #if USE_SIMD16_SHADERS
      SetTargetWidth(baseWidth);
  #endif
- 
+
      return fetch;
  }
  
@@ -297,9 +329,9 @@ bool FetchJit::IsOddFormat(SWR_FORMAT format)
  // format is uniform if all components are the same size and type
  bool FetchJit::IsUniformFormat(SWR_FORMAT format)
  {
-    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
-    uint32_t bpc0 = info.bpc[0];
-    uint32_t type0 = info.type[0];
+    const SWR_FORMAT_INFO& info  = GetFormatInfo(format);
+    uint32_t               bpc0  = info.bpc[0];
+    uint32_t               type0 = info.type[0];
  
      for (uint32_t c = 1; c < info.numComps; ++c)
      {
@@ -323,10 +355,10 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
      for (uint32_t c = 0; c < info.numComps; ++c)
      {
          uint32_t swizzledIndex = info.swizzle[c];
-        uint32_t compBits = info.bpc[c];
-        uint32_t bitmask = ((1 << compBits) - 1) << bitOffset;
-        Value* comp = AND(vInput, bitmask);
-        comp = LSHR(comp, bitOffset);
+        uint32_t compBits      = info.bpc[c];
+        uint32_t bitmask       = ((1 << compBits) - 1) << bitOffset;
+        Value*   comp          = AND(vInput, bitmask);
+        comp                   = LSHR(comp, bitOffset);
  
          result[swizzledIndex] = comp;
          bitOffset += compBits;
@@ -336,14 +368,15 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
  // gather for odd component size formats
  // gather SIMD full pixels per lane then shift/mask to move each component to their
  // own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
+void FetchJit::CreateGatherOddFormats(
+    SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
  {
-    const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
  
      // only works if pixel size is <= 32bits
      SWR_ASSERT(info.bpp <= 32);
  
-    Value *pGather;
+    Value* pGather;
      if (info.bpp == 32)
      {
          pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask);
@@ -351,17 +384,17 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pB
      else
      {
          // Can't use 32-bit gather for items less than 32-bits, could cause page faults.
-        Value *pMem = ALLOCA(mSimdInt32Ty);
+        Value* pMem = ALLOCA(mSimdInt32Ty);
          STORE(VIMMED1(0u), pMem);
  
-        pBase = BITCAST(pBase, PointerType::get(mInt8Ty, 0));
+        pBase          = BITCAST(pBase, PointerType::get(mInt8Ty, 0));
          Value* pDstMem = BITCAST(pMem, mInt32PtrTy);
  
          for (uint32_t lane = 0; lane < mVWidth; ++lane)
          {
              // Get index
              Value* index = VEXTRACT(pOffsets, C(lane));
-            Value* mask = VEXTRACT(pMask, C(lane));
+            Value* mask  = VEXTRACT(pMask, C(lane));
              switch (info.bpp)
              {
              case 8:
@@ -418,9 +451,9 @@ void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pB
      pResult[3] = BITCAST(pResult[3], mSimdFP32Ty);
  }
  
-void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
+void FetchJit::ConvertFormat(SWR_FORMAT format, Value* texels[4])
  {
-    const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
  
      for (uint32_t c = 0; c < info.numComps; ++c)
      {
@@ -436,13 +469,14 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
          {
              if (info.type[c] == SWR_TYPE_SNORM)
              {
-                /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to -1.0f.
+                /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to
+                /// -1.0f.
  
                  /// result = c * (1.0f / (2^(n-1) - 1);
-                uint32_t n = info.bpc[c];
-                uint32_t pow2 = 1 << (n - 1);
-                float scale = 1.0f / (float)(pow2 - 1);
-                Value *vScale = VIMMED1(scale);
+                uint32_t n        = info.bpc[c];
+                uint32_t pow2     = 1 << (n - 1);
+                float    scale    = 1.0f / (float)(pow2 - 1);
+                Value*   vScale   = VIMMED1(scale);
                  texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
                  texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
                  texels[compIndex] = FMUL(texels[compIndex], vScale);
@@ -452,21 +486,22 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
                  SWR_ASSERT(info.type[c] == SWR_TYPE_UNORM);
  
                  /// result = c * (1.0f / (2^n - 1))
-                uint32_t n = info.bpc[c];
+                uint32_t n    = info.bpc[c];
                  uint32_t pow2 = 1 << n;
-                // special case 24bit unorm format, which requires a full divide to meet ULP requirement
+                // special case 24bit unorm format, which requires a full divide to meet ULP
+                // requirement
                  if (n == 24)
                  {
-                    float scale = (float)(pow2 - 1);
-                    Value* vScale = VIMMED1(scale);
+                    float  scale      = (float)(pow2 - 1);
+                    Value* vScale     = VIMMED1(scale);
                      texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
                      texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
                      texels[compIndex] = FDIV(texels[compIndex], vScale);
                  }
                  else
                  {
-                    float scale = 1.0f / (float)(pow2 - 1);
-                    Value *vScale = VIMMED1(scale);
+                    float  scale      = 1.0f / (float)(pow2 - 1);
+                    Value* vScale     = VIMMED1(scale);
                      texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
                      texels[compIndex] = UI_TO_FP(texels[compIndex], mSimdFP32Ty);
                      texels[compIndex] = FMUL(texels[compIndex], vScale);
@@ -483,17 +518,19 @@ void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
  /// @param streams - value pointer to the current vertex stream
  /// @param vIndices - vector value of indices to gather
  /// @param pVtxOut - value pointer to output simdvertex struct
-void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
-    Value* streams, Value* vIndices, Value* pVtxOut)
+void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE& fetchState,
+                                 Value*                     streams,
+                                 Value*                     vIndices,
+                                 Value*                     pVtxOut)
  {
      uint32_t currentVertexElement = 0;
-    uint32_t outputElt = 0;
-    Value* vVertexElements[4];
+    uint32_t outputElt            = 0;
+    Value*   vVertexElements[4];
  
-    Value* startVertex = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex});
+    Value* startVertex   = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartVertex});
      Value* startInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_StartInstance});
-    Value* curInstance = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance});
-    Value* vBaseVertex = VBROADCAST(LOAD(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_BaseVertex }));
+    Value* curInstance   = LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance});
+    Value* vBaseVertex   = VBROADCAST(LOAD(mpFetchInfo, {0, SWR_FETCH_CONTEXT_BaseVertex}));
      curInstance->setName("curInstance");
  
      for (uint32_t nInputElt = 0; nInputElt < fetchState.numAttribs; nInputElt += 1)
@@ -506,23 +543,25 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
              continue;
          }
  
-        const SWR_FORMAT_INFO &info = GetFormatInfo((SWR_FORMAT)ied.Format);
+        const SWR_FORMAT_INFO& info = GetFormatInfo((SWR_FORMAT)ied.Format);
          SWR_ASSERT((info.bpp != 0), "Unsupported format in JitGatherVertices.");
-        uint32_t bpc = info.bpp / info.numComps;  ///@todo Code below assumes all components are same size. Need to fix.
+        uint32_t bpc =
+            info.bpp /
+            info.numComps; ///@todo Code below assumes all components are same size. Need to fix.
  
-        Value *stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
+        Value* stream = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_xpData});
  
          // VGATHER* takes an *i8 src pointer
-        Value *pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0));
+        Value* pStreamBase = INT_TO_PTR(stream, PointerType::get(mInt8Ty, 0));
  
-        Value *stride = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
-        Value *vStride = VBROADCAST(stride);
+        Value* stride  = LOAD(streams, {ied.StreamIndex, SWR_VERTEX_BUFFER_STATE_pitch});
+        Value* vStride = VBROADCAST(stride);
  
          // max vertex index that is fully in bounds
-        Value *maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
-        maxVertex = LOAD(maxVertex);
+        Value* maxVertex = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_maxVertex)});
+        maxVertex        = LOAD(maxVertex);
  
-        Value *minVertex = NULL;
+        Value* minVertex = NULL;
          if (fetchState.bPartialVertexBuffer)
          {
              // min vertex index for low bounds OOB checking
@@ -536,9 +575,9 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
              curInstance = ADD(curInstance, startInstance);
          }
  
-        Value *vCurIndices;
-        Value *startOffset;
-        Value *vInstanceStride = VIMMED1(0);
+        Value* vCurIndices;
+        Value* startOffset;
+        Value* vInstanceStride = VIMMED1(0);
  
          if (ied.InstanceEnable)
          {
@@ -546,7 +585,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
  
              // prevent a div by 0 for 0 step rate
              Value* isNonZeroStep = ICMP_UGT(stepRate, C(0));
-            stepRate = SELECT(isNonZeroStep, stepRate, C(1));
+            stepRate             = SELECT(isNonZeroStep, stepRate, C(1));
  
              // calc the current offset into instanced data buffer
              Value* calcInstance = UDIV(curInstance, stepRate);
@@ -559,7 +598,8 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
          }
          else if (ied.InstanceStrideEnable)
          {
-            // grab the instance advancement state, determines stride in bytes from one instance to the next
+            // grab the instance advancement state, determines stride in bytes from one instance to
+            // the next
              Value* stepRate = C(ied.InstanceAdvancementState);
              vInstanceStride = VBROADCAST(MUL(curInstance, stepRate));
  
@@ -576,16 +616,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
              startOffset = startVertex;
          }
  
-        // All of the OOB calculations are in vertices, not VB offsets, to prevent having to 
+        // All of the OOB calculations are in vertices, not VB offsets, to prevent having to
          // do 64bit address offset calculations.
  
          // calculate byte offset to the start of the VB
-        Value* baseOffset = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
-        pStreamBase = GEP(pStreamBase, baseOffset);
+        Value* baseOffset     = MUL(Z_EXT(startOffset, mInt64Ty), Z_EXT(stride, mInt64Ty));
+        pStreamBase           = GEP(pStreamBase, baseOffset);
          Value* pStreamBaseGFX = ADD(stream, baseOffset);
  
          // if we have a start offset, subtract from max vertex. Used for OOB check
-        maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
+        maxVertex     = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
          Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0));
          // if we have a negative value, we're already OOB. clamp at 0.
          maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty));
@@ -593,38 +633,39 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
          if (fetchState.bPartialVertexBuffer)
          {
              // similary for min vertex
-            minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
-            Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0));
-            minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
+            minVertex     = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, mInt64Ty));
+            Value* minNeg = ICMP_SLT(minVertex, C((int64_t)0));
+            minVertex     = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty));
          }
  
          // Load the in bounds size of a partially valid vertex
-        Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
-        partialInboundsSize = LOAD(partialInboundsSize);
-        Value *vPartialVertexSize = VBROADCAST(partialInboundsSize);
-        Value *vBpp = VBROADCAST(C(info.Bpp));
-        Value *vAlignmentOffsets = VBROADCAST(C(ied.AlignedByteOffset));
+        Value* partialInboundsSize =
+            GEP(streams, {C(ied.StreamIndex), C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)});
+        partialInboundsSize       = LOAD(partialInboundsSize);
+        Value* vPartialVertexSize = VBROADCAST(partialInboundsSize);
+        Value* vBpp               = VBROADCAST(C(info.Bpp));
+        Value* vAlignmentOffsets  = VBROADCAST(C(ied.AlignedByteOffset));
  
          // is the element is <= the partially valid size
-        Value *vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets));
+        Value* vElementInBoundsMask = ICMP_SLE(vBpp, SUB(vPartialVertexSize, vAlignmentOffsets));
  
          // override cur indices with 0 if pitch is 0
          Value* pZeroPitchMask = ICMP_EQ(vStride, VIMMED1(0));
-        vCurIndices = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices);
+        vCurIndices           = SELECT(pZeroPitchMask, VIMMED1(0), vCurIndices);
  
          // are vertices partially OOB?
-        Value* vMaxVertex = VBROADCAST(maxVertex);
+        Value* vMaxVertex      = VBROADCAST(maxVertex);
          Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex);
  
          // are vertices fully in bounds?
          Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex);
  
-        Value *vGatherMask;
+        Value* vGatherMask;
          if (fetchState.bPartialVertexBuffer)
          {
              // are vertices below minVertex limit?
-            Value *vMinVertex = VBROADCAST(minVertex);
-            Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
+            Value* vMinVertex     = VBROADCAST(minVertex);
+            Value* vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex);
  
              // only fetch lanes that pass both tests
              vGatherMask = AND(vMaxGatherMask, vMinGatherMask);
@@ -639,23 +680,26 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
  
          // calculate the actual offsets into the VB
          Value* vOffsets = MUL(vCurIndices, vStride);
-        vOffsets = ADD(vOffsets, vAlignmentOffsets);
+        vOffsets        = ADD(vOffsets, vAlignmentOffsets);
  
          // if instance stride enable is:
          //  true  - add product of the instanceID and advancement state to the offst into the VB
          //  false - value of vInstanceStride has been initialialized to zero
          vOffsets = ADD(vOffsets, vInstanceStride);
  
-        // Packing and component control 
-        ComponentEnable compMask = (ComponentEnable)ied.ComponentPacking;
-        const ComponentControl compCtrl[4] { (ComponentControl)ied.ComponentControl0, (ComponentControl)ied.ComponentControl1, 
-                                             (ComponentControl)ied.ComponentControl2, (ComponentControl)ied.ComponentControl3}; 
+        // Packing and component control
+        ComponentEnable        compMask = (ComponentEnable)ied.ComponentPacking;
+        const ComponentControl compCtrl[4]{(ComponentControl)ied.ComponentControl0,
+                                           (ComponentControl)ied.ComponentControl1,
+                                           (ComponentControl)ied.ComponentControl2,
+                                           (ComponentControl)ied.ComponentControl3};
  
          // Special gather/conversion for formats without equal component sizes
          if (IsOddFormat((SWR_FORMAT)ied.Format))
          {
-            Value *pResults[4];
-            CreateGatherOddFormats((SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
+            Value* pResults[4];
+            CreateGatherOddFormats(
+                (SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
              ConvertFormat((SWR_FORMAT)ied.Format, pResults);
  
              for (uint32_t c = 0; c < 4; c += 1)
@@ -672,193 +716,214 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
                  }
              }
          }
-        else if(info.type[0] == SWR_TYPE_FLOAT)
+        else if (info.type[0] == SWR_TYPE_FLOAT)
          {
              ///@todo: support 64 bit vb accesses
-            Value *gatherSrc = VIMMED1(0.0f);
+            Value* gatherSrc = VIMMED1(0.0f);
  
-            SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format), 
-                "Unsupported format for standard gather fetch.");
+            SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
+                       "Unsupported format for standard gather fetch.");
  
              // Gather components from memory to store in a simdvertex structure
              switch (bpc)
              {
-                case 16:
-                {
-                    Value *vGatherResult[2];
+            case 16:
+            {
+                Value* vGatherResult[2];
  
-                    // if we have at least one component out of x or y to fetch
-                    if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
-                    {
-                        vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-                        // e.g. result of first 8x32bit integer gather for 16bit components
-                        // 256i - 0    1    2    3    4    5    6    7
-                        //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
-                        //
-                    }
+                // if we have at least one component out of x or y to fetch
+                if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+                {
+                    vGatherResult[0] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+                    // e.g. result of first 8x32bit integer gather for 16bit components
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
+                    //
+                }
  
-                    // if we have at least one component out of z or w to fetch
-                    if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
-                    {
-                        // offset base to the next components(zw) in the vertex to gather
-                        pStreamBase = GEP(pStreamBase, C((char)4));
-
-                        vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-                        // e.g. result of second 8x32bit integer gather for 16bit components
-                        // 256i - 0    1    2    3    4    5    6    7
-                        //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
-                        //
-                    }
+                // if we have at least one component out of z or w to fetch
+                if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+                {
+                    // offset base to the next components(zw) in the vertex to gather
+                    pStreamBase = GEP(pStreamBase, C((char)4));
  
-                    // if we have at least one component to shuffle into place
-                    if (compMask)
-                    {
-                        Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, Instruction::CastOps::FPExt, CONVERT_NONE,
-                            currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+                    vGatherResult[1] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+                    // e.g. result of second 8x32bit integer gather for 16bit components
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+                    //
+                }
  
-                        // Shuffle gathered components into place in simdvertex struct
-                        mVWidth == 16 ? Shuffle16bpcGather16(args) : Shuffle16bpcGather(args);  // outputs to vVertexElements ref
-                    }
+                // if we have at least one component to shuffle into place
+                if (compMask)
+                {
+                    Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
+                                                                  pVtxOut,
+                                                                  Instruction::CastOps::FPExt,
+                                                                  CONVERT_NONE,
+                                                                  currentVertexElement,
+                                                                  outputElt,
+                                                                  compMask,
+                                                                  compCtrl,
+                                                                  vVertexElements);
+
+                    // Shuffle gathered components into place in simdvertex struct
+                    mVWidth == 16 ? Shuffle16bpcGather16(args)
+                                  : Shuffle16bpcGather(args); // outputs to vVertexElements ref
                  }
-                    break;
-                case 32:
+            }
+            break;
+            case 32:
+            {
+                for (uint32_t i = 0; i < 4; i += 1)
                  {
-                    for (uint32_t i = 0; i < 4; i += 1)
+                    if (isComponentEnabled(compMask, i))
                      {
-                        if (isComponentEnabled(compMask, i))
+                        // if we need to gather the component
+                        if (compCtrl[i] == StoreSrc)
                          {
-                            // if we need to gather the component
-                            if (compCtrl[i] == StoreSrc)
-                            {
-                                // Gather a SIMD of vertices
-                                // APIs allow a 4GB range for offsets
-                                // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
-                                // But, we know that elements must be aligned for FETCH. :)
-                                // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
-                                Value *vShiftedOffsets = LSHR(vOffsets, 1);
-                                vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBaseGFX, vShiftedOffsets, vGatherMask, 2, GFX_MEM_CLIENT_FETCH);
-                            }
-                            else
-                            {
-                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
-                            }
-
-                            if (currentVertexElement > 3)
-                            {
-                                StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-                                // reset to the next vVertexElement to output
-                                currentVertexElement = 0;
-                            }
+                            // Gather a SIMD of vertices
+                            // APIs allow a 4GB range for offsets
+                            // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
+                            // But, we know that elements must be aligned for FETCH. :)
+                            // Right shift the offset by a bit and then scale by 2 to remove the
+                            // sign extension.
+                            Value* vShiftedOffsets = LSHR(vOffsets, 1);
+                            vVertexElements[currentVertexElement++] =
+                                GATHERPS(gatherSrc,
+                                         pStreamBaseGFX,
+                                         vShiftedOffsets,
+                                         vGatherMask,
+                                         2,
+                                         GFX_MEM_CLIENT_FETCH);
+                        }
+                        else
+                        {
+                            vVertexElements[currentVertexElement++] =
+                                GenerateCompCtrlVector(compCtrl[i]);
                          }
  
-                        // offset base to the next component in the vertex to gather
-                        pStreamBase = GEP(pStreamBase, C((char)4));
-                        pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
+                        if (currentVertexElement > 3)
+                        {
+                            StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
+                            // reset to the next vVertexElement to output
+                            currentVertexElement = 0;
+                        }
                      }
+
+                    // offset base to the next component in the vertex to gather
+                    pStreamBase    = GEP(pStreamBase, C((char)4));
+                    pStreamBaseGFX = ADD(pStreamBaseGFX, C((int64_t)4));
                  }
-                    break;
-                case 64:
+            }
+            break;
+            case 64:
+            {
+                for (uint32_t i = 0; i < 4; i += 1)
                  {
-                    for (uint32_t i = 0; i < 4; i += 1)
+                    if (isComponentEnabled(compMask, i))
                      {
-                        if (isComponentEnabled(compMask, i))
+                        // if we need to gather the component
+                        if (compCtrl[i] == StoreSrc)
                          {
-                            // if we need to gather the component
-                            if (compCtrl[i] == StoreSrc)
-                            {
-                                Value* vShufLo;
-                                Value* vShufHi;
-                                Value* vShufAll;
+                            Value* vShufLo;
+                            Value* vShufHi;
+                            Value* vShufAll;
  
-                                if (mVWidth == 8)
-                                {
-                                    vShufLo = C({ 0, 1, 2, 3 });
-                                    vShufHi = C({ 4, 5, 6, 7 });
-                                    vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
-                                }
-                                else
-                                {
-                                    SWR_ASSERT(mVWidth == 16);
-                                    vShufLo = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
-                                    vShufHi = C({ 8, 9, 10, 11, 12, 13, 14, 15 });
-                                    vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
-                                }
+                            if (mVWidth == 8)
+                            {
+                                vShufLo  = C({0, 1, 2, 3});
+                                vShufHi  = C({4, 5, 6, 7});
+                                vShufAll = C({0, 1, 2, 3, 4, 5, 6, 7});
+                            }
+                            else
+                            {
+                                SWR_ASSERT(mVWidth == 16);
+                                vShufLo = C({0, 1, 2, 3, 4, 5, 6, 7});
+                                vShufHi = C({8, 9, 10, 11, 12, 13, 14, 15});
+                                vShufAll =
+                                    C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
+                            }
  
-                                Value *vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
-                                Value *vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
+                            Value* vMaskLo = VSHUFFLE(vGatherMask, vGatherMask, vShufLo);
+                            Value* vMaskHi = VSHUFFLE(vGatherMask, vGatherMask, vShufHi);
  
-                                Value *vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
-                                Value *vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
+                            Value* vOffsetsLo = VSHUFFLE(vOffsets, vOffsets, vShufLo);
+                            Value* vOffsetsHi = VSHUFFLE(vOffsets, vOffsets, vShufHi);
  
-                                Value *vZeroDouble = VECTOR_SPLAT(mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
+                            Value* vZeroDouble = VECTOR_SPLAT(
+                                mVWidth / 2, ConstantFP::get(IRB()->getDoubleTy(), 0.0f));
  
-                                Value* pGatherLo = GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
-                                Value* pGatherHi = GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
+                            Value* pGatherLo =
+                                GATHERPD(vZeroDouble, pStreamBase, vOffsetsLo, vMaskLo);
+                            Value* pGatherHi =
+                                GATHERPD(vZeroDouble, pStreamBase, vOffsetsHi, vMaskHi);
  
-                                pGatherLo = VCVTPD2PS(pGatherLo);
-                                pGatherHi = VCVTPD2PS(pGatherHi);
+                            pGatherLo = VCVTPD2PS(pGatherLo);
+                            pGatherHi = VCVTPD2PS(pGatherHi);
  
-                                Value *pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
+                            Value* pGather = VSHUFFLE(pGatherLo, pGatherHi, vShufAll);
  
-                                vVertexElements[currentVertexElement++] = pGather;
-                            }
-                            else
-                            {
-                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
-                            }
-
-                            if (currentVertexElement > 3)
-                            {
-                                StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-                                // reset to the next vVertexElement to output
-                                currentVertexElement = 0;
-                            }
+                            vVertexElements[currentVertexElement++] = pGather;
+                        }
+                        else
+                        {
+                            vVertexElements[currentVertexElement++] =
+                                GenerateCompCtrlVector(compCtrl[i]);
                          }
  
-                        // offset base to the next component  in the vertex to gather
-                        pStreamBase = GEP(pStreamBase, C((char)8));
+                        if (currentVertexElement > 3)
+                        {
+                            StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
+                            // reset to the next vVertexElement to output
+                            currentVertexElement = 0;
+                        }
                      }
+
+                    // offset base to the next component  in the vertex to gather
+                    pStreamBase = GEP(pStreamBase, C((char)8));
                  }
-                    break;
-                default:
-                    SWR_INVALID("Tried to fetch invalid FP format");
-                    break;
+            }
+            break;
+            default:
+                SWR_INVALID("Tried to fetch invalid FP format");
+                break;
              }
          }
          else
          {
              Instruction::CastOps extendCastType = Instruction::CastOps::CastOpsEnd;
-            ConversionType conversionType = CONVERT_NONE;
+            ConversionType       conversionType = CONVERT_NONE;
  
-            SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format), 
-                "Unsupported format for standard gather fetch.");
+            SWR_ASSERT(IsUniformFormat((SWR_FORMAT)ied.Format),
+                       "Unsupported format for standard gather fetch.");
  
-            switch(info.type[0])
+            switch (info.type[0])
              {
-                case SWR_TYPE_UNORM: 
-                    conversionType = CONVERT_NORMALIZED;
-                case SWR_TYPE_UINT:
-                    extendCastType = Instruction::CastOps::ZExt;
-                    break;
-                case SWR_TYPE_SNORM:
-                    conversionType = CONVERT_NORMALIZED;
-                case SWR_TYPE_SINT:
-                    extendCastType = Instruction::CastOps::SExt;
-                    break;
-                case SWR_TYPE_USCALED:
-                    conversionType = CONVERT_USCALED;
-                    extendCastType = Instruction::CastOps::UIToFP;
-                    break;
-                case SWR_TYPE_SSCALED:
-                    conversionType = CONVERT_SSCALED;
-                    extendCastType = Instruction::CastOps::SIToFP;
-                    break;
-                case SWR_TYPE_SFIXED:
-                    conversionType = CONVERT_SFIXED;
-                    extendCastType = Instruction::CastOps::SExt;
-                    break;
-                default:
-                    break;
+            case SWR_TYPE_UNORM:
+                conversionType = CONVERT_NORMALIZED;
+            case SWR_TYPE_UINT:
+                extendCastType = Instruction::CastOps::ZExt;
+                break;
+            case SWR_TYPE_SNORM:
+                conversionType = CONVERT_NORMALIZED;
+            case SWR_TYPE_SINT:
+                extendCastType = Instruction::CastOps::SExt;
+                break;
+            case SWR_TYPE_USCALED:
+                conversionType = CONVERT_USCALED;
+                extendCastType = Instruction::CastOps::UIToFP;
+                break;
+            case SWR_TYPE_SSCALED:
+                conversionType = CONVERT_SSCALED;
+                extendCastType = Instruction::CastOps::SIToFP;
+                break;
+            case SWR_TYPE_SFIXED:
+                conversionType = CONVERT_SFIXED;
+                extendCastType = Instruction::CastOps::SExt;
+                break;
+            default:
+                break;
              }
  
              // value substituted when component of gather is masked
@@ -867,113 +932,132 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
              // Gather components from memory to store in a simdvertex structure
              switch (bpc)
              {
-                case 8:
+            case 8:
+            {
+                // if we have at least one component to fetch
+                if (compMask)
                  {
-                    // if we have at least one component to fetch
-                    if (compMask)
-                    {
-                        Value *vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-                        // e.g. result of an 8x32bit integer gather for 8bit components
-                        // 256i - 0    1    2    3    4    5    6    7
-                        //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw 
-
-                        Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
-                            currentVertexElement, outputElt, compMask, compCtrl, vVertexElements, info.swizzle);
-
-                        // Shuffle gathered components into place in simdvertex struct
-                        mVWidth == 16 ? Shuffle8bpcGatherd16(args) : Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
-                    }
+                    Value* vGatherResult = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+                    // e.g. result of an 8x32bit integer gather for 8bit components
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw
+
+                    Shuffle8bpcArgs args = std::forward_as_tuple(vGatherResult,
+                                                                 pVtxOut,
+                                                                 extendCastType,
+                                                                 conversionType,
+                                                                 currentVertexElement,
+                                                                 outputElt,
+                                                                 compMask,
+                                                                 compCtrl,
+                                                                 vVertexElements,
+                                                                 info.swizzle);
+
+                    // Shuffle gathered components into place in simdvertex struct
+                    mVWidth == 16 ? Shuffle8bpcGatherd16(args)
+                                  : Shuffle8bpcGatherd(args); // outputs to vVertexElements ref
                  }
-                break;
-                case 16:
-                {
-                    Value *vGatherResult[2];
+            }
+            break;
+            case 16:
+            {
+                Value* vGatherResult[2];
  
-                    // if we have at least one component out of x or y to fetch
-                    if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
-                    {
-                        vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-                        // e.g. result of first 8x32bit integer gather for 16bit components
-                        // 256i - 0    1    2    3    4    5    6    7
-                        //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
-                        //
-                    }
+                // if we have at least one component out of x or y to fetch
+                if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+                {
+                    vGatherResult[0] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+                    // e.g. result of first 8x32bit integer gather for 16bit components
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
+                    //
+                }
  
-                    // if we have at least one component out of z or w to fetch
-                    if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
-                    {
-                        // offset base to the next components(zw) in the vertex to gather
-                        pStreamBase = GEP(pStreamBase, C((char)4));
-
-                        vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-                        // e.g. result of second 8x32bit integer gather for 16bit components
-                        // 256i - 0    1    2    3    4    5    6    7
-                        //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw 
-                        //
-                    }
+                // if we have at least one component out of z or w to fetch
+                if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+                {
+                    // offset base to the next components(zw) in the vertex to gather
+                    pStreamBase = GEP(pStreamBase, C((char)4));
  
-                    // if we have at least one component to shuffle into place
-                    if (compMask)
-                    {
-                        Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult, pVtxOut, extendCastType, conversionType,
-                            currentVertexElement, outputElt, compMask, compCtrl, vVertexElements);
+                    vGatherResult[1] = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+                    // e.g. result of second 8x32bit integer gather for 16bit components
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
+                    //
+                }
  
-                        // Shuffle gathered components into place in simdvertex struct
-                        mVWidth == 16 ? Shuffle16bpcGather16(args) : Shuffle16bpcGather(args); // outputs to vVertexElements ref
-                    }
+                // if we have at least one component to shuffle into place
+                if (compMask)
+                {
+                    Shuffle16bpcArgs args = std::forward_as_tuple(vGatherResult,
+                                                                  pVtxOut,
+                                                                  extendCastType,
+                                                                  conversionType,
+                                                                  currentVertexElement,
+                                                                  outputElt,
+                                                                  compMask,
+                                                                  compCtrl,
+                                                                  vVertexElements);
+
+                    // Shuffle gathered components into place in simdvertex struct
+                    mVWidth == 16 ? Shuffle16bpcGather16(args)
+                                  : Shuffle16bpcGather(args); // outputs to vVertexElements ref
                  }
-                break;
-                case 32:
+            }
+            break;
+            case 32:
+            {
+                // Gather components into place in simdvertex struct
+                for (uint32_t i = 0; i < 4; i++)
                  {
-                    // Gathered components into place in simdvertex struct
-                    for (uint32_t i = 0; i < 4; i++)
+                    if (isComponentEnabled(compMask, i))
                      {
-                        if (isComponentEnabled(compMask, i))
+                        // if we need to gather the component
+                        if (compCtrl[i] == StoreSrc)
                          {
-                            // if we need to gather the component
-                            if (compCtrl[i] == StoreSrc)
+                            Value* pGather =
+                                GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
+
+                            if (conversionType == CONVERT_USCALED)
                              {
-                                Value* pGather = GATHERDD(gatherSrc, pStreamBase, vOffsets, vGatherMask);
-
-                                if (conversionType == CONVERT_USCALED)
-                                {
-                                    pGather = UI_TO_FP(pGather, mSimdFP32Ty);
-                                }
-                                else if (conversionType == CONVERT_SSCALED)
-                                {
-                                    pGather = SI_TO_FP(pGather, mSimdFP32Ty);
-                                }
-                                else if (conversionType == CONVERT_SFIXED)
-                                {
-                                    pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty), VBROADCAST(C(1/65536.0f)));
-                                }
-
-                                vVertexElements[currentVertexElement++] = pGather;
-
-                                // e.g. result of a single 8x32bit integer gather for 32bit components
-                                // 256i - 0    1    2    3    4    5    6    7
-                                //        xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 
+                                pGather = UI_TO_FP(pGather, mSimdFP32Ty);
                              }
-                            else
+                            else if (conversionType == CONVERT_SSCALED)
                              {
-                                vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+                                pGather = SI_TO_FP(pGather, mSimdFP32Ty);
                              }
-
-                            if (currentVertexElement > 3)
+                            else if (conversionType == CONVERT_SFIXED)
                              {
-                                StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
-
-                                // reset to the next vVertexElement to output
-                                currentVertexElement = 0;
+                                pGather = FMUL(SI_TO_FP(pGather, mSimdFP32Ty),
+                                               VBROADCAST(C(1 / 65536.0f)));
                              }
  
+                            vVertexElements[currentVertexElement++] = pGather;
+
+                            // e.g. result of a single 8x32bit integer gather for 32bit components
+                            // 256i - 0    1    2    3    4    5    6    7
+                            //        xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
                          }
+                        else
+                        {
+                            vVertexElements[currentVertexElement++] =
+                                GenerateCompCtrlVector(compCtrl[i]);
+                        }
+
+                        if (currentVertexElement > 3)
+                        {
+                            StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
  
-                        // offset base to the next component  in the vertex to gather
-                        pStreamBase = GEP(pStreamBase, C((char)4));
+                            // reset to the next vVertexElement to output
+                            currentVertexElement = 0;
+                        }
                      }
+
+                    // offset base to the next component  in the vertex to gather
+                    pStreamBase = GEP(pStreamBase, C((char)4));
                  }
-                break;
+            }
+            break;
              }
          }
      }
@@ -985,13 +1069,16 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
      }
  }
  
-template<typename T> Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
+template <typename T>
+Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices, Value* pLastIndex)
  {
-    SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty, "Function expects gfxptr_t for both input parameters.");
+    SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == mInt64Ty,
+               "Function expects gfxptr_t for both input parameters.");
  
      Type* Ty = nullptr;
  
-    static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == sizeof(uint8_t), "Unsupported type for use with GetSimdValidIndicesHelper<T>");
+    static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == sizeof(uint8_t),
+                  "Unsupported type for use with GetSimdValidIndicesHelper<T>");
      constexpr bool bSize = (sizeof(T) == sizeof(uint16_t));
      if (bSize)
      {
@@ -1017,19 +1104,19 @@ template<typename T> Value* FetchJit::GetSimdValidIndicesHelper(Value* pIndices,
          for (int64_t lane = 0; lane < mVWidth; lane++)
          {
              // Calculate the address of the requested index
-            Value *pIndex = GEP(pIndices, C(lane), Ty);
+            Value* pIndex = GEP(pIndices, C(lane), Ty);
  
              pLastIndex = INT_TO_PTR(pLastIndex, Ty);
  
-            // check if the address is less than the max index, 
+            // check if the address is less than the max index,
              Value* mask = ICMP_ULT(pIndex, pLastIndex);
  
              // if valid, load the index. if not, load 0 from the stack
              Value* pValid = SELECT(mask, pIndex, pZeroIndex);
-            Value *index = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH);
+            Value* index  = LOAD(pValid, "valid index", Ty, GFX_MEM_CLIENT_FETCH);
  
              // zero extended index to 32 bits and insert into the correct simd lane
-            index = Z_EXT(index, mInt32Ty);
+            index    = Z_EXT(index, mInt32Ty);
              vIndices = VINSERT(vIndices, index, lane);
          }
      }
@@ -1066,23 +1153,23 @@ Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
  Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
  {
      DataLayout dL(JM()->mpCurrentModule);
-    Value* iLastIndex = pLastIndex; 
-    Value* iIndices = pIndices;
+    Value*     iLastIndex = pLastIndex;
+    Value*     iIndices   = pIndices;
  
      // get the number of indices left in the buffer (endPtr - curPtr) / sizeof(index)
-    Value* numIndicesLeft = SUB(iLastIndex,iIndices);
-    numIndicesLeft = TRUNC(numIndicesLeft, mInt32Ty);
-    numIndicesLeft = SDIV(numIndicesLeft, C(4));
+    Value* numIndicesLeft = SUB(iLastIndex, iIndices);
+    numIndicesLeft        = TRUNC(numIndicesLeft, mInt32Ty);
+    numIndicesLeft        = SDIV(numIndicesLeft, C(4));
  
      // create a vector of index counts from the base index ptr passed into the fetch
      Constant* vIndexOffsets;
      if (mVWidth == 8)
      {
-        vIndexOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+        vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7});
      }
      else
      {
-        vIndexOffsets = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
+        vIndexOffsets = C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
      }
  
      // compare index count to the max valid index
@@ -1091,16 +1178,22 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
      //     ------------------------------
      //     vIndexMask    -1-1-1-1 0 0 0 0 : offsets < max pass
      //     vLoadedIndices 0 1 2 3 0 0 0 0 : offsets >= max masked to 0
-    Value* vMaxIndex = VBROADCAST(numIndicesLeft);
+    Value* vMaxIndex  = VBROADCAST(numIndicesLeft);
      Value* vIndexMask = ICMP_SGT(vMaxIndex, vIndexOffsets);
  
      // Load the indices; OOB loads 0
-    return MASKED_LOAD(pIndices, 4, vIndexMask, VIMMED1(0), "vIndices", PointerType::get(mSimdInt32Ty, 0), GFX_MEM_CLIENT_FETCH);
+    return MASKED_LOAD(pIndices,
+                       4,
+                       vIndexMask,
+                       VIMMED1(0),
+                       "vIndices",
+                       PointerType::get(mSimdInt32Ty, 0),
+                       GFX_MEM_CLIENT_FETCH);
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD of gathered 8bpc verts, zero or sign extends, 
-/// denormalizes if needed, converts to F32 if needed, and positions in 
+/// @brief Takes a SIMD of gathered 8bpc verts, zero or sign extends,
+/// denormalizes if needed, converts to F32 if needed, and positions in
  //  the proper SIMD rows to be output to the simdvertex structure
  /// @param args: (tuple of args, listed below)
  ///   @param vGatherResult - 8 gathered 8bpc vertices
@@ -1113,60 +1206,67 @@ Value* FetchJit::GetSimdValid32bitIndices(Value* pIndices, Value* pLastIndex)
  ///   @param compCtrl - component control val
  ///   @param vVertexElements[4] - vertex components to output
  ///   @param swizzle[4] - component swizzle location
-void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
+void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs& args)
  {
      // Unpack tuple args
-    Value*& vGatherResult = std::get<0>(args);
-    Value* pVtxOut = std::get<1>(args);
-    const Instruction::CastOps extendType = std::get<2>(args);
-    const ConversionType conversionType = std::get<3>(args);
-    uint32_t &currentVertexElement = std::get<4>(args);
-    uint32_t &outputElt = std::get<5>(args);
-    const ComponentEnable compMask = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4] = std::get<7>(args);
-    Value* (&vVertexElements)[4] = std::get<8>(args);
-    const uint32_t(&swizzle)[4] = std::get<9>(args);
+    Value*&                    vGatherResult        = std::get<0>(args);
+    Value*                     pVtxOut              = std::get<1>(args);
+    const Instruction::CastOps extendType           = std::get<2>(args);
+    const ConversionType       conversionType       = std::get<3>(args);
+    uint32_t&                  currentVertexElement = std::get<4>(args);
+    uint32_t&                  outputElt            = std::get<5>(args);
+    const ComponentEnable      compMask             = std::get<6>(args);
+    const ComponentControl(&compCtrl)[4]            = std::get<7>(args);
+    Value*(&vVertexElements)[4]                     = std::get<8>(args);
+    const uint32_t(&swizzle)[4]                     = std::get<9>(args);
  
      // cast types
-    Type *vGatherTy = VectorType::get(mInt32Ty, 8);
-    Type *v32x8Ty = VectorType::get(mInt8Ty, 32);
+    Type* vGatherTy = VectorType::get(mInt32Ty, 8);
+    Type* v32x8Ty   = VectorType::get(mInt8Ty, 32);
  
      // have to do extra work for sign extending
      if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP))
      {
-        Type *v16x8Ty = VectorType::get(mInt8Ty, 16); // 8x16bit ints in a 128bit lane
-        Type *v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
+        Type* v16x8Ty = VectorType::get(mInt8Ty, 16); // 8x16bit ints in a 128bit lane
+        Type* v128Ty  = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
  
          // shuffle mask, including any swizzling
-        const char x = (char)swizzle[0]; const char y = (char)swizzle[1];
-        const char z = (char)swizzle[2]; const char w = (char)swizzle[3];
-        Value *vConstMask = C<char>({ char(x), char(x + 4), char(x + 8), char(x + 12),
-            char(y), char(y + 4), char(y + 8), char(y + 12),
-            char(z), char(z + 4), char(z + 8), char(z + 12),
-            char(w), char(w + 4), char(w + 8), char(w + 12),
-            char(x), char(x + 4), char(x + 8), char(x + 12),
-            char(y), char(y + 4), char(y + 8), char(y + 12),
-            char(z), char(z + 4), char(z + 8), char(z + 12),
-            char(w), char(w + 4), char(w + 8), char(w + 12) });
+        const char x          = (char)swizzle[0];
+        const char y          = (char)swizzle[1];
+        const char z          = (char)swizzle[2];
+        const char w          = (char)swizzle[3];
+        Value*     vConstMask = C<char>(
+            {char(x),     char(x + 4),  char(x + 8), char(x + 12), char(y),     char(y + 4),
+             char(y + 8), char(y + 12), char(z),     char(z + 4),  char(z + 8), char(z + 12),
+             char(w),     char(w + 4),  char(w + 8), char(w + 12), char(x),     char(x + 4),
+             char(x + 8), char(x + 12), char(y),     char(y + 4),  char(y + 8), char(y + 12),
+             char(z),     char(z + 4),  char(z + 8), char(z + 12), char(w),     char(w + 4),
+             char(w + 8), char(w + 12)});
  
          // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
  
-        Value *vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
-        Value *vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
+        Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
+        Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
  
-        Value *vShufResult_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
-        Value *vShufResult_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
+        Value* vShufResult_lo =
+            BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
+        Value* vShufResult_hi =
+            BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
  
          // after pshufb: group components together in each 128bit lane
          // 256i - 0    1    2    3    4    5    6    7
          //        xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
  
-        Value *vi128XY_lo = nullptr;
-        Value *vi128XY_hi = nullptr;
+        Value* vi128XY_lo = nullptr;
+        Value* vi128XY_hi = nullptr;
          if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
          {
-            vi128XY_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
-            vi128XY_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
+            vi128XY_lo = BITCAST(
+                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
+                v128Ty);
+            vi128XY_hi = BITCAST(
+                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})),
+                v128Ty);
  
              // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
@@ -1174,26 +1274,30 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
          }
  
          // do the same for zw components
-        Value *vi128ZW_lo = nullptr;
-        Value *vi128ZW_hi = nullptr;
+        Value* vi128ZW_lo = nullptr;
+        Value* vi128ZW_hi = nullptr;
          if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
          {
-            vi128ZW_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
-            vi128ZW_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
+            vi128ZW_lo = BITCAST(
+                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
+                v128Ty);
+            vi128ZW_hi = BITCAST(
+                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})),
+                v128Ty);
          }
  
          // init denormalize variables if needed
          Instruction::CastOps fpCast;
-        Value *conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            fpCast = Instruction::CastOps::SIToFP;
+            fpCast           = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 127.0));
              break;
          case CONVERT_SSCALED:
-            fpCast = Instruction::CastOps::SIToFP;
+            fpCast           = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0));
              break;
          case CONVERT_USCALED:
@@ -1206,7 +1310,8 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
              break;
          }
  
-        // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+        // sign extend all enabled components. If we have a full vVertexElements, output to current
+        // simdvertex
          for (uint32_t i = 0; i < 4; i++)
          {
              if (isComponentEnabled(compMask, i))
@@ -1216,12 +1321,14 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
                      // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                      uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                      // if x or y, use vi128XY permute result, else use vi128ZW
-                    Value *selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
-                    Value *selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
+                    Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
+                    Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
  
                      // sign extend
-                    Value *temp_lo = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v16x8Ty));
-                    Value *temp_hi = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v16x8Ty));
+                    Value* temp_lo =
+                        PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v16x8Ty));
+                    Value* temp_hi =
+                        PMOVSXBD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v16x8Ty));
  
                      Value* temp = JOIN_16(temp_lo, temp_hi);
  
@@ -1250,20 +1357,21 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
          }
      }
      // else zero extend
-    else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+    else if ((extendType == Instruction::CastOps::ZExt) ||
+             (extendType == Instruction::CastOps::UIToFP))
      {
          // init denormalize variables if needed
          Instruction::CastOps fpCast;
-        Value *conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 255.0));
              break;
          case CONVERT_USCALED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0));
              break;
          case CONVERT_SSCALED:
@@ -1284,43 +1392,49 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
                  if (compCtrl[i] == ComponentControl::StoreSrc)
                  {
                      // pshufb masks for each component
-                    Value *vConstMask;
+                    Value* vConstMask;
                      switch (swizzle[i])
                      {
                      case 0:
                          // x shuffle mask
-                        vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
-                            0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
+                        vConstMask =
+                            C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
+                                     0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
                          break;
                      case 1:
                          // y shuffle mask
-                        vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
-                            1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
+                        vConstMask =
+                            C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
+                                     1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
                          break;
                      case 2:
                          // z shuffle mask
-                        vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
-                            2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
+                        vConstMask =
+                            C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
+                                     2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
                          break;
                      case 3:
                          // w shuffle mask
-                        vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
-                            3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
+                        vConstMask =
+                            C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
+                                     3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
                          break;
                      default:
                          vConstMask = nullptr;
                          break;
                      }
  
-                    Value *vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
-                    Value *vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
+                    Value* vGatherResult_lo = EXTRACT_16(vGatherResult, 0);
+                    Value* vGatherResult_hi = EXTRACT_16(vGatherResult, 1);
  
-                    Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
-                    Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
+                    Value* temp_lo =
+                        BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask), vGatherTy);
+                    Value* temp_hi =
+                        BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask), vGatherTy);
  
                      // after pshufb for x channel
                      // 256i - 0    1    2    3    4    5    6    7
-                    //        x000 x000 x000 x000 x000 x000 x000 x000 
+                    //        x000 x000 x000 x000 x000 x000 x000 x000
  
                      Value* temp = JOIN_16(temp_lo, temp_hi);
  
@@ -1354,19 +1468,19 @@ void FetchJit::Shuffle8bpcGatherd16(Shuffle8bpcArgs &args)
      }
  }
  
-void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
+void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs& args)
  {
      // Unpack tuple args
-    Value*& vGatherResult = std::get<0>(args);
-    Value* pVtxOut = std::get<1>(args);
-    const Instruction::CastOps extendType = std::get<2>(args);
-    const ConversionType conversionType = std::get<3>(args);
-    uint32_t &currentVertexElement = std::get<4>(args);
-    uint32_t &outputElt = std::get<5>(args);
-    const ComponentEnable compMask = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4] = std::get<7>(args);
-    Value* (&vVertexElements)[4] = std::get<8>(args);
-    const uint32_t(&swizzle)[4] = std::get<9>(args);
+    Value*&                    vGatherResult        = std::get<0>(args);
+    Value*                     pVtxOut              = std::get<1>(args);
+    const Instruction::CastOps extendType           = std::get<2>(args);
+    const ConversionType       conversionType       = std::get<3>(args);
+    uint32_t&                  currentVertexElement = std::get<4>(args);
+    uint32_t&                  outputElt            = std::get<5>(args);
+    const ComponentEnable      compMask             = std::get<6>(args);
+    const ComponentControl(&compCtrl)[4]            = std::get<7>(args);
+    Value*(&vVertexElements)[4]                     = std::get<8>(args);
+    const uint32_t(&swizzle)[4]                     = std::get<9>(args);
  
      // cast types
      Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
@@ -1379,18 +1493,19 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
          if (compCtrl[i] == ComponentControl::StoreSrc)
          {
              std::vector<uint32_t> vShuffleMasks[4] = {
-                { 0, 4,  8, 12, 16, 20, 24, 28 }, // x
-                { 1, 5,  9, 13, 17, 21, 25, 29 }, // y
-                { 2, 6, 10, 14, 18, 22, 26, 30 }, // z
-                { 3, 7, 11, 15, 19, 23, 27, 31 }, // w
+                {0, 4, 8, 12, 16, 20, 24, 28},  // x
+                {1, 5, 9, 13, 17, 21, 25, 29},  // y
+                {2, 6, 10, 14, 18, 22, 26, 30}, // z
+                {3, 7, 11, 15, 19, 23, 27, 31}, // w
              };
  
-            Value *val = VSHUFFLE(BITCAST(vGatherResult, v32x8Ty),
-                UndefValue::get(v32x8Ty),
-                vShuffleMasks[swizzle[i]]);
+            Value* val = VSHUFFLE(BITCAST(vGatherResult, v32x8Ty),
+                                  UndefValue::get(v32x8Ty),
+                                  vShuffleMasks[swizzle[i]]);
  
              if ((extendType == Instruction::CastOps::SExt) ||
-                (extendType == Instruction::CastOps::SIToFP)) {
+                (extendType == Instruction::CastOps::SIToFP))
+            {
                  switch (conversionType)
                  {
                  case CONVERT_NORMALIZED:
@@ -1409,7 +1524,8 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
                  }
              }
              else if ((extendType == Instruction::CastOps::ZExt) ||
-                (extendType == Instruction::CastOps::UIToFP)) {
+                     (extendType == Instruction::CastOps::UIToFP))
+            {
                  switch (conversionType)
                  {
                  case CONVERT_NORMALIZED:
@@ -1449,8 +1565,8 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Takes a SIMD of gathered 16bpc verts, zero or sign extends, 
-/// denormalizes if needed, converts to F32 if needed, and positions in 
+/// @brief Takes a SIMD of gathered 16bpc verts, zero or sign extends,
+/// denormalizes if needed, converts to F32 if needed, and positions in
  //  the proper SIMD rows to be output to the simdvertex structure
  /// @param args: (tuple of args, listed below)
  ///   @param vGatherResult[2] - array of gathered 16bpc vertices, 4 per index
@@ -1462,53 +1578,59 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
  ///   @param compMask - component packing mask
  ///   @param compCtrl - component control val
  ///   @param vVertexElements[4] - vertex components to output
-void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
+void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs& args)
  {
      // Unpack tuple args
-    Value* (&vGatherResult)[2] = std::get<0>(args);
-    Value* pVtxOut = std::get<1>(args);
-    const Instruction::CastOps extendType = std::get<2>(args);
-    const ConversionType conversionType = std::get<3>(args);
-    uint32_t &currentVertexElement = std::get<4>(args);
-    uint32_t &outputElt = std::get<5>(args);
-    const ComponentEnable compMask = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4] = std::get<7>(args);
-    Value* (&vVertexElements)[4] = std::get<8>(args);
+    Value*(&vGatherResult)[2]                       = std::get<0>(args);
+    Value*                     pVtxOut              = std::get<1>(args);
+    const Instruction::CastOps extendType           = std::get<2>(args);
+    const ConversionType       conversionType       = std::get<3>(args);
+    uint32_t&                  currentVertexElement = std::get<4>(args);
+    uint32_t&                  outputElt            = std::get<5>(args);
+    const ComponentEnable      compMask             = std::get<6>(args);
+    const ComponentControl(&compCtrl)[4]            = std::get<7>(args);
+    Value*(&vVertexElements)[4]                     = std::get<8>(args);
  
      // cast types
-    Type *vGatherTy = VectorType::get(mInt32Ty, 8);
-    Type *v32x8Ty = VectorType::get(mInt8Ty, 32);
+    Type* vGatherTy = VectorType::get(mInt32Ty, 8);
+    Type* v32x8Ty   = VectorType::get(mInt8Ty, 32);
  
      // have to do extra work for sign extending
-    if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
+    if ((extendType == Instruction::CastOps::SExt) ||
+        (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
      {
          // is this PP float?
          bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
  
-        Type *v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
-        Type *v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
+        Type* v8x16Ty   = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
+        Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), 2);
  
          // shuffle mask
-        Value *vConstMask = C<uint8_t>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
-                                          0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
-        Value *vi128XY_lo = nullptr;
-        Value *vi128XY_hi = nullptr;
+        Value* vConstMask = C<uint8_t>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+                                        0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+        Value* vi128XY_lo = nullptr;
+        Value* vi128XY_hi = nullptr;
          if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
          {
-            // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
+            // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of KNL, for
+            // now..
  
-            Value *vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[0], 0), v32x8Ty);
-            Value *vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[0], 1), v32x8Ty);
+            Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[0], 0), v32x8Ty);
+            Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[0], 1), v32x8Ty);
  
-            Value *vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
-            Value *vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
+            Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
+            Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
  
              // after pshufb: group components together in each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
  
-            vi128XY_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
-            vi128XY_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+            vi128XY_lo = BITCAST(
+                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+                v128bitTy);
+            vi128XY_hi = BITCAST(
+                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+                v128bitTy);
  
              // after PERMD: move and pack xy components into each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
@@ -1516,32 +1638,36 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
          }
  
          // do the same for zw components
-        Value *vi128ZW_lo = nullptr;
-        Value *vi128ZW_hi = nullptr;
+        Value* vi128ZW_lo = nullptr;
+        Value* vi128ZW_hi = nullptr;
          if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
          {
-            Value *vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[1], 0), v32x8Ty);
-            Value *vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[1], 1), v32x8Ty);
-
-            Value *vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
-            Value *vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
-
-            vi128ZW_lo = BITCAST(VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
-            vi128ZW_hi = BITCAST(VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+            Value* vGatherResult_lo = BITCAST(EXTRACT_16(vGatherResult[1], 0), v32x8Ty);
+            Value* vGatherResult_hi = BITCAST(EXTRACT_16(vGatherResult[1], 1), v32x8Ty);
+
+            Value* vShufResult_lo = BITCAST(PSHUFB(vGatherResult_lo, vConstMask), vGatherTy);
+            Value* vShufResult_hi = BITCAST(PSHUFB(vGatherResult_hi, vConstMask), vGatherTy);
+
+            vi128ZW_lo = BITCAST(
+                VSHUFFLE(vShufResult_lo, vShufResult_lo, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+                v128bitTy);
+            vi128ZW_hi = BITCAST(
+                VSHUFFLE(vShufResult_hi, vShufResult_hi, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})),
+                v128bitTy);
          }
  
          // init denormalize variables if needed
          Instruction::CastOps IntToFpCast;
-        Value *conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            IntToFpCast = Instruction::CastOps::SIToFP;
+            IntToFpCast      = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 32767.0));
              break;
          case CONVERT_SSCALED:
-            IntToFpCast = Instruction::CastOps::SIToFP;
+            IntToFpCast      = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0));
              break;
          case CONVERT_USCALED:
@@ -1554,7 +1680,8 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
              break;
          }
  
-        // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+        // sign extend all enabled components. If we have a full vVertexElements, output to current
+        // simdvertex
          for (uint32_t i = 0; i < 4; i++)
          {
              if (isComponentEnabled(compMask, i))
@@ -1564,22 +1691,26 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
                      // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                      uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                      // if x or y, use vi128XY permute result, else use vi128ZW
-                    Value *selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
-                    Value *selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
+                    Value* selectedPermute_lo = (i < 2) ? vi128XY_lo : vi128ZW_lo;
+                    Value* selectedPermute_hi = (i < 2) ? vi128XY_hi : vi128ZW_hi;
  
                      if (bFP)
                      {
                          // extract 128 bit lanes to sign extend each component
-                        Value *temp_lo = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
-                        Value *temp_hi = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
+                        Value* temp_lo =
+                            CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
+                        Value* temp_hi =
+                            CVTPH2PS(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
  
                          vVertexElements[currentVertexElement] = JOIN_16(temp_lo, temp_hi);
                      }
                      else
                      {
                          // extract 128 bit lanes to sign extend each component
-                        Value *temp_lo = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
-                        Value *temp_hi = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
+                        Value* temp_lo =
+                            PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_lo, C(lane)), v8x16Ty));
+                        Value* temp_hi =
+                            PMOVSXWD(BITCAST(VEXTRACT(selectedPermute_hi, C(lane)), v8x16Ty));
  
                          Value* temp = JOIN_16(temp_lo, temp_hi);
  
@@ -1609,37 +1740,40 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
          }
      }
      // else zero extend
-    else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+    else if ((extendType == Instruction::CastOps::ZExt) ||
+             (extendType == Instruction::CastOps::UIToFP))
      {
          // pshufb masks for each component
-        Value *vConstMask[2];
+        Value* vConstMask[2];
  
          if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
          {
              // x/z shuffle mask
-            vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
-                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+            vConstMask[0] = C<char>({
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+            });
          }
  
          if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
          {
              // y/w shuffle mask
-            vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
-                2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
+            vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+                                     2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
          }
  
          // init denormalize variables if needed
          Instruction::CastOps fpCast;
-        Value* conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 65535.0));
              break;
          case CONVERT_USCALED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0f));
              break;
          case CONVERT_SSCALED:
@@ -1664,17 +1798,22 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
                      // if x or y, use vi128XY permute result, else use vi128ZW
                      uint32_t selectedGather = (i < 2) ? 0 : 1;
  
-                    // SIMD16 PSHUFB isnt part of AVX-512F, so split into SIMD8 for the sake of KNL, for now..
+                    // SIMD16 PSHUFB isn't part of AVX-512F, so split into SIMD8 for the sake of
+                    // KNL, for now..
  
-                    Value *vGatherResult_lo = EXTRACT_16(vGatherResult[selectedGather], 0);
-                    Value *vGatherResult_hi = EXTRACT_16(vGatherResult[selectedGather], 1);
+                    Value* vGatherResult_lo = EXTRACT_16(vGatherResult[selectedGather], 0);
+                    Value* vGatherResult_hi = EXTRACT_16(vGatherResult[selectedGather], 1);
  
-                    Value *temp_lo = BITCAST(PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
-                    Value *temp_hi = BITCAST(PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]), vGatherTy);
+                    Value* temp_lo = BITCAST(
+                        PSHUFB(BITCAST(vGatherResult_lo, v32x8Ty), vConstMask[selectedMask]),
+                        vGatherTy);
+                    Value* temp_hi = BITCAST(
+                        PSHUFB(BITCAST(vGatherResult_hi, v32x8Ty), vConstMask[selectedMask]),
+                        vGatherTy);
  
-                    // after pshufb mask for x channel; z uses the same shuffle from the second gather
-                    // 256i - 0    1    2    3    4    5    6    7
-                    //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00 
+                    // after pshufb mask for x channel; z uses the same shuffle from the 2nd gather
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
  
                      Value* temp = JOIN_16(temp_lo, temp_hi);
  
@@ -1708,44 +1847,47 @@ void FetchJit::Shuffle16bpcGather16(Shuffle16bpcArgs &args)
      }
  }
  
-void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
+void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs& args)
  {
      // Unpack tuple args
-    Value* (&vGatherResult)[2] = std::get<0>(args);
-    Value* pVtxOut = std::get<1>(args);
-    const Instruction::CastOps extendType = std::get<2>(args);
-    const ConversionType conversionType = std::get<3>(args);
-    uint32_t &currentVertexElement = std::get<4>(args);
-    uint32_t &outputElt = std::get<5>(args);
-    const ComponentEnable compMask = std::get<6>(args);
-    const ComponentControl(&compCtrl)[4] = std::get<7>(args);
-    Value* (&vVertexElements)[4] = std::get<8>(args);
+    Value*(&vGatherResult)[2]                       = std::get<0>(args);
+    Value*                     pVtxOut              = std::get<1>(args);
+    const Instruction::CastOps extendType           = std::get<2>(args);
+    const ConversionType       conversionType       = std::get<3>(args);
+    uint32_t&                  currentVertexElement = std::get<4>(args);
+    uint32_t&                  outputElt            = std::get<5>(args);
+    const ComponentEnable      compMask             = std::get<6>(args);
+    const ComponentControl(&compCtrl)[4]            = std::get<7>(args);
+    Value*(&vVertexElements)[4]                     = std::get<8>(args);
  
      // cast types
      Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
-    Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
+    Type* v32x8Ty   = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits
  
-                                                           // have to do extra work for sign extending
-    if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP) ||
-        (extendType == Instruction::CastOps::FPExt))
+    // have to do extra work for sign extending
+    if ((extendType == Instruction::CastOps::SExt) ||
+        (extendType == Instruction::CastOps::SIToFP) || (extendType == Instruction::CastOps::FPExt))
      {
          // is this PP float?
          bool bFP = (extendType == Instruction::CastOps::FPExt) ? true : false;
  
-        Type* v8x16Ty = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
-        Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
+        Type* v8x16Ty   = VectorType::get(mInt16Ty, 8); // 8x16bit in a 128bit lane
+        Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128),
+                                          mVWidth / 4); // vwidth is units of 32 bits
  
-                                                                                                     // shuffle mask
-        Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
-            0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
-        Value* vi128XY = nullptr;
-        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)) {
-            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult[0], v32x8Ty), vConstMask), vGatherTy);
+        // shuffle mask
+        Value* vConstMask = C<char>({0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
+                                     0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15});
+        Value* vi128XY    = nullptr;
+        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1))
+        {
+            Value* vShufResult =
+                BITCAST(PSHUFB(BITCAST(vGatherResult[0], v32x8Ty), vConstMask), vGatherTy);
              // after pshufb: group components together in each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy
  
-            vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+            vi128XY = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
              // after PERMD: move and pack xy components into each 128bit lane
              // 256i - 0    1    2    3    4    5    6    7
              //        xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy
@@ -1753,23 +1895,25 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
  
          // do the same for zw components
          Value* vi128ZW = nullptr;
-        if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)) {
-            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult[1], v32x8Ty), vConstMask), vGatherTy);
-            vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
+        if (isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3))
+        {
+            Value* vShufResult =
+                BITCAST(PSHUFB(BITCAST(vGatherResult[1], v32x8Ty), vConstMask), vGatherTy);
+            vi128ZW = BITCAST(VPERMD(vShufResult, C<int32_t>({0, 1, 4, 5, 2, 3, 6, 7})), v128bitTy);
          }
  
          // init denormalize variables if needed
          Instruction::CastOps IntToFpCast;
-        Value* conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            IntToFpCast = Instruction::CastOps::SIToFP;
+            IntToFpCast      = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 32767.0));
              break;
          case CONVERT_SSCALED:
-            IntToFpCast = Instruction::CastOps::SIToFP;
+            IntToFpCast      = Instruction::CastOps::SIToFP;
              conversionFactor = VIMMED1((float)(1.0));
              break;
          case CONVERT_USCALED:
@@ -1782,7 +1926,8 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
              break;
          }
  
-        // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
+        // sign extend all enabled components. If we have a full vVertexElements, output to current
+        // simdvertex
          for (uint32_t i = 0; i < 4; i++)
          {
              if (isComponentEnabled(compMask, i))
@@ -1794,17 +1939,26 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
                      // if x or y, use vi128XY permute result, else use vi128ZW
                      Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
  
-                    if (bFP) {
+                    if (bFP)
+                    {
                          // extract 128 bit lanes to sign extend each component
-                        vVertexElements[currentVertexElement] = CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+                        vVertexElements[currentVertexElement] =
+                            CVTPH2PS(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
                      }
-                    else {
+                    else
+                    {
                          // extract 128 bit lanes to sign extend each component
-                        vVertexElements[currentVertexElement] = PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
+                        vVertexElements[currentVertexElement] =
+                            PMOVSXWD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v8x16Ty));
  
                          // denormalize if needed
-                        if (conversionType != CONVERT_NONE) {
-                            vVertexElements[currentVertexElement] = FMUL(CAST(IntToFpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+                        if (conversionType != CONVERT_NONE)
+                        {
+                            vVertexElements[currentVertexElement] =
+                                FMUL(CAST(IntToFpCast,
+                                          vVertexElements[currentVertexElement],
+                                          mSimdFP32Ty),
+                                     conversionFactor);
                          }
                      }
                      currentVertexElement++;
@@ -1824,34 +1978,39 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
          }
      }
      // else zero extend
-    else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
+    else if ((extendType == Instruction::CastOps::ZExt) ||
+             (extendType == Instruction::CastOps::UIToFP))
      {
          // pshufb masks for each component
          Value* vConstMask[2];
-        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2)) {
+        if (isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 2))
+        {
              // x/z shuffle mask
-            vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
-                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });
+            vConstMask[0] = C<char>({
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+                0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
+            });
          }
  
-        if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3)) {
+        if (isComponentEnabled(compMask, 1) || isComponentEnabled(compMask, 3))
+        {
              // y/w shuffle mask
-            vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
-                2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });
+            vConstMask[1] = C<char>({2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
+                                     2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1});
          }
  
          // init denormalize variables if needed
          Instruction::CastOps fpCast;
-        Value* conversionFactor;
+        Value*               conversionFactor;
  
          switch (conversionType)
          {
          case CONVERT_NORMALIZED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0 / 65535.0));
              break;
          case CONVERT_USCALED:
-            fpCast = Instruction::CastOps::UIToFP;
+            fpCast           = Instruction::CastOps::UIToFP;
              conversionFactor = VIMMED1((float)(1.0f));
              break;
          case CONVERT_SSCALED:
@@ -1876,15 +2035,20 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
                      // if x or y, use vi128XY permute result, else use vi128ZW
                      uint32_t selectedGather = (i < 2) ? 0 : 1;
  
-                    vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
-                    // after pshufb mask for x channel; z uses the same shuffle from the second gather
-                    // 256i - 0    1    2    3    4    5    6    7
-                    //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00 
+                    vVertexElements[currentVertexElement] =
+                        BITCAST(PSHUFB(BITCAST(vGatherResult[selectedGather], v32x8Ty),
+                                       vConstMask[selectedMask]),
+                                vGatherTy);
+                    // after pshufb mask for x channel; z uses the same shuffle from the 2nd gather
+                    // 256i - 0    1    2    3    4    5    6    7
+                    //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
  
                      // denormalize if needed
                      if (conversionType != CONVERT_NONE)
                      {
-                        vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
+                        vVertexElements[currentVertexElement] =
+                            FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty),
+                                 conversionFactor);
                      }
                      currentVertexElement++;
                  }
@@ -1914,7 +2078,10 @@ void FetchJit::Shuffle16bpcGather(Shuffle16bpcArgs &args)
  /// @param outputElt - simdvertex offset in VIN to write to
  /// @param numEltsToStore - number of simdvertex rows to write out
  /// @param vVertexElements - LLVM Value*[] simdvertex to write out
-void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, const uint32_t numEltsToStore, Value* (&vVertexElements)[4])
+void FetchJit::StoreVertexElements(Value*         pVtxOut,
+                                   const uint32_t outputElt,
+                                   const uint32_t numEltsToStore,
+                                   Value* (&vVertexElements)[4])
  {
      SWR_ASSERT(numEltsToStore <= 4, "Invalid element count.");
  
@@ -1924,14 +2091,14 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
          if (!vVertexElements[c]->getType()->getScalarType()->isFloatTy())
          {
  #if FETCH_DUMP_VERTEX
-            PRINT("vVertexElements[%d]: 0x%x\n", { C(c), vVertexElements[c] });
+            PRINT("vVertexElements[%d]: 0x%x\n", {C(c), vVertexElements[c]});
  #endif
              vVertexElements[c] = BITCAST(vVertexElements[c], mSimdFP32Ty);
          }
  #if FETCH_DUMP_VERTEX
          else
          {
-            PRINT("vVertexElements[%d]: %f\n", { C(c), vVertexElements[c] });
+            PRINT("vVertexElements[%d]: %f\n", {C(c), vVertexElements[c]});
          }
  #endif
          // outputElt * 4 = offsetting by the size of a simdvertex
@@ -1942,10 +2109,10 @@ void FetchJit::StoreVertexElements(Value* pVtxOut, const uint32_t outputElt, con
  }
  
  //////////////////////////////////////////////////////////////////////////
-/// @brief Generates a constant vector of values based on the 
+/// @brief Generates a constant vector of values based on the
  /// ComponentControl value
  /// @param ctrl - ComponentControl value
-Value *FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
+Value* FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
  {
      switch (ctrl)
      {
@@ -1961,21 +2128,23 @@ Value *FetchJit::GenerateCompCtrlVector(const ComponentControl ctrl)
      {
          if (mVWidth == 16)
          {
-            Type* pSimd8FPTy = VectorType::get(mFP32Ty, 8);
-            Value *pIdLo = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), pSimd8FPTy);
-            Value *pIdHi = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID2 })), pSimd8FPTy);
+            Type*  pSimd8FPTy = VectorType::get(mFP32Ty, 8);
+            Value* pIdLo =
+                BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), pSimd8FPTy);
+            Value* pIdHi =
+                BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID2})), pSimd8FPTy);
              return JOIN_16(pIdLo, pIdHi);
          }
          else
          {
-            return BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_VertexID })), mSimdFP32Ty);
+            return BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_VertexID})), mSimdFP32Ty);
          }
      }
      case StoreInstanceId:
-        {
-            Value *pId = BITCAST(LOAD(GEP(mpFetchInfo, { 0, SWR_FETCH_CONTEXT_CurInstance })), mFP32Ty);
-            return VBROADCAST(pId);
-        }
+    {
+        Value* pId = BITCAST(LOAD(GEP(mpFetchInfo, {0, SWR_FETCH_CONTEXT_CurInstance})), mFP32Ty);
+        return VBROADCAST(pId);
+    }
  
  
      case StoreSrc:
@@ -1994,15 +2163,20 @@ bool isComponentEnabled(ComponentEnable enableMask, uint8_t component)
      switch (component)
      {
          // X
-    case 0: return (enableMask & ComponentEnable::X);
+    case 0:
+        return (enableMask & ComponentEnable::X);
          // Y
-    case 1: return (enableMask & ComponentEnable::Y);
+    case 1:
+        return (enableMask & ComponentEnable::Y);
          // Z
-    case 2: return (enableMask & ComponentEnable::Z);
+    case 2:
+        return (enableMask & ComponentEnable::Z);
          // W
-    case 3: return (enableMask & ComponentEnable::W);
+    case 3:
+        return (enableMask & ComponentEnable::W);
  
-    default: return false;
+    default:
+        return false;
      }
  }
  
@@ -2018,21 +2192,22 @@ static std::mutex gFetchCodegenMutex;
  /// @return PFN_FETCH_FUNC - pointer to fetch code
  PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc)
  {
-    const llvm::Function* func = (const llvm::Function*)hFunc;
-    JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-    PFN_FETCH_FUNC pfnFetch;
+    const llvm::Function* func    = (const llvm::Function*)hFunc;
+    JitManager*           pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+    PFN_FETCH_FUNC        pfnFetch;
  
      gFetchCodegenMutex.lock();
      pfnFetch = (PFN_FETCH_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
-    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+    // MCJIT finalizes modules the first time you JIT code from them. Once finalized, you cannot
+    // add new IR to the module.
      pJitMgr->mIsModuleFinalized = true;
  
  #if defined(KNOB_SWRC_TRACING)
-    char fName[1024];
-    const char *funcName = func->getName().data();
+    char        fName[1024];
+    const char* funcName = func->getName().data();
      sprintf(fName, "%s.bin", funcName);
-    FILE *fd = fopen(fName, "wb");
-    fwrite((void *)pfnFetch, 1, 2048, fd);
+    FILE* fd = fopen(fName, "wb");
+    fwrite((void*)pfnFetch, 1, 2048, fd);
      fclose(fd);
  #endif
  
@@ -2040,7 +2215,6 @@ PFN_FETCH_FUNC JitFetchFunc(HANDLE hJitMgr, const HANDLE hFunc)
      gFetchCodegenMutex.unlock();
  
  
-
      return pfnFetch;
  }
  
@@ -2055,7 +2229,7 @@ extern "C" PFN_FETCH_FUNC JITCALL JitCompileFetch(HANDLE hJitMgr, const FETCH_CO
      pJitMgr->SetupNewModule();
  
      FetchJit theJit(pJitMgr);
-    HANDLE hFunc = theJit.Create(state);
+    HANDLE   hFunc = theJit.Create(state);
  
      return JitFetchFunc(hJitMgr, hFunc);
  }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h

index de0ec4f833000a6dcaaa74749fb9021602f4cd14..abc3091354f4a787d0d792c130bf57446d38c5a3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file fetch_jit.h
-*
-* @brief Definition of the fetch jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file fetch_jit.h
+ *
+ * @brief Definition of the fetch jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/formats.h"
@@ -41,17 +41,17 @@ struct INPUT_ELEMENT_DESC
      {
          struct
          {
-            uint32_t            AlignedByteOffset : 12;
-            uint32_t            Format : 10;
-            uint32_t            StreamIndex : 6;
-            uint32_t            InstanceEnable : 1;
-            uint32_t            InstanceStrideEnable : 1;
-            uint32_t            ComponentControl0 : 4;
-            uint32_t            ComponentControl1 : 4;
-            uint32_t            ComponentControl2 : 4;
-            uint32_t            ComponentControl3 : 4;
-            uint32_t            ComponentPacking : 4;
-            uint32_t            _reserved : 14;
+            uint32_t AlignedByteOffset : 12;
+            uint32_t Format : 10;
+            uint32_t StreamIndex : 6;
+            uint32_t InstanceEnable : 1;
+            uint32_t InstanceStrideEnable : 1;
+            uint32_t ComponentControl0 : 4;
+            uint32_t ComponentControl1 : 4;
+            uint32_t ComponentControl2 : 4;
+            uint32_t ComponentControl3 : 4;
+            uint32_t ComponentPacking : 4;
+            uint32_t _reserved : 14;
          };
          uint64_t bits;
      };
@@ -95,40 +95,52 @@ enum ComponentControl
  //////////////////////////////////////////////////////////////////////////
  struct FETCH_COMPILE_STATE
  {
-    uint32_t numAttribs{ 0 };
+    uint32_t           numAttribs{0};
      INPUT_ELEMENT_DESC layout[SWR_VTX_NUM_SLOTS];
-    SWR_FORMAT indexType;
-    uint32_t cutIndex{ 0xffffffff };
+    SWR_FORMAT         indexType;
+    uint32_t           cutIndex{0xffffffff};
  
      // Options that affect the JIT'd code
-    bool bDisableIndexOOBCheck;             // If enabled, FetchJit will exclude index OOB check
-    bool bEnableCutIndex{ false };          // Compares indices with the cut index and returns a cut mask
-    bool bVertexIDOffsetEnable{ false };    // Offset vertexID by StartVertex for non-indexed draws or BaseVertex for indexed draws
-    bool bPartialVertexBuffer{ false };     // for indexed draws, map illegal indices to a known resident vertex
+    bool bDisableIndexOOBCheck;        // If enabled, FetchJit will exclude index OOB check
+    bool bEnableCutIndex{false};       // Compares indices with the cut index and returns a cut mask
+    bool bVertexIDOffsetEnable{false}; // Offset vertexID by StartVertex for non-indexed draws or
+                                       // BaseVertex for indexed draws
+    bool bPartialVertexBuffer{
+        false}; // for indexed draws, map illegal indices to a known resident vertex
  
-    bool bForceSequentialAccessEnable{ false };
-    bool bInstanceIDOffsetEnable{ false };
+    bool bForceSequentialAccessEnable{false};
+    bool bInstanceIDOffsetEnable{false};
  
-    FETCH_COMPILE_STATE(bool diableIndexOOBCheck = false):
-        bDisableIndexOOBCheck(diableIndexOOBCheck){ };
+    FETCH_COMPILE_STATE(bool diableIndexOOBCheck = false) :
+        bDisableIndexOOBCheck(diableIndexOOBCheck){};
  
-    bool operator==(const FETCH_COMPILE_STATE &other) const
+    bool operator==(const FETCH_COMPILE_STATE& other) const
      {
-        if (numAttribs != other.numAttribs) return false;
-        if (indexType != other.indexType) return false;
-        if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck) return false;
-        if (bEnableCutIndex != other.bEnableCutIndex) return false;
-        if (cutIndex != other.cutIndex) return false;
-        if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return false;
-        if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false;
-        if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable) return false;
-        if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable) return false;
+        if (numAttribs != other.numAttribs)
+            return false;
+        if (indexType != other.indexType)
+            return false;
+        if (bDisableIndexOOBCheck != other.bDisableIndexOOBCheck)
+            return false;
+        if (bEnableCutIndex != other.bEnableCutIndex)
+            return false;
+        if (cutIndex != other.cutIndex)
+            return false;
+        if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable)
+            return false;
+        if (bPartialVertexBuffer != other.bPartialVertexBuffer)
+            return false;
+        if (bForceSequentialAccessEnable != other.bForceSequentialAccessEnable)
+            return false;
+        if (bInstanceIDOffsetEnable != other.bInstanceIDOffsetEnable)
+            return false;
  
          for (uint32_t i = 0; i < numAttribs; ++i)
          {
              if ((layout[i].bits != other.layout[i].bits) ||
-               (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) &&
-                (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState))){
+                (((layout[i].InstanceEnable == 1) || (layout[i].InstanceStrideEnable == 1)) &&
+                 (layout[i].InstanceAdvancementState != other.layout[i].InstanceAdvancementState)))
+            {
                  return false;
              }
          }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp

index f2bd8889fc50b9fb0be1b3d2cded47dbd9005739..2a01c706b9688915db96ce8a4bc57ded5c6db76c 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file lower_x86.cpp
-*
-* @brief llvm pass to lower meta code to x86
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file lower_x86.cpp
+ *
+ * @brief llvm pass to lower meta code to x86
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  
  #include "jit_pch.hpp"
  #include "passes.h"
@@ -34,12 +34,11 @@
  
  #include <unordered_map>
  
-
  namespace llvm
  {
      // forward declare the initializer
-    void initializeLowerX86Pass(PassRegistry&);
-}
+    void initializeLowerX86Pass(PassRegistry &);
+} // namespace llvm
  
  namespace SwrJit
  {
@@ -47,97 +46,135 @@ namespace SwrJit
  
      enum TargetArch
      {
-        AVX = 0,
-        AVX2 = 1,
+        AVX    = 0,
+        AVX2   = 1,
          AVX512 = 2
      };
  
      enum TargetWidth
      {
-        W256 = 0,
-        W512 = 1,
+        W256       = 0,
+        W512       = 1,
          NUM_WIDTHS = 2
      };
  
      struct LowerX86;
  
-    typedef std::function<Instruction*(LowerX86*, TargetArch, TargetWidth, CallInst*)> EmuFunc;
+    typedef std::function<Instruction *(LowerX86 *, TargetArch, TargetWidth, CallInst *)> EmuFunc;
  
      struct X86Intrinsic
      {
          Intrinsic::ID intrin[NUM_WIDTHS];
-        EmuFunc emuFunc;
+        EmuFunc       emuFunc;
      };
  
-    // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the previous behavior of
-    // mapping directly to avx/avx2 intrinsics.
+    // Map of intrinsics that haven't been moved to the new mechanism yet. If used, these get the
+    // previous behavior of mapping directly to avx/avx2 intrinsics.
      static std::map<std::string, Intrinsic::ID> intrinsicMap = {
-        {"meta.intrinsic.BEXTR_32",        Intrinsic::x86_bmi_bextr_32},
-        {"meta.intrinsic.VPSHUFB",         Intrinsic::x86_avx2_pshuf_b},
-        {"meta.intrinsic.VCVTPS2PH",       Intrinsic::x86_vcvtps2ph_256},
-        {"meta.intrinsic.VPTESTC",         Intrinsic::x86_avx_ptestc_256},
-        {"meta.intrinsic.VPTESTZ",         Intrinsic::x86_avx_ptestz_256},
-        {"meta.intrinsic.VFMADDPS",        Intrinsic::x86_fma_vfmadd_ps_256},
-        {"meta.intrinsic.VPHADDD",         Intrinsic::x86_avx2_phadd_d},
-        {"meta.intrinsic.PDEP32",          Intrinsic::x86_bmi_pdep_32},
-        {"meta.intrinsic.RDTSC",           Intrinsic::x86_rdtsc},
+        {"meta.intrinsic.BEXTR_32", Intrinsic::x86_bmi_bextr_32},
+        {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
+        {"meta.intrinsic.VCVTPS2PH", Intrinsic::x86_vcvtps2ph_256},
+        {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
+        {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
+        {"meta.intrinsic.VFMADDPS", Intrinsic::x86_fma_vfmadd_ps_256},
+        {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
+        {"meta.intrinsic.PDEP32", Intrinsic::x86_bmi_pdep_32},
+        {"meta.intrinsic.RDTSC", Intrinsic::x86_rdtsc},
      };
  
      // Forward decls
-    Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-    Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-    Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-    Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-    Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst);
-
-    Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin);
-    
+    Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+    Instruction *
+    VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+    Instruction *
+    VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+    Instruction *
+    VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+    Instruction *
+    VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst);
+
+    Instruction *DOUBLE_EMU(LowerX86 *    pThis,
+                            TargetArch    arch,
+                            TargetWidth   width,
+                            CallInst *    pCallInst,
+                            Intrinsic::ID intrin);
+
      static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
  
      static std::map<std::string, X86Intrinsic> intrinsicMap2[] = {
          //                              256 wide                                    512 wide
-    {   // AVX
-        {"meta.intrinsic.VRCPPS",      {{Intrinsic::x86_avx_rcp_ps_256,              DOUBLE},                                        NO_EMU}},
-        {"meta.intrinsic.VPERMPS",     {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VPERM_EMU}},
-        {"meta.intrinsic.VPERMD",      {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VPERM_EMU}},
-        {"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,          Intrinsic::not_intrinsic},                      NO_EMU}},
-        {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256,               Intrinsic::not_intrinsic},                      NO_EMU}},
-        {"meta.intrinsic.VROUND",      {{Intrinsic::x86_avx_round_ps_256,            DOUBLE},                                        NO_EMU}},
-        {"meta.intrinsic.VHSUBPS",     {{Intrinsic::x86_avx_hsub_ps_256,             DOUBLE},                                        NO_EMU}},
-    },
-    {   // AVX2
-        {"meta.intrinsic.VRCPPS",      {{Intrinsic::x86_avx_rcp_ps_256,              DOUBLE},                                        NO_EMU}},
-        {"meta.intrinsic.VPERMPS",     {{Intrinsic::x86_avx2_permps,                 Intrinsic::not_intrinsic},                      VPERM_EMU}},
-        {"meta.intrinsic.VPERMD",      {{Intrinsic::x86_avx2_permd,                  Intrinsic::not_intrinsic},                      VPERM_EMU}},
-        {"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,          DOUBLE},                                        NO_EMU}},
-        {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256,               Intrinsic::not_intrinsic},                      NO_EMU}},
-        {"meta.intrinsic.VROUND",      {{Intrinsic::x86_avx_round_ps_256,            DOUBLE},                                        NO_EMU}},
-        {"meta.intrinsic.VHSUBPS",     {{Intrinsic::x86_avx_hsub_ps_256,             DOUBLE},                                        NO_EMU}},
-    },
-    {   // AVX512
-        {"meta.intrinsic.VRCPPS",      {{Intrinsic::x86_avx512_rcp14_ps_256,         Intrinsic::x86_avx512_rcp14_ps_512},            NO_EMU}},
-        {"meta.intrinsic.VPERMPS",     {{Intrinsic::x86_avx512_mask_permvar_sf_256,  Intrinsic::x86_avx512_mask_permvar_sf_512},     NO_EMU}},
-        {"meta.intrinsic.VPERMD",      {{Intrinsic::x86_avx512_mask_permvar_si_256,  Intrinsic::x86_avx512_mask_permvar_si_512},     NO_EMU}},
-        {"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VGATHER_EMU}},
-        {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx512_mask_cvtpd2ps_256,    Intrinsic::x86_avx512_mask_cvtpd2ps_512 },      NO_EMU}},
-        {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_avx512_mask_vcvtph2ps_256,   Intrinsic::x86_avx512_mask_vcvtph2ps_512 },     NO_EMU}},
-        {"meta.intrinsic.VROUND",      {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VROUND_EMU}},
-        {"meta.intrinsic.VHSUBPS",     {{Intrinsic::not_intrinsic,                   Intrinsic::not_intrinsic},                      VHSUB_EMU}},
-    }
-    };
+        {
+            // AVX
+            {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+            {"meta.intrinsic.VPERMPS",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+            {"meta.intrinsic.VPERMD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VPERM_EMU}},
+            {"meta.intrinsic.VGATHERPD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERPS",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERDD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VCVTPD2PS",
+             {{Intrinsic::x86_avx_cvt_pd2_ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+            {"meta.intrinsic.VCVTPH2PS",
+             {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+            {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+            {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+        },
+        {
+            // AVX2
+            {"meta.intrinsic.VRCPPS", {{Intrinsic::x86_avx_rcp_ps_256, DOUBLE}, NO_EMU}},
+            {"meta.intrinsic.VPERMPS",
+             {{Intrinsic::x86_avx2_permps, Intrinsic::not_intrinsic}, VPERM_EMU}},
+            {"meta.intrinsic.VPERMD",
+             {{Intrinsic::x86_avx2_permd, Intrinsic::not_intrinsic}, VPERM_EMU}},
+            {"meta.intrinsic.VGATHERPD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERPS",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERDD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VCVTPD2PS", {{Intrinsic::x86_avx_cvt_pd2_ps_256, DOUBLE}, NO_EMU}},
+            {"meta.intrinsic.VCVTPH2PS",
+             {{Intrinsic::x86_vcvtph2ps_256, Intrinsic::not_intrinsic}, NO_EMU}},
+            {"meta.intrinsic.VROUND", {{Intrinsic::x86_avx_round_ps_256, DOUBLE}, NO_EMU}},
+            {"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256, DOUBLE}, NO_EMU}},
+        },
+        {
+            // AVX512
+            {"meta.intrinsic.VRCPPS",
+             {{Intrinsic::x86_avx512_rcp14_ps_256, Intrinsic::x86_avx512_rcp14_ps_512}, NO_EMU}},
+            {"meta.intrinsic.VPERMPS",
+             {{Intrinsic::x86_avx512_mask_permvar_sf_256,
+               Intrinsic::x86_avx512_mask_permvar_sf_512},
+              NO_EMU}},
+            {"meta.intrinsic.VPERMD",
+             {{Intrinsic::x86_avx512_mask_permvar_si_256,
+               Intrinsic::x86_avx512_mask_permvar_si_512},
+              NO_EMU}},
+            {"meta.intrinsic.VGATHERPD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERPS",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VGATHERDD",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VGATHER_EMU}},
+            {"meta.intrinsic.VCVTPD2PS",
+             {{Intrinsic::x86_avx512_mask_cvtpd2ps_256, Intrinsic::x86_avx512_mask_cvtpd2ps_512},
+              NO_EMU}},
+            {"meta.intrinsic.VCVTPH2PS",
+             {{Intrinsic::x86_avx512_mask_vcvtph2ps_256, Intrinsic::x86_avx512_mask_vcvtph2ps_512},
+              NO_EMU}},
+            {"meta.intrinsic.VROUND",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VROUND_EMU}},
+            {"meta.intrinsic.VHSUBPS",
+             {{Intrinsic::not_intrinsic, Intrinsic::not_intrinsic}, VHSUB_EMU}},
+        }};
  
      struct LowerX86 : public FunctionPass
      {
-        LowerX86(Builder* b = nullptr)
-            : FunctionPass(ID), B(b)
+        LowerX86(Builder *b = nullptr) : FunctionPass(ID), B(b)
          {
              initializeLowerX86Pass(*PassRegistry::getPassRegistry());
  
@@ -153,7 +190,6 @@ namespace SwrJit
              else if (JM()->mArch.AVX())
              {
                  mTarget = AVX;
-
              }
              else
              {
@@ -166,9 +202,12 @@ namespace SwrJit
          // across all intrinsics, and will have to be rethought. Probably need something
          // similar to llvm's getDeclaration() utility to map a set of inputs to a specific typed
          // intrinsic.
-        void GetRequestedWidthAndType(CallInst* pCallInst, const StringRef intrinName, TargetWidth* pWidth, Type** pTy)
+        void GetRequestedWidthAndType(CallInst *      pCallInst,
+                                      const StringRef intrinName,
+                                      TargetWidth *   pWidth,
+                                      Type **         pTy)
          {
-            Type* pVecTy = pCallInst->getType();
+            Type *pVecTy = pCallInst->getType();
  
              // Check for intrinsic specific types
              // VCVTPD2PS type comes from src, not dst
@@ -179,7 +218,7 @@ namespace SwrJit
  
              if (!pVecTy->isVectorTy())
              {
-                for (auto& op : pCallInst->arg_operands())
+                for (auto &op : pCallInst->arg_operands())
                  {
                      if (op.get()->getType()->isVectorTy())
                      {
@@ -193,53 +232,68 @@ namespace SwrJit
              uint32_t width = cast<VectorType>(pVecTy)->getBitWidth();
              switch (width)
              {
-            case 256: *pWidth = W256; break;
-            case 512: *pWidth = W512; break;
-            default: SWR_ASSERT(false, "Unhandled vector width %d", width);
+            case 256:
+                *pWidth = W256;
+                break;
+            case 512:
+                *pWidth = W512;
+                break;
+            default:
+                SWR_ASSERT(false, "Unhandled vector width %d", width);
                  *pWidth = W256;
              }
  
              *pTy = pVecTy->getScalarType();
          }
  
-        Value* GetZeroVec(TargetWidth width, Type* pTy)
+        Value *GetZeroVec(TargetWidth width, Type *pTy)
          {
              uint32_t numElem = 0;
              switch (width)
              {
-            case W256: numElem = 8; break;
-            case W512: numElem = 16; break;
-            default: SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
+            case W256:
+                numElem = 8;
+                break;
+            case W512:
+                numElem = 16;
+                break;
+            default:
+                SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
              }
  
              return ConstantVector::getNullValue(VectorType::get(pTy, numElem));
          }
  
-        Value* GetMask(TargetWidth width)
+        Value *GetMask(TargetWidth width)
          {
-            Value* mask;
+            Value *mask;
              switch (width)
              {
-            case W256: mask = B->C((uint8_t)-1); break;
-            case W512: mask = B->C((uint16_t)-1); break;
-            default: SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
+            case W256:
+                mask = B->C((uint8_t)-1);
+                break;
+            case W512:
+                mask = B->C((uint16_t)-1);
+                break;
+            default:
+                SWR_ASSERT(false, "Unhandled vector width type %d\n", width);
              }
              return mask;
          }
  
          // Convert <N x i1> mask to <N x i32> x86 mask
-        Value* VectorMask(Value* vi1Mask)
+        Value *VectorMask(Value *vi1Mask)
          {
              uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
              return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
          }
  
-        Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
+        Instruction *ProcessIntrinsicAdvanced(CallInst *pCallInst)
          {
-            Function* pFunc = pCallInst->getCalledFunction();
-            auto& intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
+            Function *  pFunc     = pCallInst->getCalledFunction();
+            auto &      intrinsic = intrinsicMap2[mTarget][pFunc->getName()];
              TargetWidth vecWidth;
-            Type* pElemTy;
+            Type *      pElemTy;
              GetRequestedWidthAndType(pCallInst, pFunc->getName(), &vecWidth, &pElemTy);
  
              // Check if there is a native intrinsic for this instruction
@@ -249,29 +303,33 @@ namespace SwrJit
                  // Double pump the next smaller SIMD intrinsic
                  SWR_ASSERT(vecWidth != 0, "Cannot double pump smallest SIMD width.");
                  Intrinsic::ID id2 = intrinsic.intrin[vecWidth - 1];
-                SWR_ASSERT(id2 != Intrinsic::not_intrinsic, "Cannot find intrinsic to double pump.");
+                SWR_ASSERT(id2 != Intrinsic::not_intrinsic,
+                           "Cannot find intrinsic to double pump.");
                  return DOUBLE_EMU(this, mTarget, vecWidth, pCallInst, id2);
              }
              else if (id != Intrinsic::not_intrinsic)
              {
-                Function* pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
-                SmallVector<Value*, 8> args;
-                for (auto& arg : pCallInst->arg_operands())
+                Function *pIntrin = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
+                SmallVector<Value *, 8> args;
+                for (auto &arg : pCallInst->arg_operands())
                  {
                      args.push_back(arg.get());
                  }
  
-                // If AVX512, all instructions add a src operand and mask. We'll pass in 0 src and full mask for now
-                // Assuming the intrinsics are consistent and place the src operand and mask last in the argument list.
+                // If AVX512, all instructions add a src operand and mask. We'll pass in 0 src and
+                // full mask for now, assuming the intrinsics are consistent and place the src
+                // operand and mask last in the argument list.
                  if (mTarget == AVX512)
                  {
-                    if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS")) {
+                    if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS"))
+                    {
                          args.push_back(GetZeroVec(W256, pCallInst->getType()->getScalarType()));
                          args.push_back(GetMask(W256));
                          // for AVX512 VCVTPD2PS, we also have to add rounding mode
-                        args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT |
-                                            _MM_FROUND_NO_EXC));
-                    } else {
+                        args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
+                    }
+                    else
+                    {
                          args.push_back(GetZeroVec(vecWidth, pElemTy));
                          args.push_back(GetMask(vecWidth));
                      }
@@ -289,23 +347,26 @@ namespace SwrJit
              return nullptr;
          }
  
-        Instruction* ProcessIntrinsic(CallInst* pCallInst)
+        Instruction *ProcessIntrinsic(CallInst *pCallInst)
          {
-            Function* pFunc = pCallInst->getCalledFunction();
-            
+            Function *pFunc = pCallInst->getCalledFunction();
+
              // Forward to the advanced support if found
              if (intrinsicMap2[mTarget].find(pFunc->getName()) != intrinsicMap2[mTarget].end())
              {
                  return ProcessIntrinsicAdvanced(pCallInst);
              }
  
-            SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(), "Unimplemented intrinsic %s.", pFunc->getName());
+            SWR_ASSERT(intrinsicMap.find(pFunc->getName()) != intrinsicMap.end(),
+                       "Unimplemented intrinsic %s.",
+                       pFunc->getName());
  
              Intrinsic::ID x86Intrinsic = intrinsicMap[pFunc->getName()];
-            Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
+            Function *    pX86IntrinFunc =
+                Intrinsic::getDeclaration(B->JM()->mpCurrentModule, x86Intrinsic);
  
-            SmallVector<Value*, 8> args;
-            for (auto& arg : pCallInst->arg_operands())
+            SmallVector<Value *, 8> args;
+            for (auto &arg : pCallInst->arg_operands())
              {
                  args.push_back(arg.get());
              }
@@ -315,34 +376,33 @@ namespace SwrJit
          //////////////////////////////////////////////////////////////////////////
          /// @brief LLVM funtion pass run method.
          /// @param f- The function we're working on with this pass.
-        virtual bool runOnFunction(Function& F)
+        virtual bool runOnFunction(Function &F)
          {
-            std::vector<Instruction*> toRemove;
+            std::vector<Instruction *> toRemove;
  
-            for (auto& BB : F.getBasicBlockList())
+            for (auto &BB : F.getBasicBlockList())
              {
-                for (auto& I : BB.getInstList())
+                for (auto &I : BB.getInstList())
                  {
-                    if (CallInst* pCallInst = dyn_cast<CallInst>(&I))
+                    if (CallInst *pCallInst = dyn_cast<CallInst>(&I))
                      {
-                        Function* pFunc = pCallInst->getCalledFunction();
+                        Function *pFunc = pCallInst->getCalledFunction();
                          if (pFunc)
                          {
                              if (pFunc->getName().startswith("meta.intrinsic"))
                              {
                                  B->IRB()->SetInsertPoint(&I);
-                                Instruction* pReplace = ProcessIntrinsic(pCallInst);
+                                Instruction *pReplace = ProcessIntrinsic(pCallInst);
                                  SWR_ASSERT(pReplace);
                                  toRemove.push_back(pCallInst);
                                  pCallInst->replaceAllUsesWith(pReplace);
                              }
                          }
-
                      }
                  }
              }
  
-            for (auto* pInst : toRemove)
+            for (auto *pInst : toRemove)
              {
                  pInst->eraseFromParent();
              }
@@ -352,42 +412,37 @@ namespace SwrJit
              return true;
          }
  
-        virtual void getAnalysisUsage(AnalysisUsage& AU) const
-        {
-        }
+        virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
  
-        JitManager* JM() { return B->JM(); }
+        JitManager *JM() { return B->JM(); }
  
-        Builder* B;
+        Builder *B;
  
          TargetArch mTarget;
  
-        static char ID;  ///< Needed by LLVM to generate ID for FunctionPass.
+        static char ID; ///< Needed by LLVM to generate ID for FunctionPass.
      };
  
-    char LowerX86::ID = 0;   // LLVM uses address of ID as the actual ID.
+    char LowerX86::ID = 0; // LLVM uses address of ID as the actual ID.
  
-    FunctionPass* createLowerX86Pass(Builder* b)
-    {
-        return new LowerX86(b);
-    }
+    FunctionPass *createLowerX86Pass(Builder *b) { return new LowerX86(b); }
  
-    Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+    Instruction *NO_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
      {
          SWR_ASSERT(false, "Unimplemented intrinsic emulation.");
          return nullptr;
      }
  
-    Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+    Instruction *VPERM_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
      {
          // Only need vperm emulation for AVX
          SWR_ASSERT(arch == AVX);
  
-        Builder* B = pThis->B;
-        auto v32A = pCallInst->getArgOperand(0);
-        auto vi32Index = pCallInst->getArgOperand(1);
+        Builder *B         = pThis->B;
+        auto     v32A      = pCallInst->getArgOperand(0);
+        auto     vi32Index = pCallInst->getArgOperand(1);
  
-        Value* v32Result;
+        Value *v32Result;
          if (isa<Constant>(vi32Index))
          {
              // Can use llvm shuffle vector directly with constant shuffle indices
@@ -399,67 +454,71 @@ namespace SwrJit
              for (uint32_t l = 0; l < v32A->getType()->getVectorNumElements(); ++l)
              {
                  auto i32Index = B->VEXTRACT(vi32Index, B->C(l));
-                auto val = B->VEXTRACT(v32A, i32Index);
-                v32Result = B->VINSERT(v32Result, val, B->C(l));
+                auto val      = B->VEXTRACT(v32A, i32Index);
+                v32Result     = B->VINSERT(v32Result, val, B->C(l));
              }
          }
          return cast<Instruction>(v32Result);
      }
  
-    Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+    Instruction *
+    VGATHER_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
      {
-        Builder* B = pThis->B;
-        auto vSrc = pCallInst->getArgOperand(0);
-        auto pBase = pCallInst->getArgOperand(1);
-        auto vi32Indices = pCallInst->getArgOperand(2);
-        auto vi1Mask = pCallInst->getArgOperand(3);
-        auto i8Scale = pCallInst->getArgOperand(4);
-
-        pBase = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
-        uint32_t numElem = vSrc->getType()->getVectorNumElements();
-        auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
-        auto srcTy = vSrc->getType()->getVectorElementType();
-        Value* v32Gather;
+        Builder *B           = pThis->B;
+        auto     vSrc        = pCallInst->getArgOperand(0);
+        auto     pBase       = pCallInst->getArgOperand(1);
+        auto     vi32Indices = pCallInst->getArgOperand(2);
+        auto     vi1Mask     = pCallInst->getArgOperand(3);
+        auto     i8Scale     = pCallInst->getArgOperand(4);
+
+        pBase             = B->POINTER_CAST(pBase, PointerType::get(B->mInt8Ty, 0));
+        uint32_t numElem  = vSrc->getType()->getVectorNumElements();
+        auto     i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
+        auto     srcTy    = vSrc->getType()->getVectorElementType();
+        Value *  v32Gather;
          if (arch == AVX)
          {
              // Full emulation for AVX
              // Store source on stack to provide a valid address to load from inactive lanes
              auto pStack = B->STACKSAVE();
-            auto pTmp = B->ALLOCA(vSrc->getType());
+            auto pTmp   = B->ALLOCA(vSrc->getType());
              B->STORE(vSrc, pTmp);
  
-            v32Gather = UndefValue::get(vSrc->getType());
-            auto vi32Scale = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
+            v32Gather        = UndefValue::get(vSrc->getType());
+            auto vi32Scale   = ConstantVector::getSplat(numElem, cast<ConstantInt>(i32Scale));
              auto vi32Offsets = B->MUL(vi32Indices, vi32Scale);
  
              for (uint32_t i = 0; i < numElem; ++i)
              {
-                auto i32Offset = B->VEXTRACT(vi32Offsets, B->C(i));
-                auto pLoadAddress = B->GEP(pBase, i32Offset);
-                pLoadAddress = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
-                auto pMaskedLoadAddress = B->GEP(pTmp, { 0, i });
-                auto i1Mask = B->VEXTRACT(vi1Mask, B->C(i));
-                auto pValidAddress = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
-                auto val = B->LOAD(pValidAddress);
-                v32Gather = B->VINSERT(v32Gather, val, B->C(i));
+                auto i32Offset          = B->VEXTRACT(vi32Offsets, B->C(i));
+                auto pLoadAddress       = B->GEP(pBase, i32Offset);
+                pLoadAddress            = B->BITCAST(pLoadAddress, PointerType::get(srcTy, 0));
+                auto pMaskedLoadAddress = B->GEP(pTmp, {0, i});
+                auto i1Mask             = B->VEXTRACT(vi1Mask, B->C(i));
+                auto pValidAddress      = B->SELECT(i1Mask, pLoadAddress, pMaskedLoadAddress);
+                auto val                = B->LOAD(pValidAddress);
+                v32Gather               = B->VINSERT(v32Gather, val, B->C(i));
              }
  
              B->STACKRESTORE(pStack);
          }
          else if (arch == AVX2 || (arch == AVX512 && width == W256))
          {
-            Function* pX86IntrinFunc;
+            Function *pX86IntrinFunc;
              if (srcTy == B->mFP32Ty)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_ps_256);
-            } 
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx2_gather_d_ps_256);
+            }
              else if (srcTy == B->mInt32Ty)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_d_256);
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx2_gather_d_d_256);
              }
              else if (srcTy == B->mDoubleTy)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx2_gather_d_q_256);
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx2_gather_d_q_256);
              }
              else
              {
@@ -469,7 +528,7 @@ namespace SwrJit
              if (width == W256)
              {
                  auto v32Mask = B->BITCAST(pThis->VectorMask(vi1Mask), vSrc->getType());
-                v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, v32Mask, i8Scale });
+                v32Gather = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, v32Mask, i8Scale});
              }
              else if (width == W512)
              {
@@ -477,45 +536,58 @@ namespace SwrJit
                  if (vSrc->getType()->getVectorElementType() == B->mDoubleTy)
                  {
                      auto v64Mask = pThis->VectorMask(vi1Mask);
-                    v64Mask = B->S_EXT(v64Mask,
-                                       VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
+                    v64Mask      = B->S_EXT(
+                        v64Mask,
+                        VectorType::get(B->mInt64Ty, v64Mask->getType()->getVectorNumElements()));
                      v64Mask = B->BITCAST(v64Mask, vSrc->getType());
  
-                    Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({ 0, 1, 2, 3 }));
-                    Value* src1 = B->VSHUFFLE(vSrc, vSrc, B->C({ 4, 5, 6, 7 }));
-
-                    Value* indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({ 0, 1, 2, 3 }));
-                    Value* indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({ 4, 5, 6, 7 }));
-
-                    Value* mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 0, 1, 2, 3 }));
-                    Value* mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({ 4, 5, 6, 7 }));
-
-                    src0 = B->BITCAST(src0, VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements()));
-                    mask0 = B->BITCAST(mask0, VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
-                    Value* gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale });
-                    src1 = B->BITCAST(src1, VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements()));
-                    mask1 = B->BITCAST(mask1, VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
-                    Value* gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale });
-
-                    v32Gather = B->VSHUFFLE(gather0, gather1, B->C({ 0, 1, 2, 3, 4, 5, 6, 7 }));
+                    Value *src0 = B->VSHUFFLE(vSrc, vSrc, B->C({0, 1, 2, 3}));
+                    Value *src1 = B->VSHUFFLE(vSrc, vSrc, B->C({4, 5, 6, 7}));
+
+                    Value *indices0 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({0, 1, 2, 3}));
+                    Value *indices1 = B->VSHUFFLE(vi32Indices, vi32Indices, B->C({4, 5, 6, 7}));
+
+                    Value *mask0 = B->VSHUFFLE(v64Mask, v64Mask, B->C({0, 1, 2, 3}));
+                    Value *mask1 = B->VSHUFFLE(v64Mask, v64Mask, B->C({4, 5, 6, 7}));
+
+                    src0 = B->BITCAST(
+                        src0,
+                        VectorType::get(B->mInt64Ty, src0->getType()->getVectorNumElements()));
+                    mask0 = B->BITCAST(
+                        mask0,
+                        VectorType::get(B->mInt64Ty, mask0->getType()->getVectorNumElements()));
+                    Value *gather0 =
+                        B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
+                    src1 = B->BITCAST(
+                        src1,
+                        VectorType::get(B->mInt64Ty, src1->getType()->getVectorNumElements()));
+                    mask1 = B->BITCAST(
+                        mask1,
+                        VectorType::get(B->mInt64Ty, mask1->getType()->getVectorNumElements()));
+                    Value *gather1 =
+                        B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
+
+                    v32Gather = B->VSHUFFLE(gather0, gather1, B->C({0, 1, 2, 3, 4, 5, 6, 7}));
                      v32Gather = B->BITCAST(v32Gather, vSrc->getType());
                  }
                  else
                  {
                      // Double pump 8-wide for 32bit elements
                      auto v32Mask = pThis->VectorMask(vi1Mask);
-                    v32Mask = B->BITCAST(v32Mask, vSrc->getType());
-                    Value* src0 = B->EXTRACT_16(vSrc, 0);
-                    Value* src1 = B->EXTRACT_16(vSrc, 1);
+                    v32Mask      = B->BITCAST(v32Mask, vSrc->getType());
+                    Value *src0  = B->EXTRACT_16(vSrc, 0);
+                    Value *src1  = B->EXTRACT_16(vSrc, 1);
  
-                    Value* indices0 = B->EXTRACT_16(vi32Indices, 0);
-                    Value* indices1 = B->EXTRACT_16(vi32Indices, 1);
+                    Value *indices0 = B->EXTRACT_16(vi32Indices, 0);
+                    Value *indices1 = B->EXTRACT_16(vi32Indices, 1);
  
-                    Value* mask0 = B->EXTRACT_16(v32Mask, 0);
-                    Value* mask1 = B->EXTRACT_16(v32Mask, 1);
+                    Value *mask0 = B->EXTRACT_16(v32Mask, 0);
+                    Value *mask1 = B->EXTRACT_16(v32Mask, 1);
  
-                    Value* gather0 = B->CALL(pX86IntrinFunc, { src0, pBase, indices0, mask0, i8Scale });
-                    Value* gather1 = B->CALL(pX86IntrinFunc, { src1, pBase, indices1, mask1, i8Scale });
+                    Value *gather0 =
+                        B->CALL(pX86IntrinFunc, {src0, pBase, indices0, mask0, i8Scale});
+                    Value *gather1 =
+                        B->CALL(pX86IntrinFunc, {src1, pBase, indices1, mask1, i8Scale});
  
                      v32Gather = B->JOIN_16(gather0, gather1);
                  }
@@ -523,22 +595,25 @@ namespace SwrJit
          }
          else if (arch == AVX512)
          {
-            Value* iMask;
-            Function* pX86IntrinFunc;
+            Value *   iMask;
+            Function *pX86IntrinFunc;
              if (srcTy == B->mFP32Ty)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dps_512);
-                iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx512_gather_dps_512);
+                iMask          = B->BITCAST(vi1Mask, B->mInt16Ty);
              }
              else if (srcTy == B->mInt32Ty)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dpi_512);
-                iMask = B->BITCAST(vi1Mask, B->mInt16Ty);
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx512_gather_dpi_512);
+                iMask          = B->BITCAST(vi1Mask, B->mInt16Ty);
              }
              else if (srcTy == B->mDoubleTy)
              {
-                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx512_gather_dpd_512);
-                iMask = B->BITCAST(vi1Mask, B->mInt8Ty);
+                pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule,
+                                                           Intrinsic::x86_avx512_gather_dpd_512);
+                iMask          = B->BITCAST(vi1Mask, B->mInt8Ty);
              }
              else
              {
@@ -546,21 +621,24 @@ namespace SwrJit
              }
  
              auto i32Scale = B->Z_EXT(i8Scale, B->mInt32Ty);
-            v32Gather = B->CALL(pX86IntrinFunc, { vSrc, pBase, vi32Indices, iMask, i32Scale });
+            v32Gather     = B->CALL(pX86IntrinFunc, {vSrc, pBase, vi32Indices, iMask, i32Scale});
          }
  
          return cast<Instruction>(v32Gather);
      }
  
-    // No support for vroundps in avx512 (it is available in kncni), so emulate with avx instructions
-    Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+    // No support for vroundps in avx512 (it is available in kncni), so emulate with avx
+    // instructions
+    Instruction *
+    VROUND_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
      {
          SWR_ASSERT(arch == AVX512);
  
-        auto B = pThis->B;
+        auto B       = pThis->B;
          auto vf32Src = pCallInst->getOperand(0);
          auto i8Round = pCallInst->getOperand(1);
-        auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
+        auto pfnFunc =
+            Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_round_ps_256);
  
          if (width == W256)
          {
@@ -585,25 +663,26 @@ namespace SwrJit
      }
  
      // No support for hsub in AVX512
-    Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst)
+    Instruction *VHSUB_EMU(LowerX86 *pThis, TargetArch arch, TargetWidth width, CallInst *pCallInst)
      {
          SWR_ASSERT(arch == AVX512);
  
-        auto B = pThis->B;
+        auto B    = pThis->B;
          auto src0 = pCallInst->getOperand(0);
          auto src1 = pCallInst->getOperand(1);
  
          // 256b hsub can just use avx intrinsic
          if (width == W256)
          {
-            auto pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_hsub_ps_256);
+            auto pX86IntrinFunc =
+                Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrinsic::x86_avx_hsub_ps_256);
              return cast<Instruction>(B->CALL2(pX86IntrinFunc, src0, src1));
          }
          else if (width == W512)
          {
              // 512b hsub can be accomplished with shuf/sub combo
-            auto minuend = B->VSHUFFLE(src0, src1, B->C({ 0, 2, 8, 10, 4, 6, 12, 14 }));
-            auto subtrahend = B->VSHUFFLE(src0, src1, B->C({ 1, 3, 9, 11, 5, 7, 13, 15 }));
+            auto minuend    = B->VSHUFFLE(src0, src1, B->C({0, 2, 8, 10, 4, 6, 12, 14}));
+            auto subtrahend = B->VSHUFFLE(src0, src1, B->C({1, 3, 9, 11, 5, 7, 13, 15}));
              return cast<Instruction>(B->SUB(minuend, subtrahend));
          }
          else
@@ -613,25 +692,30 @@ namespace SwrJit
          }
      }
  
-    // Double pump input using Intrin template arg. This blindly extracts lower and upper 256 from each vector argument and
-    // calls the 256 wide intrinsic, then merges the results to 512 wide
-    Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, CallInst* pCallInst, Intrinsic::ID intrin)
+    // Double pump input using the intrin argument. This blindly extracts lower and upper 256 from
+    // each vector argument and calls the 256 wide intrinsic, then merges the results to 512 wide
+    Instruction *DOUBLE_EMU(LowerX86 *    pThis,
+                            TargetArch    arch,
+                            TargetWidth   width,
+                            CallInst *    pCallInst,
+                            Intrinsic::ID intrin)
      {
          auto B = pThis->B;
          SWR_ASSERT(width == W512);
-        Value* result[2];
-        Function* pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
+        Value *   result[2];
+        Function *pX86IntrinFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
          for (uint32_t i = 0; i < 2; ++i)
          {
-            SmallVector<Value*, 8> args;
-            for (auto& arg : pCallInst->arg_operands())
+            SmallVector<Value *, 8> args;
+            for (auto &arg : pCallInst->arg_operands())
              {
                  auto argType = arg.get()->getType();
                  if (argType->isVectorTy())
                  {
-                    uint32_t vecWidth = argType->getVectorNumElements();
-                    Value *lanes = B->CInc<int>(i*vecWidth/2, vecWidth/2);
-                    Value *argToPush = B->VSHUFFLE(arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
+                    uint32_t vecWidth  = argType->getVectorNumElements();
+                    Value *  lanes     = B->CInc<int>(i * vecWidth / 2, vecWidth / 2);
+                    Value *  argToPush = B->VSHUFFLE(
+                        arg.get(), B->VUNDEF(argType->getVectorElementType(), vecWidth), lanes);
                      args.push_back(argToPush);
                  }
                  else
@@ -646,7 +730,7 @@ namespace SwrJit
          {
              assert(result[1]->getType()->isVectorTy());
              vecWidth = result[0]->getType()->getVectorNumElements() +
-                result[1]->getType()->getVectorNumElements();
+                       result[1]->getType()->getVectorNumElements();
          }
          else
          {
@@ -656,10 +740,9 @@ namespace SwrJit
          return cast<Instruction>(B->VSHUFFLE(result[0], result[1], lanes));
      }
  
-}
+} // namespace SwrJit
  
  using namespace SwrJit;
  
  INITIALIZE_PASS_BEGIN(LowerX86, "LowerX86", "LowerX86", false, false)
  INITIALIZE_PASS_END(LowerX86, "LowerX86", "LowerX86", false, false)
-
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h

index 95ef4bcf01671a5c0f061030590c5bc62cd7ac9e..d3c732af042e255a915f7bfb405c8bb0b350eb97 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h
@@ -1,30 +1,30 @@
  /****************************************************************************
-* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file passes.h
-*
-* @brief Include file for llvm passes
-*
-******************************************************************************/
+ * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file passes.h
+ *
+ * @brief Include file for llvm passes
+ *
+ ******************************************************************************/
  
  #include "JitManager.h"
  #include "builder.h"
@@ -34,4 +34,4 @@ namespace SwrJit
      using namespace llvm;
  
      FunctionPass* createLowerX86Pass(Builder* b);
-}
+} // namespace SwrJit
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h

index fb6cf9b3f0ae9320a67530fe1b5d619846d89cfa..cc986a78e0aabbaf21e3dc9631326849fad6cd49 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_api.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file jit_api.h
-*
-* @brief Platform independent JIT interface
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file jit_api.h
+ *
+ * @brief Platform independent JIT interface
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  #include "common/os.h"
  #include "core/utils.h"
@@ -48,7 +48,6 @@
  #endif
  
  
-
  struct ShaderInfo;
  
  //////////////////////////////////////////////////////////////////////////
@@ -59,15 +58,15 @@ struct JIT_COMPILE_INPUT
      SWR_SHADER_TYPE type;
      uint32_t        crc;
  
-    const void* pIR;        ///< Pointer to LLVM IR text.
-    size_t irLength;
+    const void* pIR; ///< Pointer to LLVM IR text.
+    size_t      irLength;
  
      bool enableJitSampler;
  
  };
  
-extern "C"
-{
+
+extern "C" {
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief Create JIT context.
@@ -82,17 +81,13 @@ void JITCALL JitDestroyContext(HANDLE hJitContext);
  /// @param hJitContext - Jit Context
  /// @param input  - Input containing LLVM IR and other information
  /// @param output - Output containing information about JIT shader
-ShaderInfo* JITCALL JitCompileShader(
-    HANDLE hJitContext,
-    const JIT_COMPILE_INPUT& input);
+ShaderInfo* JITCALL JitCompileShader(HANDLE hJitContext, const JIT_COMPILE_INPUT& input);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief JIT destroy shader.
  /// @param hJitContext - Jit Context
  /// @param pShaderInfo  - pointer to shader object.
-void JITCALL JitDestroyShader(
-    HANDLE hJitContext,
-    ShaderInfo*& pShaderInfo);
+void JITCALL JitDestroyShader(HANDLE hJitContext, ShaderInfo*& pShaderInfo);
  
  //////////////////////////////////////////////////////////////////////////
  /// @brief JIT compiles fetch shader
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp

index 001a1ab241f76724f51779d3bd91e01e24aec302..47f717bfc2a1c305ab0ac7163549e4e54995a0b0 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2017-2018 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file jit_pch.hpp
-*
-* @brief Pre-compiled header for jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2017-2018 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file jit_pch.hpp
+ *
+ * @brief Pre-compiled header for jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  
  #pragma once
  
@@ -58,7 +58,7 @@
  
  #include "llvm/IR/LegacyPassManager.h"
  using FunctionPassManager = llvm::legacy::FunctionPassManager;
-using PassManager = llvm::legacy::PassManager;
+using PassManager         = llvm::legacy::PassManager;
  
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/ExecutionEngine/ExecutionEngine.h"
@@ -92,7 +92,6 @@ using PassManager = llvm::legacy::PassManager;
  
  #include "llvm/Transforms/Utils/Cloning.h"
  
-
  #if defined(_WIN32)
  #include "llvm/ADT/Triple.h"
  #endif
@@ -117,16 +116,18 @@ using PassManager = llvm::legacy::PassManager;
  #endif
  
  #if LLVM_VERSION_MAJOR >= 5
-static const auto Sync_CrossThread = llvm::SyncScope::System;
-static const auto Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, const llvm::AttrBuilder &b)
+static const auto                Sync_CrossThread     = llvm::SyncScope::System;
+static const auto                Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext&       ctx,
+                                                  const llvm::AttrBuilder& b)
  {
      return llvm::AttributeSet::get(ctx, b);
  }
  #else
-static const auto Sync_CrossThread = llvm::SynchronizationScope::CrossThread;
-static const auto Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
-static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, const llvm::AttrBuilder &b)
+static const auto                Sync_CrossThread     = llvm::SynchronizationScope::CrossThread;
+static const auto                Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext&       ctx,
+                                                  const llvm::AttrBuilder& b)
  {
      return llvm::AttributeSet::get(ctx, Attrib_FunctionIndex, b);
  }
@@ -134,7 +135,6 @@ static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, const
  
  #pragma pop_macro("DEBUG")
  
-
  #include <deque>
  #include <list>
  #include <unordered_map>
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp

index 54d45e6bc4c5c13d20cca9c48c22671cb025d2b2..1c9db0c2d2ae8268aa85441b8a1c209754a1f970 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
@@ -1,36 +1,35 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file DebugOutput.cpp
-*
-* @brief Shader support library implementation for printed Debug output
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file DebugOutput.cpp
+ *
+ * @brief Shader support library implementation for printed Debug output
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include <stdarg.h>
  #include "common/os.h"
  
-
  //////////////////////////////////////////////////////////////////////////
  /// @brief called in JIT code, inserted by PRINT
  /// output to both stdout and visual studio debug console
@@ -40,7 +39,7 @@ extern "C" void CallPrint(const char* fmt, ...)
      va_start(args, fmt);
      vprintf(fmt, args);
  
-#if defined( _WIN32 )
+#if defined(_WIN32)
      char strBuf[1024];
      vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
      OutputDebugStringA(strBuf);
@@ -48,4 +47,3 @@ extern "C" void CallPrint(const char* fmt, ...)
  
      va_end(args);
  }
-
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp

index cb2e3aed61a8be71dec129bff2b29f4f8a2e1719..8f86af2a4b41450b3facad945fe3f23278a42dc7 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file streamout_jit.cpp
-*
-* @brief Implementation of the streamout jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file streamout_jit.cpp
+ *
+ * @brief Implementation of the streamout jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #include "jit_pch.hpp"
  #include "builder.h"
  #include "jit_api.h"
@@ -44,13 +44,12 @@ struct StreamOutJit : public Builder
  {
      StreamOutJit(JitManager* pJitMgr) : Builder(pJitMgr){};
  
-    // returns pointer to SWR_STREAMOUT_BUFFER 
+    // returns pointer to SWR_STREAMOUT_BUFFER
      Value* getSOBuffer(Value* pSoCtx, uint32_t buffer)
      {
-        return LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer });
+        return LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pBuffer, buffer});
      }
  
-
      //////////////////////////////////////////////////////////////////////////
      // @brief checks if streamout buffer is oob
      // @return <i1> true/false
@@ -62,28 +61,27 @@ struct StreamOutJit : public Builder
  
          // load enable
          // @todo bool data types should generate <i1> llvm type
-        Value* enabled = TRUNC(LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_enable }), IRB()->getInt1Ty());
+        Value* enabled = TRUNC(LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_enable}), IRB()->getInt1Ty());
  
          // load buffer size
-        Value* bufferSize = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_bufferSize });
-        
+        Value* bufferSize = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_bufferSize});
+
          // load current streamOffset
-        Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+        Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
  
          // load buffer pitch
-        Value* pitch = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
+        Value* pitch = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
  
          // buffer is considered oob if in use in a decl but not enabled
          returnMask = OR(returnMask, NOT(enabled));
  
          // buffer is oob if cannot fit a prims worth of verts
          Value* newOffset = ADD(streamOffset, MUL(pitch, C(state.numVertsPerPrim)));
-        returnMask = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
+        returnMask       = OR(returnMask, ICMP_SGT(newOffset, bufferSize));
  
          return returnMask;
      }
  
-
      //////////////////////////////////////////////////////////////////////////
      // @brief converts scalar bitmask to <4 x i32> suitable for shuffle vector,
      //        packing the active mask bits
@@ -93,8 +91,8 @@ struct StreamOutJit : public Builder
      Value* PackMask(uint32_t bitmask)
      {
          std::vector<Constant*> indices(4, C(0));
-        DWORD index;
-        uint32_t elem = 0;
+        DWORD                  index;
+        uint32_t               elem = 0;
          while (_BitScanForward(&index, bitmask))
          {
              indices[elem++] = C((int)index);
@@ -133,17 +131,17 @@ struct StreamOutJit : public Builder
      void buildDecl(Value* pStream, Value* pOutBuffers[4], const STREAMOUT_DECL& decl)
      {
          uint32_t numComponents = _mm_popcnt_u32(decl.componentMask);
-        uint32_t packedMask = (1 << numComponents) - 1;
+        uint32_t packedMask    = (1 << numComponents) - 1;
          if (!decl.hole)
          {
              // increment stream pointer to correct slot
              Value* pAttrib = GEP(pStream, C(4 * decl.attribSlot));
  
              // load 4 components from stream
-            Type* simd4Ty = VectorType::get(IRB()->getFloatTy(), 4);
+            Type* simd4Ty    = VectorType::get(IRB()->getFloatTy(), 4);
              Type* simd4PtrTy = PointerType::get(simd4Ty, 0);
-            pAttrib = BITCAST(pAttrib, simd4PtrTy);
-            Value *vattrib = LOAD(pAttrib);
+            pAttrib          = BITCAST(pAttrib, simd4PtrTy);
+            Value* vattrib   = LOAD(pAttrib);
  
              // shuffle/pack enabled components
              Value* vpackedAttrib = VSHUFFLE(vattrib, vattrib, PackMask(decl.componentMask));
@@ -178,7 +176,11 @@ struct StreamOutJit : public Builder
          }
      }
  
-    void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc)
+    void buildStream(const STREAMOUT_COMPILE_STATE& state,
+                     const STREAMOUT_STREAM&        streamState,
+                     Value*                         pSoCtx,
+                     BasicBlock*                    returnBB,
+                     Function*                      soFunc)
      {
          // get list of active SO buffers
          std::unordered_set<uint32_t> activeSOBuffers;
@@ -189,9 +191,9 @@ struct StreamOutJit : public Builder
          }
  
          // always increment numPrimStorageNeeded
-        Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
-        numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
-        STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
+        Value* numPrimStorageNeeded = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
+        numPrimStorageNeeded        = ADD(numPrimStorageNeeded, C(1));
+        STORE(numPrimStorageNeeded, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded});
  
          // check OOB on active SO buffers.  If any buffer is out of bound, don't write
          // the primitive to any buffer
@@ -208,27 +210,27 @@ struct StreamOutJit : public Builder
  
          IRB()->SetInsertPoint(validBB);
  
-        Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
-        numPrimsWritten = ADD(numPrimsWritten, C(1));
-        STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
+        Value* numPrimsWritten = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
+        numPrimsWritten        = ADD(numPrimsWritten, C(1));
+        STORE(numPrimsWritten, pSoCtx, {0, SWR_STREAMOUT_CONTEXT_numPrimsWritten});
  
          // compute start pointer for each output buffer
          Value* pOutBuffer[4];
          Value* pOutBufferStartVertex[4];
          Value* outBufferPitch[4];
-        for (uint32_t b: activeSOBuffers)
+        for (uint32_t b : activeSOBuffers)
          {
-            Value* pBuf = getSOBuffer(pSoCtx, b);
-            Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer });
-            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
-            pOutBuffer[b] = GEP(pData, streamOffset);
+            Value* pBuf              = getSOBuffer(pSoCtx, b);
+            Value* pData             = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pBuffer});
+            Value* streamOffset      = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
+            pOutBuffer[b]            = GEP(pData, streamOffset);
              pOutBufferStartVertex[b] = pOutBuffer[b];
  
-            outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
+            outBufferPitch[b] = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_pitch});
          }
  
          // loop over the vertices of the prim
-        Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData });
+        Value* pStreamData = LOAD(pSoCtx, {0, SWR_STREAMOUT_CONTEXT_pPrimData});
          for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
          {
              buildVertex(streamState, pStreamData, pOutBuffer);
@@ -241,23 +243,24 @@ struct StreamOutJit : public Builder
              for (uint32_t b : activeSOBuffers)
              {
                  pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
-                pOutBuffer[b] = pOutBufferStartVertex[b];
+                pOutBuffer[b]            = pOutBufferStartVertex[b];
              }
          }
  
          // update each active buffer's streamOffset
          for (uint32_t b : activeSOBuffers)
          {
-            Value* pBuf = getSOBuffer(pSoCtx, b);
-            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+            Value* pBuf         = getSOBuffer(pSoCtx, b);
+            Value* streamOffset = LOAD(pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
              streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
-            STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
+            STORE(streamOffset, pBuf, {0, SWR_STREAMOUT_BUFFER_streamOffset});
          }
      }
  
      Function* Create(const STREAMOUT_COMPILE_STATE& state)
      {
-        std::stringstream fnName("SO_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
+        std::stringstream fnName("SO_",
+                                 std::ios_base::in | std::ios_base::out | std::ios_base::ate);
          fnName << ComputeCRC(0, &state, sizeof(state));
  
          // SO function signature
@@ -267,19 +270,20 @@ struct StreamOutJit : public Builder
              PointerType::get(Gen_SWR_STREAMOUT_CONTEXT(JM()), 0), // SWR_STREAMOUT_CONTEXT*
          };
  
-        FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
-        Function* soFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
+        FunctionType* fTy    = FunctionType::get(IRB()->getVoidTy(), args, false);
+        Function*     soFunc = Function::Create(
+            fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
  
          soFunc->getParent()->setModuleIdentifier(soFunc->getName());
  
          // create return basic block
-        BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", soFunc);
+        BasicBlock* entry    = BasicBlock::Create(JM()->mContext, "entry", soFunc);
          BasicBlock* returnBB = BasicBlock::Create(JM()->mContext, "return", soFunc);
  
          IRB()->SetInsertPoint(entry);
  
          // arguments
-        auto argitr = soFunc->arg_begin();
+        auto   argitr = soFunc->arg_begin();
          Value* pSoCtx = &*argitr++;
          pSoCtx->setName("pSoCtx");
  
@@ -325,11 +329,12 @@ struct StreamOutJit : public Builder
  /// @return PFN_SO_FUNC - pointer to SOS function
  PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
  {
-    llvm::Function *func = (llvm::Function*)hFunc;
-    JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
-    PFN_SO_FUNC pfnStreamOut;
+    llvm::Function* func    = (llvm::Function*)hFunc;
+    JitManager*     pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
+    PFN_SO_FUNC     pfnStreamOut;
      pfnStreamOut = (PFN_SO_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
-    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
+    // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
+    // add new IR to the module
      pJitMgr->mIsModuleFinalized = true;
  
      pJitMgr->DumpAsm(func, "SoFunc_optimized");
@@ -342,7 +347,8 @@ PFN_SO_FUNC JitStreamoutFunc(HANDLE hJitMgr, const HANDLE hFunc)
  /// @brief JIT compiles streamout shader
  /// @param hJitMgr - JitManager handle
  /// @param state   - SO state to build function from
-extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr, const STREAMOUT_COMPILE_STATE& state)
+extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE                         hJitMgr,
+                                                   const STREAMOUT_COMPILE_STATE& state)
  {
      JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
  
@@ -358,7 +364,7 @@ extern "C" PFN_SO_FUNC JITCALL JitCompileStreamout(HANDLE hJitMgr, const STREAMO
      pJitMgr->SetupNewModule();
  
      StreamOutJit theJit(pJitMgr);
-    HANDLE hFunc = theJit.Create(soState);
+    HANDLE       hFunc = theJit.Create(soState);
  
      return JitStreamoutFunc(hJitMgr, hFunc);
  }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h

index 097f8ab44d93cabcede2f2e56770ef9ca02cb462..cee7b5748ed2d4108af65e56878689c432c0e8e6 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h
@@ -1,32 +1,32 @@
  /****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a
-* copy of this software and associated documentation files (the "Software"),
-* to deal in the Software without restriction, including without limitation
-* the rights to use, copy, modify, merge, publish, distribute, sublicense,
-* and/or sell copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice (including the next
-* paragraph) shall be included in all copies or substantial portions of the
-* Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-* IN THE SOFTWARE.
-*
-* @file streamout_jit.h
-*
-* @brief Definition of the streamout jitter
-*
-* Notes:
-*
-******************************************************************************/
+ * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * @file streamout_jit.h
+ *
+ * @brief Definition of the streamout jitter
+ *
+ * Notes:
+ *
+ ******************************************************************************/
  #pragma once
  
  #include "common/formats.h"
@@ -43,7 +43,7 @@ struct STREAMOUT_DECL
      // attribute to stream
      uint32_t attribSlot;
  
-    // attribute component mask 
+    // attribute component mask
      uint32_t componentMask;
  
      // indicates this decl is a hole
@@ -69,24 +69,31 @@ struct STREAMOUT_COMPILE_STATE
  {
      // number of verts per primitive
      uint32_t numVertsPerPrim;
-    uint32_t offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
+    uint32_t
+        offsetAttribs; ///< attrib offset to subtract from all STREAMOUT_DECL::attribSlot values.
  
      uint64_t streamMask;
  
      // stream decls
      STREAMOUT_STREAM stream;
  
-    bool operator==(const STREAMOUT_COMPILE_STATE &other) const
+    bool operator==(const STREAMOUT_COMPILE_STATE& other) const
      {
-        if (numVertsPerPrim != other.numVertsPerPrim) return false;
-        if (stream.numDecls != other.stream.numDecls) return false;
+        if (numVertsPerPrim != other.numVertsPerPrim)
+            return false;
+        if (stream.numDecls != other.stream.numDecls)
+            return false;
  
          for (uint32_t i = 0; i < stream.numDecls; ++i)
          {
-            if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex) return false;
-            if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot) return false;
-            if (stream.decl[i].componentMask != other.stream.decl[i].componentMask) return false;
-            if (stream.decl[i].hole != other.stream.decl[i].hole) return false;
+            if (stream.decl[i].bufferIndex != other.stream.decl[i].bufferIndex)
+                return false;
+            if (stream.decl[i].attribSlot != other.stream.decl[i].attribSlot)
+                return false;
+            if (stream.decl[i].componentMask != other.stream.decl[i].componentMask)
+                return false;
+            if (stream.decl[i].hole != other.stream.decl[i].hole)
+                return false;
          }
  
          return true;
author	Alok Hota <alok.hota@intel.com>
	Tue, 5 Jun 2018 18:59:53 +0000 (13:59 -0500)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Mon, 18 Jun 2018 18:57:38 +0000 (13:57 -0500)
src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/archrast/archrast.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/archrast/eventmanager.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_backend.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_builder.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_llvm.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/formats.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/formats.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/intrin.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/isa.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/os.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/os.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/rdtsc_buckets_shared.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simd16intrin.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdintrin.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx2.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu_masks.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/swr_assert.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/swr_assert.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/api.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/api.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/arena.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend_impl.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/binner.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/binner.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/blend.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/clip.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/clip.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/conservativeRast.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/context.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/depthstencil.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/fifo.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/format_conversion.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/format_traits.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/format_types.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/format_utils.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/frontend.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/frontend.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/knobs.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/knobs_init.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/multisample.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/pa.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/rasterizer.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/rdtsc_core.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/rdtsc_core.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/ringbuffer.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/state.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/state_funcs.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/tessellator.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/threads.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/threads.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/tilemgr.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/tileset.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/core/utils.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/JitManager.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/blend_jit.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/blend_jit.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_math.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/jit_api.h		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.cpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/jitter/streamout_jit.h		patch \| blob \| history