#include <stdlib.h>
#include <string.h>
-#include <X11/Xmd.h>
#include <x86intrin.h>
#include <stdint.h>
#include <sys/types.h>
#define CreateDirectory(name, pSecurity) mkdir(name, 0777)
-#if defined(_WIN32)
-static inline
-unsigned int _mm_popcnt_u32(unsigned int v)
-{
- return __builtin_popcount(v);
-}
-#endif
-
#define _aligned_free free
#define InterlockedCompareExchange(Dest, Exchange, Comparand) __sync_val_compare_and_swap(Dest, Comparand, Exchange)
#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
#endif
// Universal types
-typedef BYTE KILOBYTE[1024];
+typedef uint8_t KILOBYTE[1024];
typedef KILOBYTE MEGABYTE[1024];
typedef MEGABYTE GIGABYTE[1024];
uint32_t clearMask,
const float clearColor[4],
float z,
- BYTE stencil)
+ uint8_t stencil)
{
RDTSC_START(APIClearRenderTarget);
/// @param pDstHotTile - pointer to the hot tile surface
typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT dstFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, BYTE *pDstHotTile);
+ uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pDstHotTile);
//////////////////////////////////////////////////////////////////////////
/// @brief Function signature for store hot tiles
/// @param pSrcHotTile - pointer to the hot tile surface
typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT srcFormat,
SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
- uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, BYTE *pSrcHotTile);
+ uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t *pSrcHotTile);
/// @brief Function signature for clearing from the hot tiles clear value
/// @param hPrivateContext - handle to private data
uint32_t clearMask,
const float clearColor[4],
float z,
- BYTE stencil);
+ uint8_t stencil);
void SWR_API SwrSetRastState(
HANDLE hContext,
}
template<SWR_FORMAT format>
-void ClearRasterTile(BYTE *pTileBuffer, simdvector &value)
+void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
{
auto lambda = [&](int comp)
{
/// @todo clear data should come in as RGBA32_FLOAT
DWORD clearData[4];
float clearFloat[4];
- clearFloat[0] = ((BYTE*)(&pClear->clearRTColor))[0] / 255.0f;
- clearFloat[1] = ((BYTE*)(&pClear->clearRTColor))[1] / 255.0f;
- clearFloat[2] = ((BYTE*)(&pClear->clearRTColor))[2] / 255.0f;
- clearFloat[3] = ((BYTE*)(&pClear->clearRTColor))[3] / 255.0f;
+ clearFloat[0] = ((uint8_t*)(&pClear->clearRTColor))[0] / 255.0f;
+ clearFloat[1] = ((uint8_t*)(&pClear->clearRTColor))[1] / 255.0f;
+ clearFloat[2] = ((uint8_t*)(&pClear->clearRTColor))[2] / 255.0f;
+ clearFloat[3] = ((uint8_t*)(&pClear->clearRTColor))[3] / 255.0f;
clearData[0] = *(DWORD*)&clearFloat[0];
clearData[1] = *(DWORD*)&clearFloat[1];
clearData[2] = *(DWORD*)&clearFloat[2];
coeffs.vRecipDet = _simd_broadcast_ss(&work.recipDet);
- BYTE *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
+ uint8_t *pDepthBase = renderBuffers.pDepth, *pStencilBase = renderBuffers.pStencil;
RDTSC_STOP(BESetup, 0, 0);
CLEAR_FLAGS flags;
float clearRTColor[4]; // RGBA_32F
float clearDepth; // [0..1]
- BYTE clearStencil;
+ uint8_t clearStencil;
};
struct INVALIDATE_TILES_DESC
INLINE
simdscalar DepthStencilTest(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
- bool frontFacing, simdscalar interpZ, BYTE* pDepthBase, simdscalar coverageMask, BYTE *pStencilBase,
+ bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, uint8_t *pStencilBase,
simdscalar* pStencilMask)
{
static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
INLINE
void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
- bool frontFacing, simdscalar interpZ, BYTE* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
- BYTE *pStencilBase, const simdscalar& stencilMask)
+ bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
+ uint8_t *pStencilBase, const simdscalar& stencilMask)
{
if (pDSState->depthWriteEnable)
{
/// @param pSrc - source data in SOA form
/// @param dst - output data in SOA form
template<SWR_FORMAT SrcFormat>
-INLINE void LoadSOA(const BYTE *pSrc, simdvector &dst)
+INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
{
// fast path for float32
if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
/// @param src - source data in SOA form
/// @param dst - output data in SOA form
template<SWR_FORMAT DstFormat>
-INLINE void StoreSOA(const simdvector &src, BYTE *pDst)
+INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
{
// fast path for float32
if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
struct PackTraits
{
static const uint32_t MyNumBits = NumBits;
- static simdscalar loadSOA(const BYTE *pSrc) = delete;
- static void storeSOA(BYTE *pDst, simdscalar src) = delete;
+ static simdscalar loadSOA(const uint8_t *pSrc) = delete;
+ static void storeSOA(uint8_t *pDst, simdscalar src) = delete;
static simdscalar unpack(simdscalar &in) = delete;
static simdscalar pack(simdscalar &in) = delete;
};
{
static const uint32_t MyNumBits = 0;
- static simdscalar loadSOA(const BYTE *pSrc) { return _simd_setzero_ps(); }
- static void storeSOA(BYTE *pDst, simdscalar src) { return; }
+ static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_setzero_ps(); }
+ static void storeSOA(uint8_t *pDst, simdscalar src) { return; }
static simdscalar unpack(simdscalar &in) { return _simd_setzero_ps(); }
static simdscalar pack(simdscalar &in) { return _simd_setzero_ps(); }
};
{
static const uint32_t MyNumBits = 8;
- static simdscalar loadSOA(const BYTE *pSrc)
+ static simdscalar loadSOA(const uint8_t *pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
#endif
}
- static void storeSOA(BYTE *pDst, simdscalar src)
+ static void storeSOA(uint8_t *pDst, simdscalar src)
{
// store simd bytes
#if KNOB_SIMD_WIDTH == 8
{
static const uint32_t MyNumBits = 8;
- static simdscalar loadSOA(const BYTE *pSrc)
+ static simdscalar loadSOA(const uint8_t *pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
#endif
}
- static void storeSOA(BYTE *pDst, simdscalar src)
+ static void storeSOA(uint8_t *pDst, simdscalar src)
{
// store simd bytes
#if KNOB_SIMD_WIDTH == 8
{
static const uint32_t MyNumBits = 16;
- static simdscalar loadSOA(const BYTE *pSrc)
+ static simdscalar loadSOA(const uint8_t *pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
#endif
}
- static void storeSOA(BYTE *pDst, simdscalar src)
+ static void storeSOA(uint8_t *pDst, simdscalar src)
{
#if KNOB_SIMD_WIDTH == 8
// store 16B (2B * 8)
{
static const uint32_t MyNumBits = 16;
- static simdscalar loadSOA(const BYTE *pSrc)
+ static simdscalar loadSOA(const uint8_t *pSrc)
{
#if KNOB_SIMD_WIDTH == 8
__m256 result = _mm256_setzero_ps();
#endif
}
- static void storeSOA(BYTE *pDst, simdscalar src)
+ static void storeSOA(uint8_t *pDst, simdscalar src)
{
#if KNOB_SIMD_WIDTH == 8
// store 16B (2B * 8)
{
static const uint32_t MyNumBits = 32;
- static simdscalar loadSOA(const BYTE *pSrc) { return _simd_load_ps((const float*)pSrc); }
- static void storeSOA(BYTE *pDst, simdscalar src) { _simd_store_ps((float*)pDst, src); }
+ static simdscalar loadSOA(const uint8_t *pSrc) { return _simd_load_ps((const float*)pSrc); }
+ static void storeSOA(uint8_t *pDst, simdscalar src) { _simd_store_ps((float*)pDst, src); }
static simdscalar unpack(simdscalar &in) { return in; }
static simdscalar pack(simdscalar &in) { return in; }
};
return TypeTraits<X, NumBitsX>::fromFloat();
}
- INLINE static simdscalar loadSOA(uint32_t comp, const BYTE* pSrc)
+ INLINE static simdscalar loadSOA(uint32_t comp, const uint8_t* pSrc)
{
switch (comp)
{
return TypeTraits<X, NumBitsX>::loadSOA(pSrc);
}
- INLINE static void storeSOA(uint32_t comp, BYTE *pDst, simdscalar src)
+ INLINE static void storeSOA(uint32_t comp, uint8_t *pDst, simdscalar src)
{
switch (comp)
{
// if the entire index buffer isn't being consumed, set the last index
// so that fetches < a SIMD wide will be masked off
- fetchInfo.pLastIndex = (const int32_t*)(((BYTE*)state.indexBuffer.pIndices) + state.indexBuffer.size);
+ fetchInfo.pLastIndex = (const int32_t*)(((uint8_t*)state.indexBuffer.pIndices) + state.indexBuffer.size);
if (pLastRequestedIndex < fetchInfo.pLastIndex)
{
fetchInfo.pLastIndex = pLastRequestedIndex;
i += KNOB_SIMD_WIDTH;
if (IsIndexedT)
{
- fetchInfo.pIndices = (int*)((BYTE*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
+ fetchInfo.pIndices = (int*)((uint8_t*)fetchInfo.pIndices + KNOB_SIMD_WIDTH * indexSize);
}
else
{
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
-typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
+typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, uint8_t*, simdvector&, simdscalari*, simdscalari*);
//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
if (create)
{
uint32_t size = numSamples * mHotTileSize[attachment];
- hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
+ hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
_aligned_free(hotTile.pBuffer);
uint32_t size = numSamples * mHotTileSize[attachment];
- hotTile.pBuffer = (BYTE*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
+ hotTile.pBuffer = (uint8_t*)_aligned_malloc(size, KNOB_SIMD_WIDTH * 4);
hotTile.state = HOTTILE_INVALID;
hotTile.numSamples = numSamples;
}
RDTSC_STOP(BELoadTiles, 0, 0);
}
}
-}
\ No newline at end of file
+}
struct HOTTILE
{
- BYTE *pBuffer;
+ uint8_t *pBuffer;
HOTTILE_STATE state;
DWORD clearData[4]; // May need to change based on pfnClearTile implementation. Reorder for alignment?
uint32_t numSamples;
/// @brief Pass-thru for single component.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
memcpy(pDst, pSrc, (bpp * KNOB_SIMD_WIDTH) / 8);
}
/// @brief Performs an SOA to AOS conversion for packed 8_8_8_8 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
simdscalari src = _simd_load_si((const simdscalari*)pSrc);
#if KNOB_SIMD_WIDTH == 8
/// @brief Performs an SOA to AOS conversion for packed 8_8_8 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 8_8 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
simdscalari src = _simd_load_si((const simdscalari*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 32_32_32_32 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
#if KNOB_SIMD_WIDTH == 8
simdscalar src0 = _simd_load_ps((const float*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 32_32_32 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
#if KNOB_SIMD_WIDTH == 8
simdscalar src0 = _simd_load_ps((const float*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 32_32 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
const float* pfSrc = (const float*)pSrc;
__m128 src_r0 = _mm_load_ps(pfSrc + 0);
/// @brief Performs an SOA to AOS conversion for packed 16_16_16_16 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 16_16_16 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
#if KNOB_SIMD_WIDTH == 8
simdscalari src_rg = _simd_load_si((const simdscalari*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 16_16 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- INLINE static void Transpose(const BYTE* pSrc, BYTE* pDst)
+ INLINE static void Transpose(const uint8_t* pSrc, uint8_t* pDst)
{
simdscalar src = _simd_load_ps((const float*)pSrc);
/// @brief Performs an SOA to AOS conversion for packed 24_8 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 32_8_24 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
/// @brief Performs an SOA to AOS conversion for packed 4_4_4_4 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 5_6_5 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 9_9_9_5 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 5_5_5_1 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 10_10_10_2 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
//////////////////////////////////////////////////////////////////////////
/// @brief Performs an SOA to AOS conversion for packed 11_11_10 data.
/// @param pSrc - source data in SOA form
/// @param pDst - output data in AOS form
- static void Transpose(const BYTE* pSrc, BYTE* pDst) = delete;
+ static void Transpose(const uint8_t* pSrc, uint8_t* pDst) = delete;
};
// helper function to unroll loops
}
#endif
- BYTE* pRemainderBytes = (BYTE*)pDataWords;
+ uint8_t* pRemainderBytes = (uint8_t*)pDataWords;
for (uint32_t i = 0; i < sizeRemainderBytes; ++i)
{
crc = _mm_crc32_u8(crc, *pRemainderBytes++);
/// @param pDstSurface - Destination surface state
/// @param x, y - Coordinates to raster tile.
INLINE static void StoreClear(
- const BYTE* dstFormattedColor,
+ const uint8_t* dstFormattedColor,
UINT dstBytesPerPixel,
SWR_SURFACE_STATE* pDstSurface,
UINT x, UINT y) // (x, y) pixel coordinate to start of raster tile.
{
// Compute destination address for raster tile.
- BYTE* pDstTile = (BYTE*)pDstSurface->pBaseAddress +
+ uint8_t* pDstTile = (uint8_t*)pDstSurface->pBaseAddress +
(y * pDstSurface->pitch) + (x * dstBytesPerPixel);
// start of first row
- BYTE* pDst = pDstTile;
+ uint8_t* pDst = pDstTile;
UINT dstBytesPerRow = 0;
// For each raster tile pixel in row 0 (rx, 0)
{
UINT dstBytesPerPixel = (FormatTraits<DstFormat>::bpp / 8);
- BYTE dstFormattedColor[16]; // max bpp is 128, so 16 is all we need here for one pixel
+ uint8_t dstFormattedColor[16]; // max bpp is 128, so 16 is all we need here for one pixel
float srcColor[4];
/// @param srcPixel - Pointer to source pixel (pre-swizzled according to dest).
template<SWR_FORMAT DstFormat>
static void ConvertPixelFromFloat(
- BYTE* pDstPixel,
+ uint8_t* pDstPixel,
const float srcPixel[4])
{
uint32_t outColor[4] = { 0 }; // typeless bits
template<SWR_FORMAT SrcFormat>
INLINE static void ConvertPixelToFloat(
float dstPixel[4],
- const BYTE* pSrc)
+ const uint8_t* pSrc)
{
uint32_t srcColor[4]; // typeless bits