swr: [rasterizer core] only use Viewport/Scissors during SwrDraw* operations
authorTim Rowley <timothy.o.rowley@intel.com>
Wed, 17 Aug 2016 19:30:32 +0000 (14:30 -0500)
committerTim Rowley <timothy.o.rowley@intel.com>
Wed, 17 Aug 2016 22:08:55 +0000 (17:08 -0500)
Add explicit rects for:

- SwrClearRenderTarget
- SwrDiscardRect
- SwrInvalidateTiles
- SwrStoreTiles

Signed-off-by: Tim Rowley <timothy.o.rowley@intel.com>
12 files changed:
src/gallium/drivers/swr/rasterizer/core/api.cpp
src/gallium/drivers/swr/rasterizer/core/api.h
src/gallium/drivers/swr/rasterizer/core/backend.cpp
src/gallium/drivers/swr/rasterizer/core/context.h
src/gallium/drivers/swr/rasterizer/core/frontend.cpp
src/gallium/drivers/swr/rasterizer/core/frontend.h
src/gallium/drivers/swr/rasterizer/core/rasterizer.cpp
src/gallium/drivers/swr/rasterizer/core/utils.h
src/gallium/drivers/swr/swr_clear.cpp
src/gallium/drivers/swr/swr_context.h
src/gallium/drivers/swr/swr_draw.cpp
src/gallium/drivers/swr/swr_state.cpp

index e447bf6fbe3308fcaa00e13def6c619fb04cb983..d53a6cbeddaae032e9eda7c34a03c6b0ba514708 100644 (file)
@@ -46,6 +46,8 @@
 #include "common/simdintrin.h"
 #include "common/os.h"
 
+static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };
+
 void SetupDefaultState(SWR_CONTEXT *pContext);
 
 static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
@@ -713,56 +715,46 @@ void SwrSetViewports(
 void SwrSetScissorRects(
     HANDLE hContext,
     uint32_t numScissors,
-    const BBOX* pScissors)
+    const SWR_RECT* pScissors)
 {
     SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS,
         "Invalid number of scissor rects.");
 
     API_STATE* pState = GetDrawState(GetContext(hContext));
-    memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(BBOX));
+    memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0]));
 };
 
 void SetupMacroTileScissors(DRAW_CONTEXT *pDC)
 {
     API_STATE *pState = &pDC->pState->state;
-    uint32_t left, right, top, bottom;
 
     // Set up scissor dimensions based on scissor or viewport
     if (pState->rastState.scissorEnable)
     {
-        // scissor rect right/bottom edge are exclusive, core expects scissor dimensions to be inclusive, so subtract one pixel from right/bottom edges
-        left = pState->scissorRects[0].left;
-        right = pState->scissorRects[0].right;
-        top = pState->scissorRects[0].top;
-        bottom = pState->scissorRects[0].bottom;
+        pState->scissorInFixedPoint = pState->scissorRects[0];
     }
     else
     {
         // the vp width and height must be added to origin un-rounded then the result round to -inf.
         // The cast to int works for rounding assuming all [left, right, top, bottom] are positive.
-        left = (int32_t)pState->vp[0].x;
-        right = (int32_t)(pState->vp[0].x + pState->vp[0].width);
-        top = (int32_t)pState->vp[0].y;
-        bottom = (int32_t)(pState->vp[0].y + pState->vp[0].height);
+        pState->scissorInFixedPoint.xmin = (int32_t)pState->vp[0].x;
+        pState->scissorInFixedPoint.xmax = (int32_t)(pState->vp[0].x + pState->vp[0].width);
+        pState->scissorInFixedPoint.ymin = (int32_t)pState->vp[0].y;
+        pState->scissorInFixedPoint.ymax = (int32_t)(pState->vp[0].y + pState->vp[0].height);
     }
 
-    right = std::min<uint32_t>(right, KNOB_MAX_SCISSOR_X);
-    bottom = std::min<uint32_t>(bottom, KNOB_MAX_SCISSOR_Y);
+    // Clamp to max rect
+    pState->scissorInFixedPoint &= g_MaxScissorRect;
 
-    if (left > KNOB_MAX_SCISSOR_X || top > KNOB_MAX_SCISSOR_Y)
-    {
-        pState->scissorInFixedPoint.left = 0;
-        pState->scissorInFixedPoint.right = 0;
-        pState->scissorInFixedPoint.top = 0;
-        pState->scissorInFixedPoint.bottom = 0;
-    }
-    else
-    {
-        pState->scissorInFixedPoint.left = left * FIXED_POINT_SCALE;
-        pState->scissorInFixedPoint.right = right * FIXED_POINT_SCALE - 1;
-        pState->scissorInFixedPoint.top = top * FIXED_POINT_SCALE;
-        pState->scissorInFixedPoint.bottom = bottom * FIXED_POINT_SCALE - 1;
-    }
+    // Scale to fixed point
+    pState->scissorInFixedPoint.xmin *= FIXED_POINT_SCALE;
+    pState->scissorInFixedPoint.xmax *= FIXED_POINT_SCALE;
+    pState->scissorInFixedPoint.ymin *= FIXED_POINT_SCALE;
+    pState->scissorInFixedPoint.ymax *= FIXED_POINT_SCALE;
+
+    // Make scissor inclusive
+    pState->scissorInFixedPoint.xmax -= 1;
+    pState->scissorInFixedPoint.ymax -= 1;
 }
 
 // templated backend function tables
@@ -1303,9 +1295,12 @@ void SwrDrawIndexedInstanced(
 /// @brief SwrInvalidateTiles
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
-void SwrInvalidateTiles(
+/// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
+///                         be hottile size-aligned.
+void SWR_API SwrInvalidateTiles(
     HANDLE hContext,
-    uint32_t attachmentMask)
+    uint32_t attachmentMask,
+    const SWR_RECT& invalidateRect)
 {
     if (KNOB_TOSS_DRAW)
     {
@@ -1318,7 +1313,8 @@ void SwrInvalidateTiles(
     pDC->FeWork.type = DISCARDINVALIDATETILES;
     pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
     pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
-    memset(&pDC->FeWork.desc.discardInvalidateTiles.rect, 0, sizeof(SWR_RECT));
+    pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect;
+    pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
     pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID;
     pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false;
     pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false;
@@ -1331,11 +1327,12 @@ void SwrInvalidateTiles(
 /// @brief SwrDiscardRect
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
-void SwrDiscardRect(
+/// @param rect - The pixel-coordinate rectangle to discard.  Only fully-covered hottiles will be
+///               discarded.
+void SWR_API SwrDiscardRect(
     HANDLE hContext,
     uint32_t attachmentMask,
-    SWR_RECT rect)
+    const SWR_RECT& rect)
 {
     if (KNOB_TOSS_DRAW)
     {
@@ -1350,6 +1347,7 @@ void SwrDiscardRect(
     pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles;
     pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask;
     pDC->FeWork.desc.discardInvalidateTiles.rect = rect;
+    pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect;
     pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED;
     pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true;
     pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true;
@@ -1398,10 +1396,11 @@ void SwrDispatch(
 
 // Deswizzles, converts and stores current contents of the hot tiles to surface
 // described by pState
-void SwrStoreTiles(
+void SWR_API SwrStoreTiles(
     HANDLE hContext,
     SWR_RENDERTARGET_ATTACHMENT attachment,
-    SWR_TILE_STATE postStoreTileState)
+    SWR_TILE_STATE postStoreTileState,
+    const SWR_RECT& storeRect)
 {
     if (KNOB_TOSS_DRAW)
     {
@@ -1413,12 +1412,12 @@ void SwrStoreTiles(
     SWR_CONTEXT *pContext = GetContext(hContext);
     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
 
-    SetupMacroTileScissors(pDC);
-
     pDC->FeWork.type = STORETILES;
     pDC->FeWork.pfnWork = ProcessStoreTiles;
     pDC->FeWork.desc.storeTiles.attachment = attachment;
     pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState;
+    pDC->FeWork.desc.storeTiles.rect = storeRect;
+    pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect;
 
     //enqueue
     QueueDraw(pContext);
@@ -1426,12 +1425,21 @@ void SwrStoreTiles(
     RDTSC_STOP(APIStoreTiles, 0, 0);
 }
 
-void SwrClearRenderTarget(
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
+/// @param clearColor - color use for clearing render targets
+/// @param z - depth value use for clearing depth buffer
+/// @param stencil - stencil value used for clearing stencil buffer
+/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
+void SWR_API SwrClearRenderTarget(
     HANDLE hContext,
     uint32_t clearMask,
     const float clearColor[4],
     float z,
-    uint8_t stencil)
+    uint8_t stencil,
+    const SWR_RECT& clearRect)
 {
     if (KNOB_TOSS_DRAW)
     {
@@ -1441,16 +1449,16 @@ void SwrClearRenderTarget(
     RDTSC_START(APIClearRenderTarget);
 
     SWR_CONTEXT *pContext = GetContext(hContext);
-
     DRAW_CONTEXT* pDC = GetDrawContext(pContext);
 
-    SetupMacroTileScissors(pDC);
-
     CLEAR_FLAGS flags;
+    flags.bits = 0;
     flags.mask = clearMask;
 
     pDC->FeWork.type = CLEAR;
     pDC->FeWork.pfnWork = ProcessClear;
+    pDC->FeWork.desc.clear.rect = clearRect;
+    pDC->FeWork.desc.clear.rect &= g_MaxScissorRect;
     pDC->FeWork.desc.clear.flags = flags;
     pDC->FeWork.desc.clear.clearDepth = z;
     pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0];
index ed18fe01010b59bbba9b66c09eff5e594d4fa196..9ca235d821e0f2bdd98e3aaf32d8984a69cbe9d9 100644 (file)
 #include "common/os.h"
 
 #include <assert.h>
-#include <vector>
+#include <algorithm>
 
 #include "common/simdintrin.h"
 #include "common/formats.h"
-#include "core/utils.h"
 #include "core/state.h"
 
-///@todo place all the API functions into the 'swr' namespace.
-
 typedef void(SWR_API *PFN_CALLBACK_FUNC)(uint64_t data, uint64_t data2, uint64_t data3);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief Rectangle structure
+struct SWR_RECT
+{
+    int32_t xmin; ///< inclusive
+    int32_t ymin; ///< inclusive
+    int32_t xmax; ///< exclusive
+    int32_t ymax; ///< exclusive 
+
+    bool operator == (const SWR_RECT& rhs)
+    {
+        return (this->ymin == rhs.ymin &&
+            this->ymax == rhs.ymax &&
+            this->xmin == rhs.xmin &&
+            this->xmax == rhs.xmax);
+    }
+
+    bool operator != (const SWR_RECT& rhs)
+    {
+        return !(*this == rhs);
+    }
+
+    SWR_RECT& Intersect(const SWR_RECT& other)
+    {
+        this->xmin = std::max(this->xmin, other.xmin);
+        this->ymin = std::max(this->ymin, other.ymin);
+        this->xmax = std::min(this->xmax, other.xmax);
+        this->ymax = std::min(this->ymax, other.ymax);
+
+        if (xmax - xmin < 0 ||
+            ymax - ymin < 0)
+        {
+            // Zero area
+            ymin = ymax = xmin = xmax = 0;
+        }
+
+        return *this;
+    }
+    SWR_RECT& operator &= (const SWR_RECT& other)
+    {
+        return Intersect(other);
+    }
+
+    SWR_RECT& Union(const SWR_RECT& other)
+    {
+        this->xmin = std::min(this->xmin, other.xmin);
+        this->ymin = std::min(this->ymin, other.ymin);
+        this->xmax = std::max(this->xmax, other.xmax);
+        this->ymax = std::max(this->ymax, other.ymax);
+
+        return *this;
+    }
+
+    SWR_RECT& operator |= (const SWR_RECT& other)
+    {
+        return Union(other);
+    }
+
+    void Translate(int32_t x, int32_t y)
+    {
+        xmin += x;
+        ymin += y;
+        xmax += x;
+        ymax += y;
+    }
+};
+
 //////////////////////////////////////////////////////////////////////////
 /// @brief Function signature for load hot tiles
 /// @param hPrivateContext - handle to private data
@@ -105,6 +169,10 @@ typedef void(SWR_API *PFN_UPDATE_STATS)(HANDLE hPrivateContext,
 typedef void(SWR_API *PFN_UPDATE_STATS_FE)(HANDLE hPrivateContext,
     const SWR_STATS_FE* pStats);
 
+//////////////////////////////////////////////////////////////////////////
+/// BucketManager
+/// Forward Declaration (see rdtsc_buckets.h for full definition)
+/////////////////////////////////////////////////////////////////////////
 class BucketManager;
 
 //////////////////////////////////////////////////////////////////////////
@@ -149,17 +217,6 @@ struct SWR_CREATECONTEXT_INFO
     SWR_THREADING_INFO* pThreadInfo;
 };
 
-//////////////////////////////////////////////////////////////////////////
-/// SWR_RECT
-/////////////////////////////////////////////////////////////////////////
-struct SWR_RECT
-{
-    uint32_t left;
-    uint32_t right;
-    uint32_t top;
-    uint32_t bottom;
-};
-
 //////////////////////////////////////////////////////////////////////////
 /// @brief Create SWR Context.
 /// @param pCreateInfo - pointer to creation info.
@@ -445,19 +502,23 @@ void SWR_API SwrDrawIndexedInstanced(
 /// @brief SwrInvalidateTiles
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
+/// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
+///                         be hottile size-aligned.
 void SWR_API SwrInvalidateTiles(
     HANDLE hContext,
-    uint32_t attachmentMask);
+    uint32_t attachmentMask,
+    const SWR_RECT& invalidateRect);
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief SwrDiscardRect
 /// @param hContext - Handle passed back from SwrCreateContext
 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard.
-/// @param rect - if rect is all zeros, the entire attachment surface will be discarded
+/// @param rect - The pixel-coordinate rectangle to discard.  Only fully-covered hottiles will be
+///               discarded.
 void SWR_API SwrDiscardRect(
     HANDLE hContext,
     uint32_t attachmentMask,
-    SWR_RECT rect);
+    const SWR_RECT& rect);
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief SwrDispatch
@@ -483,15 +544,30 @@ enum SWR_TILE_STATE
 void SWR_API SwrStoreTiles(
     HANDLE hContext,
     SWR_RENDERTARGET_ATTACHMENT attachment,
-    SWR_TILE_STATE postStoreTileState);
+    SWR_TILE_STATE postStoreTileState,
+    const SWR_RECT& storeRect);
+
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param clearMask - combination of SWR_CLEAR_COLOR / SWR_CLEAR_DEPTH / SWR_CLEAR_STENCIL flags (or SWR_CLEAR_NONE)
+/// @param clearColor - color use for clearing render targets
+/// @param z - depth value use for clearing depth buffer
+/// @param stencil - stencil value used for clearing stencil buffer
+/// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers
 void SWR_API SwrClearRenderTarget(
     HANDLE hContext,
     uint32_t clearMask,
     const float clearColor[4],
     float z,
-    uint8_t stencil);
+    uint8_t stencil,
+    const SWR_RECT& clearRect);
 
+//////////////////////////////////////////////////////////////////////////
+/// @brief SwrSetRastyState
+/// @param hContext - Handle passed back from SwrCreateContext
+/// @param pRastState - New SWR_RASTSTATE used for SwrDraw* commands
 void SWR_API SwrSetRastState(
     HANDLE hContext,
     const SWR_RASTSTATE *pRastState);
@@ -516,7 +592,7 @@ void SWR_API SwrSetViewports(
 void SWR_API SwrSetScissorRects(
     HANDLE hContext,
     uint32_t numScissors,
-    const BBOX* pScissors);
+    const SWR_RECT* pScissors);
 
 //////////////////////////////////////////////////////////////////////////
 /// @brief Returns a pointer to the private context state for the current
@@ -555,4 +631,5 @@ void SWR_API SwrEnableStats(
 /// @param hContext - Handle passed back from SwrCreateContext
 void SWR_API SwrEndFrame(
     HANDLE hContext);
+
 #endif//__SWR_API_H__
index dff86b3ee76e85fa1d05851a67ddd443c6af353f..1e4dca2fe25418c5347ceaa2e380162f387af949 100644 (file)
@@ -37,7 +37,7 @@
 
 #include <algorithm>
 
-typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4]);
+typedef void(*PFN_CLEAR_TILES)(DRAW_CONTEXT*, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t, DWORD[4], const SWR_RECT& rect);
 static PFN_CLEAR_TILES sClearTilesTable[NUM_SWR_FORMATS];
 
 //////////////////////////////////////////////////////////////////////////
@@ -88,7 +88,7 @@ void ProcessSyncBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
 template<SWR_FORMAT format>
 void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
 {
-    auto lambda = [&](int comp)
+    auto lambda = [&](int32_t comp)
     {
         FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
         pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
@@ -102,7 +102,7 @@ void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
 }
 
 template<SWR_FORMAT format>
-INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4])
+INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, DWORD clear[4], const SWR_RECT& rect)
 {
     // convert clear color to hottile format
     // clear color is in RGBA float/uint32
@@ -122,32 +122,33 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
 
     uint32_t tileX, tileY;
     MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
-    const API_STATE& state = GetApiState(pDC);
-    
-    int top = KNOB_MACROTILE_Y_DIM_FIXED * tileY;
-    int bottom = top + KNOB_MACROTILE_Y_DIM_FIXED - 1;
-    int left = KNOB_MACROTILE_X_DIM_FIXED * tileX;
-    int right = left + KNOB_MACROTILE_X_DIM_FIXED - 1;
 
-    // intersect with scissor
-    top = std::max(top, state.scissorInFixedPoint.top);
-    left = std::max(left, state.scissorInFixedPoint.left);
-    bottom = std::min(bottom, state.scissorInFixedPoint.bottom);
-    right = std::min(right, state.scissorInFixedPoint.right);
+    // Init to full macrotile
+    SWR_RECT clearTile =
+    {
+        KNOB_MACROTILE_X_DIM * int32_t(tileX),
+        KNOB_MACROTILE_Y_DIM * int32_t(tileY),
+        KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
+        KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
+    };
+
+    // intersect with clear rect
+    clearTile &= rect;
 
     // translate to local hottile origin
-    top -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
-    bottom -= KNOB_MACROTILE_Y_DIM_FIXED * tileY;
-    left -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
-    right -= KNOB_MACROTILE_X_DIM_FIXED * tileX;
+    clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
+
+    // Make maximums inclusive (needed for convert to raster tiles)
+    clearTile.xmax -= 1;
+    clearTile.ymax -= 1;
 
     // convert to raster tiles
-    top >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
-    bottom >>= (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
-    left >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
-    right >>= (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+    clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
+    clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
+    clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
+    clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
 
-    const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
+    const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
     // compute steps between raster tile samples / raster tiles / macro tile rows
     const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
     const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
@@ -155,16 +156,16 @@ INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, ui
     const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
 
     HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples);
-    uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, left, top)) * numSamples;
+    uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
     uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
 
     // loop over all raster tiles in the current hot tile
-    for (int y = top; y <= bottom; ++y)
+    for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
     {
         uint8_t* pRasterTile = pRasterTileRow;
-        for (int x = left; x <= right; ++x)
+        for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
         {
-            for( int sampleNum = 0; sampleNum < numSamples; sampleNum++)
+            for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
             {
                 ClearRasterTile<format>(pRasterTile, vClear);
                 pRasterTile += rasterTileSampleStep;
@@ -241,7 +242,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
             PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
             SWR_ASSERT(pfnClearTiles != nullptr);
 
-            pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData);
+            pfnClearTiles(pDC, SWR_ATTACHMENT_COLOR0, macroTile, clearData, pClear->rect);
         }
 
         if (pClear->flags.mask & SWR_CLEAR_DEPTH)
@@ -251,7 +252,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
             PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
             SWR_ASSERT(pfnClearTiles != nullptr);
 
-            pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData);
+            pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, clearData, pClear->rect);
         }
 
         if (pClear->flags.mask & SWR_CLEAR_STENCIL)
@@ -261,7 +262,7 @@ void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, vo
             clearData[0] = *(DWORD*)&value;
             PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
 
-            pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData);
+            pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, clearData, pClear->rect);
         }
 
         RDTSC_STOP(BEClear, 0, 0);
@@ -307,13 +308,13 @@ void ProcessStoreTileBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile
             PFN_CLEAR_TILES pfnClearTiles = sClearTilesTable[srcFormat];
             SWR_ASSERT(pfnClearTiles != nullptr);
 
-            pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData);
+            pfnClearTiles(pDC, pDesc->attachment, macroTile, pHotTile->clearData, pDesc->rect);
         }
 
         if (pHotTile->state == HOTTILE_DIRTY || pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
         {
-            int destX = KNOB_MACROTILE_X_DIM * x;
-            int destY = KNOB_MACROTILE_Y_DIM * y;
+            int32_t destX = KNOB_MACROTILE_X_DIM * x;
+            int32_t destY = KNOB_MACROTILE_Y_DIM * y;
 
             pContext->pfnStoreTile(GetPrivateState(pDC), srcFormat,
                 pDesc->attachment, destX, destY, pHotTile->renderTargetArrayIndex, pHotTile->pBuffer);
@@ -334,7 +335,7 @@ void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint3
     DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC *)pData;
     SWR_CONTEXT *pContext = pDC->pContext;
 
-    const int numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
+    const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
 
     for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
     {
index 0a85ebe30a594d9cf60b07f97176b46a89dbe1ac..818205300246d8898f2e0e7da73327e14ff742e4 100644 (file)
@@ -109,6 +109,7 @@ union CLEAR_FLAGS
 
 struct CLEAR_DESC
 {
+    SWR_RECT rect;
     CLEAR_FLAGS flags;
     float clearRTColor[4];  // RGBA_32F
     float clearDepth;   // [0..1]
@@ -136,6 +137,7 @@ struct STORE_TILES_DESC
 {
     SWR_RENDERTARGET_ATTACHMENT attachment;
     SWR_TILE_STATE postStoreTileState;
+    SWR_RECT rect;
 };
 
 struct COMPUTE_DESC
@@ -271,8 +273,8 @@ OSALIGNLINE(struct) API_STATE
     SWR_VIEWPORT            vp[KNOB_NUM_VIEWPORTS_SCISSORS];
     SWR_VIEWPORT_MATRICES   vpMatrices;
 
-    BBOX                    scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
-    BBOX                    scissorInFixedPoint;
+    SWR_RECT                scissorRects[KNOB_NUM_VIEWPORTS_SCISSORS];
+    SWR_RECT                scissorInFixedPoint;
 
     // Backend state
     SWR_BACKEND_STATE       backendState;
@@ -494,8 +496,5 @@ struct SWR_CONTEXT
     TileSet singleThreadLockedTiles;
 };
 
-void WaitForDependencies(SWR_CONTEXT *pContext, uint64_t drawId);
-void WakeAllThreads(SWR_CONTEXT *pContext);
-
 #define UPDATE_STAT(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.stats[workerId].name += count; }
 #define UPDATE_STAT_FE(name, count) if (GetApiState(pDC).enableStats) { pDC->dynState.statsFE.name += count; }
index 2809502ee34a438ddb95a91d606138512ab504d4..04c62adbc5a2afd2eafaa4e1f06282749c66d2f0 100644 (file)
@@ -93,26 +93,24 @@ void ProcessClear(
     uint32_t workerId,
     void *pUserData)
 {
-    CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
+    CLEAR_DESC *pDesc = (CLEAR_DESC*)pUserData;
     MacroTileMgr *pTileMgr = pDC->pTileMgr;
 
-    const API_STATE& state = GetApiState(pDC);
-
     // queue a clear to each macro tile
-    // compute macro tile bounds for the current scissor/viewport
-    uint32_t macroTileLeft = state.scissorInFixedPoint.left / KNOB_MACROTILE_X_DIM_FIXED;
-    uint32_t macroTileRight = state.scissorInFixedPoint.right / KNOB_MACROTILE_X_DIM_FIXED;
-    uint32_t macroTileTop = state.scissorInFixedPoint.top / KNOB_MACROTILE_Y_DIM_FIXED;
-    uint32_t macroTileBottom = state.scissorInFixedPoint.bottom / KNOB_MACROTILE_Y_DIM_FIXED;
+    // compute macro tile bounds for the specified rect
+    uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+    uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+    uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+    uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
 
     BE_WORK work;
     work.type = CLEAR;
     work.pfnWork = ProcessClearBE;
-    work.desc.clear = *pClear;
+    work.desc.clear = *pDesc;
 
-    for (uint32_t y = macroTileTop; y <= macroTileBottom; ++y)
+    for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
     {
-        for (uint32_t x = macroTileLeft; x <= macroTileRight; ++x)
+        for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
         {
             pTileMgr->enqueue(x, y, &work);
         }
@@ -133,28 +131,25 @@ void ProcessStoreTiles(
     void *pUserData)
 {
     RDTSC_START(FEProcessStoreTiles);
-    STORE_TILES_DESC *pStore = (STORE_TILES_DESC*)pUserData;
     MacroTileMgr *pTileMgr = pDC->pTileMgr;
-
-    const API_STATE& state = GetApiState(pDC);
+    STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pUserData;
 
     // queue a store to each macro tile
-    // compute macro tile bounds for the current render target
-    const uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
-    const uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
-
-    uint32_t numMacroTilesX = ((uint32_t)state.vp[0].width + (uint32_t)state.vp[0].x + (macroWidth - 1)) / macroWidth;
-    uint32_t numMacroTilesY = ((uint32_t)state.vp[0].height + (uint32_t)state.vp[0].y + (macroHeight - 1)) / macroHeight;
+    // compute macro tile bounds for the specified rect
+    uint32_t macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+    uint32_t macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+    uint32_t macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+    uint32_t macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
 
     // store tiles
     BE_WORK work;
     work.type = STORETILES;
     work.pfnWork = ProcessStoreTileBE;
-    work.desc.storeTiles = *pStore;
+    work.desc.storeTiles = *pDesc;
 
-    for (uint32_t x = 0; x < numMacroTilesX; ++x)
+    for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
     {
-        for (uint32_t y = 0; y < numMacroTilesY; ++y)
+        for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
         {
             pTileMgr->enqueue(x, y, &work);
         }
@@ -177,64 +172,39 @@ void ProcessDiscardInvalidateTiles(
     void *pUserData)
 {
     RDTSC_START(FEProcessInvalidateTiles);
-    DISCARD_INVALIDATE_TILES_DESC *pInv = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
+    DISCARD_INVALIDATE_TILES_DESC *pDesc = (DISCARD_INVALIDATE_TILES_DESC*)pUserData;
     MacroTileMgr *pTileMgr = pDC->pTileMgr;
 
-    SWR_RECT rect;
+    // compute macro tile bounds for the specified rect
+    uint32_t macroTileXMin = (pDesc->rect.xmin + KNOB_MACROTILE_X_DIM - 1) / KNOB_MACROTILE_X_DIM;
+    uint32_t macroTileXMax = (pDesc->rect.xmax / KNOB_MACROTILE_X_DIM) - 1;
+    uint32_t macroTileYMin = (pDesc->rect.ymin + KNOB_MACROTILE_Y_DIM - 1) / KNOB_MACROTILE_Y_DIM;
+    uint32_t macroTileYMax = (pDesc->rect.ymax / KNOB_MACROTILE_Y_DIM) - 1;
 
-    if (pInv->rect.top | pInv->rect.bottom | pInv->rect.right | pInv->rect.left)
-    {
-        // Valid rect
-        rect = pInv->rect;
-    }
-    else
-    {
-        // Use viewport dimensions
-        const API_STATE& state = GetApiState(pDC);
-
-        rect.left   = (uint32_t)state.vp[0].x;
-        rect.right  = (uint32_t)(state.vp[0].x + state.vp[0].width);
-        rect.top    = (uint32_t)state.vp[0].y;
-        rect.bottom = (uint32_t)(state.vp[0].y + state.vp[0].height);
-    }
-
-    // queue a store to each macro tile
-    // compute macro tile bounds for the current render target
-    uint32_t macroWidth = KNOB_MACROTILE_X_DIM;
-    uint32_t macroHeight = KNOB_MACROTILE_Y_DIM;
-
-    // Setup region assuming full tiles
-    uint32_t macroTileStartX = (rect.left + (macroWidth - 1)) / macroWidth;
-    uint32_t macroTileStartY = (rect.top + (macroHeight - 1)) / macroHeight;
-
-    uint32_t macroTileEndX = rect.right / macroWidth;
-    uint32_t macroTileEndY = rect.bottom / macroHeight;
-
-    if (pInv->fullTilesOnly == false)
+    if (pDesc->fullTilesOnly == false)
     {
         // include partial tiles
-        macroTileStartX = rect.left / macroWidth;
-        macroTileStartY = rect.top / macroHeight;
-
-        macroTileEndX = (rect.right + macroWidth - 1) / macroWidth;
-        macroTileEndY = (rect.bottom + macroHeight - 1) / macroHeight;
+        macroTileXMin = pDesc->rect.xmin / KNOB_MACROTILE_X_DIM;
+        macroTileXMax = (pDesc->rect.xmax - 1) / KNOB_MACROTILE_X_DIM;
+        macroTileYMin = pDesc->rect.ymin / KNOB_MACROTILE_Y_DIM;
+        macroTileYMax = (pDesc->rect.ymax - 1) / KNOB_MACROTILE_Y_DIM;
     }
 
-    SWR_ASSERT(macroTileEndX <= KNOB_NUM_HOT_TILES_X);
-    SWR_ASSERT(macroTileEndY <= KNOB_NUM_HOT_TILES_Y);
+    SWR_ASSERT(macroTileXMax <= KNOB_NUM_HOT_TILES_X);
+    SWR_ASSERT(macroTileYMax <= KNOB_NUM_HOT_TILES_Y);
 
-    macroTileEndX = std::min<uint32_t>(macroTileEndX, KNOB_NUM_HOT_TILES_X);
-    macroTileEndY = std::min<uint32_t>(macroTileEndY, KNOB_NUM_HOT_TILES_Y);
+    macroTileXMax = std::min<int32_t>(macroTileXMax, KNOB_NUM_HOT_TILES_X);
+    macroTileYMax = std::min<int32_t>(macroTileYMax, KNOB_NUM_HOT_TILES_Y);
 
     // load tiles
     BE_WORK work;
     work.type = DISCARDINVALIDATETILES;
     work.pfnWork = ProcessDiscardInvalidateTilesBE;
-    work.desc.discardInvalidateTiles = *pInv;
+    work.desc.discardInvalidateTiles = *pDesc;
 
-    for (uint32_t x = macroTileStartX; x < macroTileEndX; ++x)
+    for (uint32_t x = macroTileXMin; x <= macroTileXMax; ++x)
     {
-        for (uint32_t y = macroTileStartY; y < macroTileEndY; ++y)
+        for (uint32_t y = macroTileYMin; y <= macroTileYMax; ++y)
         {
             pTileMgr->enqueue(x, y, &work);
         }
@@ -587,7 +557,7 @@ static void StreamOut(
 //////////////////////////////////////////////////////////////////////////
 /// @brief Computes number of invocations. The current index represents
 ///        the start of the SIMD. The max index represents how much work
-///        items are remaining. If there is less then a SIMD's left of work
+///        items are remaining. If there is less then a SIMD's xmin of work
 ///        then return the remaining amount of work.
 /// @param curIndex - The start index for the SIMD.
 /// @param maxIndex - The last index for all work items.
@@ -1694,10 +1664,10 @@ INLINE void calcBoundingBoxIntVertical(const simdvector * const tri, simdscalari
     vMaxY = _simd_max_epi32(vMaxY, vY[1]);
     vMaxY = _simd_max_epi32(vMaxY, vY[2]);
 
-    bbox.left = vMinX;
-    bbox.right = vMaxX;
-    bbox.top = vMinY;
-    bbox.bottom = vMaxY;
+    bbox.xmin = vMinX;
+    bbox.xmax = vMaxX;
+    bbox.ymin = vMinY;
+    bbox.ymax = vMaxY;
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1727,10 +1697,10 @@ INLINE void calcBoundingBoxIntVertical<FEConservativeRastT>(const simdvector * c
     
     /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
     /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
-    bbox.left = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
-    bbox.right = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
-    bbox.top = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
-    bbox.bottom = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+    bbox.xmin = _simd_sub_epi32(vMinX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+    bbox.xmax = _simd_add_epi32(vMaxX, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+    bbox.ymin = _simd_sub_epi32(vMinY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
+    bbox.ymax = _simd_add_epi32(vMaxY, _simd_set1_epi32(CT::BoundingBoxOffsetT::value));
 }
 
 //////////////////////////////////////////////////////////////////////////
@@ -1940,27 +1910,27 @@ void BinTriangles(
 
     // determine if triangle falls between pixel centers and discard
     // only discard for non-MSAA case and when conservative rast is disabled
-    // (left + 127) & ~255
-    // (right + 128) & ~255
+    // (xmin + 127) & ~255
+    // (xmax + 128) & ~255
     if(rastState.sampleCount == SWR_MULTISAMPLE_1X && (!CT::IsConservativeT::value))
     {
         origTriMask = triMask;
 
         int cullCenterMask;
         {
-            simdscalari left = _simd_add_epi32(bbox.left, _simd_set1_epi32(127));
-            left = _simd_and_si(left, _simd_set1_epi32(~255));
-            simdscalari right = _simd_add_epi32(bbox.right, _simd_set1_epi32(128));
-            right = _simd_and_si(right, _simd_set1_epi32(~255));
+            simdscalari xmin = _simd_add_epi32(bbox.xmin, _simd_set1_epi32(127));
+            xmin = _simd_and_si(xmin, _simd_set1_epi32(~255));
+            simdscalari xmax = _simd_add_epi32(bbox.xmax, _simd_set1_epi32(128));
+            xmax = _simd_and_si(xmax, _simd_set1_epi32(~255));
 
-            simdscalari vMaskH = _simd_cmpeq_epi32(left, right);
+            simdscalari vMaskH = _simd_cmpeq_epi32(xmin, xmax);
 
-            simdscalari top = _simd_add_epi32(bbox.top, _simd_set1_epi32(127));
-            top = _simd_and_si(top, _simd_set1_epi32(~255));
-            simdscalari bottom = _simd_add_epi32(bbox.bottom, _simd_set1_epi32(128));
-            bottom = _simd_and_si(bottom, _simd_set1_epi32(~255));
+            simdscalari ymin = _simd_add_epi32(bbox.ymin, _simd_set1_epi32(127));
+            ymin = _simd_and_si(ymin, _simd_set1_epi32(~255));
+            simdscalari ymax = _simd_add_epi32(bbox.ymax, _simd_set1_epi32(128));
+            ymax = _simd_and_si(ymax, _simd_set1_epi32(~255));
 
-            simdscalari vMaskV = _simd_cmpeq_epi32(top, bottom);
+            simdscalari vMaskV = _simd_cmpeq_epi32(ymin, ymax);
             vMaskV = _simd_or_si(vMaskH, vMaskV);
             cullCenterMask = _simd_movemask_ps(_simd_castsi_ps(vMaskV));
         }
@@ -1973,26 +1943,26 @@ void BinTriangles(
         }
     }
 
-    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
-    bbox.left   = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
-    bbox.top    = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
-    bbox.right  = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
-    bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+    bbox.xmin   = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+    bbox.ymin    = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+    bbox.xmax  = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+    bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
 
     if(CT::IsConservativeT::value)
     {
         // in the case where a degenerate triangle is on a scissor edge, we need to make sure the primitive bbox has
-        // some area. Bump the right/bottom edges out 
-        simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.top, bbox.bottom);
-        bbox.bottom = _simd_blendv_epi32(bbox.bottom, _simd_add_epi32(bbox.bottom, _simd_set1_epi32(1)), topEqualsBottom);
-        simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.left, bbox.right);
-        bbox.right = _simd_blendv_epi32(bbox.right, _simd_add_epi32(bbox.right, _simd_set1_epi32(1)), leftEqualsRight);
+        // some area. Bump the xmax/ymax edges out 
+        simdscalari topEqualsBottom = _simd_cmpeq_epi32(bbox.ymin, bbox.ymax);
+        bbox.ymax = _simd_blendv_epi32(bbox.ymax, _simd_add_epi32(bbox.ymax, _simd_set1_epi32(1)), topEqualsBottom);
+        simdscalari leftEqualsRight = _simd_cmpeq_epi32(bbox.xmin, bbox.xmax);
+        bbox.xmax = _simd_blendv_epi32(bbox.xmax, _simd_add_epi32(bbox.xmax, _simd_set1_epi32(1)), leftEqualsRight);
     }
 
     // Cull tris completely outside scissor
     {
-        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
-        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
         simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
         uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
         triMask = triMask & ~maskOutsideScissor;
@@ -2004,16 +1974,16 @@ void BinTriangles(
     }
 
     // Convert triangle bbox to macrotile units.
-    bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-    bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
-    bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-    bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+    bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+    bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+    bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+    bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
 
     OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
-    _simd_store_si((simdscalari*)aMTLeft, bbox.left);
-    _simd_store_si((simdscalari*)aMTRight, bbox.right);
-    _simd_store_si((simdscalari*)aMTTop, bbox.top);
-    _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+    _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+    _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+    _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+    _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
 
     // transpose verts needed for backend
     /// @todo modify BE to take non-transformed verts
@@ -2196,11 +2166,11 @@ void BinPoints(
 
     if (CanUseSimplePoints(pDC))
     {
-        // adjust for top-left rule
+        // adjust for ymin-xmin rule
         vXi = _simd_sub_epi32(vXi, _simd_set1_epi32(1));
         vYi = _simd_sub_epi32(vYi, _simd_set1_epi32(1));
 
-        // cull points off the top-left edge of the viewport
+        // cull points off the ymin-xmin edge of the viewport
         primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vXi));
         primMask &= ~_simd_movemask_ps(_simd_castsi_ps(vYi));
 
@@ -2325,40 +2295,40 @@ void BinPoints(
 
         // bloat point to bbox
         simdBBox bbox;
-        bbox.left = bbox.right = vXi;
-        bbox.top = bbox.bottom = vYi;
+        bbox.xmin = bbox.xmax = vXi;
+        bbox.ymin = bbox.ymax = vYi;
 
         simdscalar vHalfWidth = _simd_mul_ps(vPointSize, _simd_set1_ps(0.5f));
         simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
-        bbox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
-        bbox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
-        bbox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
-        bbox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
+        bbox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
+        bbox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
+        bbox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
+        bbox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
 
-        // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
-        bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
-        bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
-        bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
-        bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+        // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+        bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+        bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+        bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+        bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
 
         // Cull bloated points completely outside scissor
-        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
-        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
         simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
         uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
         primMask = primMask & ~maskOutsideScissor;
 
         // Convert bbox to macrotile units.
-        bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-        bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
-        bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-        bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+        bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+        bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+        bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+        bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
 
         OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
-        _simd_store_si((simdscalari*)aMTLeft, bbox.left);
-        _simd_store_si((simdscalari*)aMTRight, bbox.right);
-        _simd_store_si((simdscalari*)aMTTop, bbox.top);
-        _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+        _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+        _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+        _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+        _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
 
         // store render target array index
         OSALIGNSIMD(uint32_t) aRTAI[KNOB_SIMD_WIDTH];
@@ -2543,35 +2513,35 @@ void BinLines(
 
     // Calc bounding box of lines
     simdBBox bbox;
-    bbox.left = _simd_min_epi32(vXi[0], vXi[1]);
-    bbox.right = _simd_max_epi32(vXi[0], vXi[1]);
-    bbox.top = _simd_min_epi32(vYi[0], vYi[1]);
-    bbox.bottom = _simd_max_epi32(vYi[0], vYi[1]);
+    bbox.xmin = _simd_min_epi32(vXi[0], vXi[1]);
+    bbox.xmax = _simd_max_epi32(vXi[0], vXi[1]);
+    bbox.ymin = _simd_min_epi32(vYi[0], vYi[1]);
+    bbox.ymax = _simd_max_epi32(vYi[0], vYi[1]);
 
     // bloat bbox by line width along minor axis
     simdscalar vHalfWidth = _simd_set1_ps(rastState.lineWidth / 2.0f);
     simdscalari vHalfWidthi = fpToFixedPointVertical(vHalfWidth);
     simdBBox bloatBox;
-    bloatBox.left = _simd_sub_epi32(bbox.left, vHalfWidthi);
-    bloatBox.right = _simd_add_epi32(bbox.right, vHalfWidthi);
-    bloatBox.top = _simd_sub_epi32(bbox.top, vHalfWidthi);
-    bloatBox.bottom = _simd_add_epi32(bbox.bottom, vHalfWidthi);
-
-    bbox.left = _simd_blendv_epi32(bbox.left, bloatBox.left, vYmajorMask);
-    bbox.right = _simd_blendv_epi32(bbox.right, bloatBox.right, vYmajorMask);
-    bbox.top = _simd_blendv_epi32(bloatBox.top, bbox.top, vYmajorMask);
-    bbox.bottom = _simd_blendv_epi32(bloatBox.bottom, bbox.bottom, vYmajorMask);
-
-    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since right/bottom edge is exclusive.
-    bbox.left = _simd_max_epi32(bbox.left, _simd_set1_epi32(state.scissorInFixedPoint.left));
-    bbox.top = _simd_max_epi32(bbox.top, _simd_set1_epi32(state.scissorInFixedPoint.top));
-    bbox.right = _simd_min_epi32(_simd_sub_epi32(bbox.right, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.right));
-    bbox.bottom = _simd_min_epi32(_simd_sub_epi32(bbox.bottom, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.bottom));
+    bloatBox.xmin = _simd_sub_epi32(bbox.xmin, vHalfWidthi);
+    bloatBox.xmax = _simd_add_epi32(bbox.xmax, vHalfWidthi);
+    bloatBox.ymin = _simd_sub_epi32(bbox.ymin, vHalfWidthi);
+    bloatBox.ymax = _simd_add_epi32(bbox.ymax, vHalfWidthi);
+
+    bbox.xmin = _simd_blendv_epi32(bbox.xmin, bloatBox.xmin, vYmajorMask);
+    bbox.xmax = _simd_blendv_epi32(bbox.xmax, bloatBox.xmax, vYmajorMask);
+    bbox.ymin = _simd_blendv_epi32(bloatBox.ymin, bbox.ymin, vYmajorMask);
+    bbox.ymax = _simd_blendv_epi32(bloatBox.ymax, bbox.ymax, vYmajorMask);
+
+    // Intersect with scissor/viewport. Subtract 1 ULP in x.8 fixed point since xmax/ymax edge is exclusive.
+    bbox.xmin = _simd_max_epi32(bbox.xmin, _simd_set1_epi32(state.scissorInFixedPoint.xmin));
+    bbox.ymin = _simd_max_epi32(bbox.ymin, _simd_set1_epi32(state.scissorInFixedPoint.ymin));
+    bbox.xmax = _simd_min_epi32(_simd_sub_epi32(bbox.xmax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.xmax));
+    bbox.ymax = _simd_min_epi32(_simd_sub_epi32(bbox.ymax, _simd_set1_epi32(1)), _simd_set1_epi32(state.scissorInFixedPoint.ymax));
 
     // Cull prims completely outside scissor
     {
-        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.left, bbox.right);
-        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.top, bbox.bottom);
+        simdscalari maskOutsideScissorX = _simd_cmpgt_epi32(bbox.xmin, bbox.xmax);
+        simdscalari maskOutsideScissorY = _simd_cmpgt_epi32(bbox.ymin, bbox.ymax);
         simdscalari maskOutsideScissorXY = _simd_or_si(maskOutsideScissorX, maskOutsideScissorY);
         uint32_t maskOutsideScissor = _simd_movemask_ps(_simd_castsi_ps(maskOutsideScissorXY));
         primMask = primMask & ~maskOutsideScissor;
@@ -2583,16 +2553,16 @@ void BinLines(
     }
 
     // Convert triangle bbox to macrotile units.
-    bbox.left = _simd_srai_epi32(bbox.left, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-    bbox.top = _simd_srai_epi32(bbox.top, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
-    bbox.right = _simd_srai_epi32(bbox.right, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
-    bbox.bottom = _simd_srai_epi32(bbox.bottom, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+    bbox.xmin = _simd_srai_epi32(bbox.xmin, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+    bbox.ymin = _simd_srai_epi32(bbox.ymin, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
+    bbox.xmax = _simd_srai_epi32(bbox.xmax, KNOB_MACROTILE_X_DIM_FIXED_SHIFT);
+    bbox.ymax = _simd_srai_epi32(bbox.ymax, KNOB_MACROTILE_Y_DIM_FIXED_SHIFT);
 
     OSALIGNSIMD(uint32_t) aMTLeft[KNOB_SIMD_WIDTH], aMTRight[KNOB_SIMD_WIDTH], aMTTop[KNOB_SIMD_WIDTH], aMTBottom[KNOB_SIMD_WIDTH];
-    _simd_store_si((simdscalari*)aMTLeft, bbox.left);
-    _simd_store_si((simdscalari*)aMTRight, bbox.right);
-    _simd_store_si((simdscalari*)aMTTop, bbox.top);
-    _simd_store_si((simdscalari*)aMTBottom, bbox.bottom);
+    _simd_store_si((simdscalari*)aMTLeft, bbox.xmin);
+    _simd_store_si((simdscalari*)aMTRight, bbox.xmax);
+    _simd_store_si((simdscalari*)aMTTop, bbox.ymin);
+    _simd_store_si((simdscalari*)aMTBottom, bbox.ymax);
 
     // transpose verts needed for backend
     /// @todo modify BE to take non-transformed verts
index 367d1998b91a49fadc8a60b4a127ab44b8590eda..6316156bfd0725c680c4536881ca5fd911783885 100644 (file)
@@ -240,7 +240,7 @@ void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices,
 }
 
 INLINE
-void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
+void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
 {
     // Need horizontal fp min here
     __m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
@@ -262,10 +262,10 @@ void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, BBOX &bbox)
     __m128i vMaxY = _mm_max_epi32(vY, vY1);
             vMaxY = _mm_max_epi32(vMaxY, vY2);
 
-    bbox.left = _mm_extract_epi32(vMinX, 0);
-    bbox.right = _mm_extract_epi32(vMaxX, 0);
-    bbox.top = _mm_extract_epi32(vMinY, 0);
-    bbox.bottom = _mm_extract_epi32(vMaxY, 0);
+    bbox.xmin = _mm_extract_epi32(vMinX, 0);
+    bbox.xmax = _mm_extract_epi32(vMaxX, 0);
+    bbox.ymin = _mm_extract_epi32(vMinY, 0);
+    bbox.ymax = _mm_extract_epi32(vMaxY, 0);
 }
 
 INLINE
index 768b58a1d97eddbf4ea6410bb43d60a7072bfe9a..9a8d062818d65c8e83878a44ae1e88e25221fd62 100644 (file)
@@ -591,7 +591,7 @@ INLINE void UpdateEdgeMasks<SingleSampleT>(const __m256d(&)[3], const __m256d* v
 template <typename RasterScissorEdgesT, typename IsConservativeT, typename RT>
 struct ComputeScissorEdges
 {
-    INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y, 
+    INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y, 
                               EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7]){};
 };
 
@@ -604,20 +604,20 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
     //////////////////////////////////////////////////////////////////////////
     /// @brief Intersect tri bbox with scissor, compute scissor edge vectors, 
     /// evaluate edge equations and offset them away from pixel center.
-    INLINE ComputeScissorEdges(const BBOX &triBBox, const BBOX &scissorBBox, const int32_t x, const int32_t y,
+    INLINE ComputeScissorEdges(const SWR_RECT &triBBox, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
                               EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
     {
         // if conservative rasterizing, triangle bbox intersected with scissor bbox is used
-        BBOX scissor;
-        scissor.left = std::max(triBBox.left, scissorBBox.left);
-        scissor.right = std::min(triBBox.right, scissorBBox.right);
-        scissor.top = std::max(triBBox.top, scissorBBox.top);
-        scissor.bottom = std::min(triBBox.bottom, scissorBBox.bottom);
+        SWR_RECT scissor;
+        scissor.xmin = std::max(triBBox.xmin, scissorBBox.xmin);
+        scissor.xmax = std::min(triBBox.xmax, scissorBBox.xmax);
+        scissor.ymin = std::max(triBBox.ymin, scissorBBox.ymin);
+        scissor.ymax = std::min(triBBox.ymax, scissorBBox.ymax);
 
-        POS topLeft{scissor.left, scissor.top};
-        POS bottomLeft{scissor.left, scissor.bottom};
-        POS topRight{scissor.right, scissor.top};
-        POS bottomRight{scissor.right, scissor.bottom};
+        POS topLeft{scissor.xmin, scissor.ymin};
+        POS bottomLeft{scissor.xmin, scissor.ymax};
+        POS topRight{scissor.xmax, scissor.ymin};
+        POS bottomRight{scissor.xmax, scissor.ymax};
 
         // construct 4 scissor edges in ccw direction
         ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@@ -625,10 +625,10 @@ struct ComputeScissorEdges<std::true_type, std::true_type, RT>
         ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
         ComputeEdgeData(topRight, topLeft, rastEdges[6]);
 
-        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
-        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
-        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
-        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
+        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
+        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
+        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
+        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
 
         // if conservative rasterizing, need to bump the scissor edges out by the conservative uncertainty distance, else do nothing
         adjustScissorEdge<RT>(rastEdges[3].a, rastEdges[3].b, vEdgeFix16[3]);
@@ -647,14 +647,14 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
 {
     //////////////////////////////////////////////////////////////////////////
     /// @brief Compute scissor edge vectors and evaluate edge equations
-    INLINE ComputeScissorEdges(const BBOX &, const BBOX &scissorBBox, const int32_t x, const int32_t y,
+    INLINE ComputeScissorEdges(const SWR_RECT &, const SWR_RECT &scissorBBox, const int32_t x, const int32_t y,
                               EDGE (&rastEdges)[RT::NumEdgesT::value], __m256d (&vEdgeFix16)[7])
     {
-        const BBOX &scissor = scissorBBox;
-        POS topLeft{scissor.left, scissor.top};
-        POS bottomLeft{scissor.left, scissor.bottom};
-        POS topRight{scissor.right, scissor.top};
-        POS bottomRight{scissor.right, scissor.bottom};
+        const SWR_RECT &scissor = scissorBBox;
+        POS topLeft{scissor.xmin, scissor.ymin};
+        POS bottomLeft{scissor.xmin, scissor.ymax};
+        POS topRight{scissor.xmax, scissor.ymin};
+        POS bottomRight{scissor.xmax, scissor.ymax};
 
         // construct 4 scissor edges in ccw direction
         ComputeEdgeData(topLeft, bottomLeft, rastEdges[3]);
@@ -662,10 +662,10 @@ struct ComputeScissorEdges<std::true_type, std::false_type, RT>
         ComputeEdgeData(bottomRight, topRight, rastEdges[5]);
         ComputeEdgeData(topRight, topLeft, rastEdges[6]);
 
-        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.left)) + (rastEdges[3].b * (y - scissor.top)));
-        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.left)) + (rastEdges[4].b * (y - scissor.bottom)));
-        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.right)) + (rastEdges[5].b * (y - scissor.bottom)));
-        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.right)) + (rastEdges[6].b * (y - scissor.top)));
+        vEdgeFix16[3] = _mm256_set1_pd((rastEdges[3].a * (x - scissor.xmin)) + (rastEdges[3].b * (y - scissor.ymin)));
+        vEdgeFix16[4] = _mm256_set1_pd((rastEdges[4].a * (x - scissor.xmin)) + (rastEdges[4].b * (y - scissor.ymax)));
+        vEdgeFix16[5] = _mm256_set1_pd((rastEdges[5].a * (x - scissor.xmax)) + (rastEdges[5].b * (y - scissor.ymax)));
+        vEdgeFix16[6] = _mm256_set1_pd((rastEdges[6].a * (x - scissor.xmax)) + (rastEdges[6].b * (y - scissor.ymin)));
     }
 };
 
@@ -964,23 +964,23 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
     triDesc.Z[2] += ComputeDepthBias(&rastState, &triDesc, workDesc.pTriBuffer + 8);
 
     // Calc bounding box of triangle
-    OSALIGNSIMD(BBOX) bbox;
+    OSALIGNSIMD(SWR_RECT) bbox;
     calcBoundingBoxInt(vXi, vYi, bbox);
 
     if(RT::ValidEdgeMaskT::value != ALL_EDGES_VALID)
     {
         // If we're rasterizing a degenerate triangle, expand bounding box to guarantee the BBox is valid
-        bbox.left--;    bbox.right++;    bbox.top--;    bbox.bottom++;
-        SWR_ASSERT(state.scissorInFixedPoint.left >= 0 && state.scissorInFixedPoint.top >= 0, 
+        bbox.xmin--;    bbox.xmax++;    bbox.ymin--;    bbox.ymax++;
+        SWR_ASSERT(state.scissorInFixedPoint.xmin >= 0 && state.scissorInFixedPoint.ymin >= 0, 
                    "Conservative rast degenerate handling requires a valid scissor rect");
     }
 
     // Intersect with scissor/viewport
-    OSALIGNSIMD(BBOX) intersect;
-    intersect.left = std::max(bbox.left, state.scissorInFixedPoint.left);
-    intersect.right = std::min(bbox.right - 1, state.scissorInFixedPoint.right);
-    intersect.top = std::max(bbox.top, state.scissorInFixedPoint.top);
-    intersect.bottom = std::min(bbox.bottom - 1, state.scissorInFixedPoint.bottom);
+    OSALIGNSIMD(SWR_RECT) intersect;
+    intersect.xmin = std::max(bbox.xmin, state.scissorInFixedPoint.xmin);
+    intersect.xmax = std::min(bbox.xmax - 1, state.scissorInFixedPoint.xmax);
+    intersect.ymin = std::max(bbox.ymin, state.scissorInFixedPoint.ymin);
+    intersect.ymax = std::min(bbox.ymax - 1, state.scissorInFixedPoint.ymax);
 
     triDesc.triFlags = workDesc.triFlags;
 
@@ -992,20 +992,20 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
     int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
     int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
 
-    intersect.left   = std::max(intersect.left, macroBoxLeft);
-    intersect.top    = std::max(intersect.top, macroBoxTop);
-    intersect.right  = std::min(intersect.right, macroBoxRight);
-    intersect.bottom = std::min(intersect.bottom, macroBoxBottom);
+    intersect.xmin = std::max(intersect.xmin, macroBoxLeft);
+    intersect.ymin = std::max(intersect.ymin, macroBoxTop);
+    intersect.xmax = std::min(intersect.xmax, macroBoxRight);
+    intersect.ymax = std::min(intersect.ymax, macroBoxBottom);
 
-    SWR_ASSERT(intersect.left <= intersect.right && intersect.top <= intersect.bottom && intersect.left >= 0 && intersect.right >= 0 && intersect.top >= 0 && intersect.bottom >= 0);
+    SWR_ASSERT(intersect.xmin <= intersect.xmax && intersect.ymin <= intersect.ymax && intersect.xmin >= 0 && intersect.xmax >= 0 && intersect.ymin >= 0 && intersect.ymax >= 0);
 
     RDTSC_STOP(BETriangleSetup, 0, pDC->drawId);
 
     // update triangle desc
-    uint32_t minTileX = intersect.left >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t minTileY = intersect.top >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t maxTileX = intersect.right >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
-    uint32_t maxTileY = intersect.bottom >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t minTileX = intersect.xmin >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t minTileY = intersect.ymin >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t maxTileX = intersect.xmax >> (KNOB_TILE_X_DIM_SHIFT + FIXED_POINT_SHIFT);
+    uint32_t maxTileY = intersect.ymax >> (KNOB_TILE_Y_DIM_SHIFT + FIXED_POINT_SHIFT);
     uint32_t numTilesX = maxTileX - minTileX + 1;
     uint32_t numTilesY = maxTileY - minTileY + 1;
 
@@ -1020,8 +1020,8 @@ void RasterizeTriangle(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile,
 
     // Step to pixel center of top-left pixel of the triangle bbox
     // Align intersect bbox (top/left) to raster tile's (top/left).
-    int32_t x = AlignDown(intersect.left, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
-    int32_t y = AlignDown(intersect.top, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
+    int32_t x = AlignDown(intersect.xmin, (FIXED_POINT_SCALE * KNOB_TILE_X_DIM));
+    int32_t y = AlignDown(intersect.ymin, (FIXED_POINT_SCALE * KNOB_TILE_Y_DIM));
 
     // convenience typedef
     typedef typename RT::NumRasterSamplesT NumRasterSamplesT;
@@ -1663,17 +1663,17 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
     // make sure this macrotile intersects the triangle
     __m128i vXai = fpToFixedPoint(vXa);
     __m128i vYai = fpToFixedPoint(vYa);
-    OSALIGNSIMD(BBOX) bboxA;
+    OSALIGNSIMD(SWR_RECT) bboxA;
     calcBoundingBoxInt(vXai, vYai, bboxA);
 
-    if (!(bboxA.left > macroBoxRight ||
-          bboxA.left > state.scissorInFixedPoint.right ||
-          bboxA.right - 1 < macroBoxLeft ||
-          bboxA.right - 1 < state.scissorInFixedPoint.left ||
-          bboxA.top > macroBoxBottom ||
-          bboxA.top > state.scissorInFixedPoint.bottom ||
-          bboxA.bottom - 1 < macroBoxTop ||
-          bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
+    if (!(bboxA.xmin > macroBoxRight ||
+          bboxA.xmin > state.scissorInFixedPoint.xmax ||
+          bboxA.xmax - 1 < macroBoxLeft ||
+          bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+          bboxA.ymin > macroBoxBottom ||
+          bboxA.ymin > state.scissorInFixedPoint.ymax ||
+          bboxA.ymax - 1 < macroBoxTop ||
+          bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
         // rasterize triangle
         pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
     }
@@ -1739,14 +1739,14 @@ void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, voi
     vYai = fpToFixedPoint(vYa);
     calcBoundingBoxInt(vXai, vYai, bboxA);
 
-    if (!(bboxA.left > macroBoxRight ||
-          bboxA.left > state.scissorInFixedPoint.right ||
-          bboxA.right - 1 < macroBoxLeft ||
-          bboxA.right - 1 < state.scissorInFixedPoint.left ||
-          bboxA.top > macroBoxBottom ||
-          bboxA.top > state.scissorInFixedPoint.bottom ||
-          bboxA.bottom - 1 < macroBoxTop ||
-          bboxA.bottom - 1 < state.scissorInFixedPoint.top)) {
+    if (!(bboxA.xmin > macroBoxRight ||
+          bboxA.xmin > state.scissorInFixedPoint.xmax ||
+          bboxA.xmax - 1 < macroBoxLeft ||
+          bboxA.xmax - 1 < state.scissorInFixedPoint.xmin ||
+          bboxA.ymin > macroBoxBottom ||
+          bboxA.ymin > state.scissorInFixedPoint.ymax ||
+          bboxA.ymax - 1 < macroBoxTop ||
+          bboxA.ymax - 1 < state.scissorInFixedPoint.ymin)) {
         // rasterize triangle
         pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
     }
index 0a9430af630c2df2c7b7bfc560511403700b834f..00878925359c0ac3c2cacc74f865caf7d2d46b29 100644 (file)
@@ -33,6 +33,7 @@
 #include "common/os.h"
 #include "common/simdintrin.h"
 #include "common/swr_assert.h"
+#include "core/api.h"
 
 #if defined(_WIN64) || defined(__x86_64__)
 #define _MM_INSERT_EPI64 _mm_insert_epi64
@@ -74,53 +75,12 @@ INLINE __m128i  _MM_INSERT_EPI64(__m128i a, int64_t b, const int32_t ndx)
 }
 #endif
 
-OSALIGNLINE(struct) BBOX
-{
-    int top{ 0 };
-    int bottom{ 0 };
-    int left{ 0 };
-    int right{ 0 };
-
-    BBOX() {}
-    BBOX(int t, int b, int l, int r) : top(t), bottom(b), left(l), right(r) {}
-
-    bool operator==(const BBOX& rhs)
-    {
-        return (this->top == rhs.top &&
-            this->bottom == rhs.bottom &&
-            this->left == rhs.left &&
-            this->right == rhs.right);
-    }
-
-    bool operator!=(const BBOX& rhs)
-    {
-        return !(*this == rhs);
-    }
-
-    BBOX& Intersect(const BBOX& other)
-    {
-        this->top = std::max(this->top, other.top);
-        this->bottom = std::min(this->bottom, other.bottom);
-        this->left = std::max(this->left, other.left);
-        this->right = std::min(this->right, other.right);
-
-        if (right - left < 0 ||
-            bottom - top < 0)
-        {
-            // Zero area
-            top = bottom = left = right = 0;
-        }
-
-        return *this;
-    }
-};
-
 struct simdBBox
 {
-    simdscalari top;
-    simdscalari bottom;
-    simdscalari left;
-    simdscalari right;
+    simdscalari ymin;
+    simdscalari ymax;
+    simdscalari xmin;
+    simdscalari xmax;
 };
 
 INLINE
index 4b02dd1fb12bf02aef5315b393b8368c446cc008..a65f8f4918dc2164058392fbb43221bf53f97235 100644 (file)
@@ -67,17 +67,9 @@ swr_clear(struct pipe_context *pipe,
    ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness */
 #endif
 
-   /* Reset viewport to full framebuffer width/height before clear, then
-    * restore it  */
-   /* Scissor affects clear, viewport should not */
-   ctx->dirty |= SWR_NEW_VIEWPORT;
-   SWR_VIEWPORT vp = {0};
-   vp.width = ctx->framebuffer.width;
-   vp.height = ctx->framebuffer.height;
-   SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
-
    swr_update_draw_context(ctx);
-   SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil);
+   SwrClearRenderTarget(ctx->swrContext, clearMask, color->f, depth, stencil,
+                        ctx->swr_scissor);
 }
 
 
index b4553fb171bf1046d0d3f027274d08f4d06b7ec5..6854d697db9138217f50807bfc83271edb0a9c55 100644 (file)
@@ -121,6 +121,7 @@ struct swr_context {
    struct pipe_framebuffer_state framebuffer;
    struct pipe_poly_stipple poly_stipple;
    struct pipe_scissor_state scissor;
+   SWR_RECT swr_scissor;
    struct pipe_sampler_view *
       sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
 
index 0f6a8c6d88cfdc850ee726dfc3cf0e039334dacd..a46aef924ab33dc23781872e87c97f4d56cb4fd0 100644 (file)
@@ -258,37 +258,13 @@ swr_store_render_target(struct pipe_context *pipe,
 
    /* Only proceed if there's a valid surface to store to */
    if (renderTarget->pBaseAddress) {
-      /* Set viewport to full renderTarget width/height and disable scissor
-       * before StoreTiles */
-      boolean change_viewport =
-         (ctx->derived.vp.x != 0.0f || ctx->derived.vp.y != 0.0f
-          || ctx->derived.vp.width != renderTarget->width
-          || ctx->derived.vp.height != renderTarget->height);
-      if (change_viewport) {
-         SWR_VIEWPORT vp = {0};
-         vp.width = renderTarget->width;
-         vp.height = renderTarget->height;
-         SwrSetViewports(ctx->swrContext, 1, &vp, NULL);
-      }
-
-      boolean scissor_enable = ctx->derived.rastState.scissorEnable;
-      if (scissor_enable) {
-         ctx->derived.rastState.scissorEnable = FALSE;
-         SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
-      }
-
       swr_update_draw_context(ctx);
+      SWR_RECT full_rect =
+         {0, 0, (int32_t)renderTarget->width, (int32_t)renderTarget->height};
       SwrStoreTiles(ctx->swrContext,
                     (enum SWR_RENDERTARGET_ATTACHMENT)attachment,
-                    post_tile_state);
-
-      /* Restore viewport and scissor enable */
-      if (change_viewport)
-         SwrSetViewports(ctx->swrContext, 1, &ctx->derived.vp, &ctx->derived.vpm);
-      if (scissor_enable) {
-         ctx->derived.rastState.scissorEnable = scissor_enable;
-         SwrSetRastState(ctx->swrContext, &ctx->derived.rastState);
-      }
+                    post_tile_state,
+                    full_rect);
    }
 }
 
index de41ddc704d615b8f5dd26489729e679b2ec8852..4c9a4327e29a85ade2e97ad4951d3803b137007f 100644 (file)
@@ -575,6 +575,10 @@ swr_set_scissor_states(struct pipe_context *pipe,
    struct swr_context *ctx = swr_context(pipe);
 
    ctx->scissor = *scissor;
+   ctx->swr_scissor.xmin = scissor->minx;
+   ctx->swr_scissor.xmax = scissor->maxx;
+   ctx->swr_scissor.ymin = scissor->miny;
+   ctx->swr_scissor.ymax = scissor->maxy;
    ctx->dirty |= SWR_NEW_SCISSOR;
 }
 
@@ -930,10 +934,7 @@ swr_update_derived(struct pipe_context *pipe,
 
    /* Scissor */
    if (ctx->dirty & SWR_NEW_SCISSOR) {
-      pipe_scissor_state *scissor = &ctx->scissor;
-      BBOX bbox(scissor->miny, scissor->maxy,
-                scissor->minx, scissor->maxx);
-      SwrSetScissorRects(ctx->swrContext, 1, &bbox);
+      SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
    }
 
    /* Viewport */