src/gallium/drivers/swr/rasterizer/core/fifo.hpp

   1 /****************************************************************************
   2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * @file fifo.hpp
  24 *
  25 * @brief Definitions for our fifos used for thread communication.
  26 *
  27 ******************************************************************************/
  28 #pragma once
  29
  30
  31 #include "common/os.h"
  32 #include "arena.h"
  33
  34 #include <vector>
  35 #include <cassert>
  36
  37 template<class T>
  38 struct QUEUE
  39 {
  40     OSALIGNLINE(volatile uint32_t) mLock{ 0 };
  41     OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
  42     std::vector<T*> mBlocks;
  43     T* mCurBlock{ nullptr };
  44     uint32_t mHead{ 0 };
  45     uint32_t mTail{ 0 };
  46     uint32_t mCurBlockIdx{ 0 };
  47
  48     // power of 2
  49     static const uint32_t mBlockSizeShift = 6;
  50     static const uint32_t mBlockSize = 1 << mBlockSizeShift;
  51
  52     template <typename ArenaT>
  53     void clear(ArenaT& arena)
  54     {
  55         mHead = 0;
  56         mTail = 0;
  57         mBlocks.clear();
  58         T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
  59         mBlocks.push_back(pNewBlock);
  60         mCurBlock = pNewBlock;
  61         mCurBlockIdx = 0;
  62         mNumEntries = 0;
  63         mLock = 0;
  64     }
  65
  66     uint32_t getNumQueued()
  67     {
  68         return mNumEntries;
  69     }
  70
  71     bool tryLock()
  72     {
  73         if (mLock)
  74         {
  75             return false;
  76         }
  77
  78         // try to lock the FIFO
  79         LONG initial = InterlockedCompareExchange(&mLock, 1, 0);
  80         return (initial == 0);
  81     }
  82
  83     void unlock()
  84     {
  85         mLock = 0;
  86     }
  87
  88     T* peek()
  89     {
  90         if (mNumEntries == 0)
  91         {
  92             return nullptr;
  93         }
  94         uint32_t block = mHead >> mBlockSizeShift;
  95         return &mBlocks[block][mHead & (mBlockSize-1)];
  96     }
  97
  98     void dequeue_noinc()
  99     {
 100         mHead ++;
 101         mNumEntries --;
 102     }
 103
 104     template <typename ArenaT>
 105     bool enqueue_try_nosync(ArenaT& arena, const T* entry)
 106     {
 107         const float* pSrc = (const float*)entry;
 108         float* pDst = (float*)&mCurBlock[mTail];
 109
 110         auto lambda = [&](int32_t i)
 111         {
 112             __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
 113             _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
 114         };
 115
 116         const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
 117         static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
 118             "FIFO element size should be multiple of SIMD width.");
 119
 120         UnrollerL<0, numSimdLines, 1>::step(lambda);
 121
 122         mTail ++;
 123         if (mTail == mBlockSize)
 124         {
 125             if (++mCurBlockIdx < mBlocks.size())
 126             {
 127                 mCurBlock = mBlocks[mCurBlockIdx];
 128             }
 129             else
 130             {
 131                 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
 132                 SWR_ASSERT(newBlock);
 133
 134                 mBlocks.push_back(newBlock);
 135                 mCurBlock = newBlock;
 136             }
 137
 138             mTail = 0;
 139         }
 140
 141         mNumEntries ++;
 142         return true;
 143     }
 144
 145     void destroy()
 146     {
 147     }
 148
 149 };