1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Definitions for our fifos used for thread communication.
27 ******************************************************************************/
31 #include "common/os.h"
// Lock word for the FIFO; starts at 0. The compare-exchange further down in this
// file moves it from 0 to 1.
// NOTE(review): OSALIGNLINE comes from common/os.h; presumably it aligns the member
// to a cache line - confirm.
40 OSALIGNLINE(volatile uint32_t) mLock{ 0 };
// Entry counter, initially 0.
// NOTE(review): the code that updates it is not visible in this excerpt.
41 OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
// Pointers to every storage block obtained so far, in the order they were appended.
42 std::vector<T*> mBlocks;
// Block that enqueue_try_nosync currently writes into.
43 T* mCurBlock{ nullptr };
// Position of mCurBlock inside mBlocks.
46 uint32_t mCurBlockIdx{ 0 };
// Every block holds mBlockSize = 1 << 6 = 64 elements. Because the size is a power of
// two, a running element index splits into a block number (index >> mBlockSizeShift)
// and an in-block offset (index & (mBlockSize - 1)).
49 static const uint32_t mBlockSizeShift = 6;
50 static const uint32_t mBlockSize = 1 << mBlockSizeShift;
// Gives the queue one freshly allocated block and makes it the current write block.
// arena: allocator object exposing AllocAligned(size, alignment-like second argument);
//        it supplies the storage for the block.
// NOTE(review): only part of the body is visible in this excerpt; presumably the
// missing lines also reset the indices, counters and the block list - confirm
// against the full file.
52 template <typename ArenaT>
53 void clear(ArenaT& arena)
// Storage for mBlockSize elements of T. The second argument (KNOB_SIMD_WIDTH*4) is
// presumably the byte alignment, i.e. one SIMD register of floats - TODO confirm.
58 T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
// Record the block and direct subsequent writes at it.
59 mBlocks.push_back(pNewBlock);
60 mCurBlock = pNewBlock;
// Reports a queued-entry count as a uint32_t.
// NOTE(review): the body is not visible in this excerpt; presumably it returns
// mNumEntries - confirm.
66 uint32_t getNumQueued()
// NOTE(review): the enclosing function header is not visible in this excerpt.
78 // try to lock the FIFO
// Atomically replace mLock with 1 only if it currently holds 0; the call yields the
// value mLock held before the operation.
79 LONG initial = InterlockedCompareExchange(&mLock, 1, 0);
// A previous value of 0 means this caller performed the 0 -> 1 transition, so the
// attempt succeeded; any other value means the lock was already taken.
80 return (initial == 0);
// NOTE(review): the enclosing function header and the declaration of mHead are not
// visible in this excerpt.
// mHead is used as a running element index: its upper bits select the block...
94 uint32_t block = mHead >> mBlockSizeShift;
// ...and its low mBlockSizeShift bits select the slot inside that block. The result is
// a pointer to the element in place; nothing is copied.
95 return &mBlocks[block][mHead & (mBlockSize-1)];
// Copies *entry into the slot mCurBlock[mTail] and, once the current block is full,
// moves on to the next block (reusing a spare one or allocating a new one).
// arena: allocator exposing AllocAligned; used only when another block is needed.
// entry: element to copy. It is read with _mm256_load_ps, so it must be 32-byte aligned.
// NOTE(review): several lines of this function (including whatever advances mTail,
// updates the entry count and produces the bool result) are not visible in this
// excerpt; only the visible statements are described here.
104 template <typename ArenaT>
105 bool enqueue_try_nosync(ArenaT& arena, const T* entry)
// View both the source element and the destination slot as arrays of float so the
// copy can proceed one SIMD register at a time.
107 const float* pSrc = (const float*)entry;
108 float* pDst = (float*)&mCurBlock[mTail];
// Copies SIMD line i: aligned 256-bit load from the source, then a non-temporal
// (streaming) store into the slot.
// NOTE(review): the i*KNOB_SIMD_WIDTH float stride matches one __m256 only if
// KNOB_SIMD_WIDTH is 8 - TODO confirm.
110 auto lambda = [&](int32_t i)
112 __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
113 _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
// Number of SIMD lines that make up one element (the *4 is presumably sizeof(float)).
// The static_assert rejects element sizes that would leave an uncopied remainder.
116 const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
117 static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
118 "FIFO element size should be multiple of SIMD width.");
// Runs the lambda over the numSimdLines lines of the element.
// NOTE(review): UnrollerL is defined elsewhere; presumably it expands the loop at
// compile time for i = 0 .. numSimdLines-1 - confirm.
120 UnrollerL<0, numSimdLines, 1>::step(lambda);
// The current block has no free slot left.
123 if (mTail == mBlockSize)
// Advance to the next block index; if a block already exists there, reuse it.
125 if (++mCurBlockIdx < mBlocks.size())
127 mCurBlock = mBlocks[mCurBlockIdx];
// NOTE(review): the keyword separating the two cases is not visible in this excerpt;
// presumably the allocation below runs only when no spare block exists.
// Obtain storage for another mBlockSize elements from the arena and check it succeeded.
131 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
132 SWR_ASSERT(newBlock);
// Append the new block and make it the current write block.
134 mBlocks.push_back(newBlock);
135 mCurBlock = newBlock;