49ba71f6435827e2b20837a633394c5e60998459
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / fifo.hpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file fifo.hpp
24 *
25 * @brief Definitions for our fifos used for thread communication.
26 *
27 ******************************************************************************/
28 #pragma once
29
30
31 #include "common/os.h"
32 #include "arena.h"
33
34 #include <vector>
35 #include <cassert>
36
37 template<class T>
38 struct QUEUE
39 {
40 OSALIGNLINE(volatile uint32_t) mLock{ 0 };
41 OSALIGNLINE(volatile uint32_t) mNumEntries{ 0 };
42 std::vector<T*> mBlocks;
43 T* mCurBlock{ nullptr };
44 uint32_t mHead{ 0 };
45 uint32_t mTail{ 0 };
46 uint32_t mCurBlockIdx{ 0 };
47
48 // power of 2
49 static const uint32_t mBlockSizeShift = 6;
50 static const uint32_t mBlockSize = 1 << mBlockSizeShift;
51
52 template <typename ArenaT>
53 void clear(ArenaT& arena)
54 {
55 mHead = 0;
56 mTail = 0;
57 mBlocks.clear();
58 T* pNewBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
59 mBlocks.push_back(pNewBlock);
60 mCurBlock = pNewBlock;
61 mCurBlockIdx = 0;
62 mNumEntries = 0;
63 mLock = 0;
64 }
65
66 uint32_t getNumQueued()
67 {
68 return mNumEntries;
69 }
70
71 bool tryLock()
72 {
73 if (mLock)
74 {
75 return false;
76 }
77
78 // try to lock the FIFO
79 LONG initial = InterlockedCompareExchange(&mLock, 1, 0);
80 return (initial == 0);
81 }
82
83 void unlock()
84 {
85 mLock = 0;
86 }
87
88 T* peek()
89 {
90 if (mNumEntries == 0)
91 {
92 return nullptr;
93 }
94 uint32_t block = mHead >> mBlockSizeShift;
95 return &mBlocks[block][mHead & (mBlockSize-1)];
96 }
97
98 void dequeue_noinc()
99 {
100 mHead ++;
101 mNumEntries --;
102 }
103
104 template <typename ArenaT>
105 bool enqueue_try_nosync(ArenaT& arena, const T* entry)
106 {
107 const float* pSrc = (const float*)entry;
108 float* pDst = (float*)&mCurBlock[mTail];
109
110 auto lambda = [&](int32_t i)
111 {
112 __m256 vSrc = _mm256_load_ps(pSrc + i*KNOB_SIMD_WIDTH);
113 _mm256_stream_ps(pDst + i*KNOB_SIMD_WIDTH, vSrc);
114 };
115
116 const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH*4);
117 static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
118 "FIFO element size should be multiple of SIMD width.");
119
120 UnrollerL<0, numSimdLines, 1>::step(lambda);
121
122 mTail ++;
123 if (mTail == mBlockSize)
124 {
125 if (++mCurBlockIdx < mBlocks.size())
126 {
127 mCurBlock = mBlocks[mCurBlockIdx];
128 }
129 else
130 {
131 T* newBlock = (T*)arena.AllocAligned(sizeof(T)*mBlockSize, KNOB_SIMD_WIDTH*4);
132 SWR_ASSERT(newBlock);
133
134 mBlocks.push_back(newBlock);
135 mCurBlock = newBlock;
136 }
137
138 mTail = 0;
139 }
140
141 mNumEntries ++;
142 return true;
143 }
144
145 void destroy()
146 {
147 }
148
149 };