swr/rast: Separate RDTSC code from archrast
[mesa.git] / src / gallium / drivers / swr / rasterizer / core / binner.h
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file binner.h
24 *
25 * @brief Declaration for the macrotile binner
26 *
27 ******************************************************************************/
28 #include "state.h"
29 #include "conservativeRast.h"
30 #include "utils.h"
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Offsets added to post-viewport vertex positions based on
33 /// raster state.
34 ///
35 /// Can't use templated variable because we must stick with C++11 features.
36 /// Template variables were introduced with C++14
37 template <typename SIMD_T>
38 struct SwrPixelOffsets
39 {
40 public:
41 INLINE static typename SIMD_T::Float GetOffset(uint32_t loc)
42 {
43 SWR_ASSERT(loc <= 1);
44
45 return SIMD_T::set1_ps(loc ? 0.5f : 0.0f);
46 }
47 };
48
49 //////////////////////////////////////////////////////////////////////////
50 /// @brief Convert the X,Y coords of a triangle to the requested Fixed
51 /// Point precision from FP32.
52 template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
53 INLINE typename SIMD_T::Integer fpToFixedPointVertical(const typename SIMD_T::Float &vIn)
54 {
55 return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
56 }
57
58 //////////////////////////////////////////////////////////////////////////
59 /// @brief Helper function to set the X,Y coords of a triangle to the
60 /// requested Fixed Point precision from FP32.
61 /// @param tri: simdvector[3] of FP triangle verts
62 /// @param vXi: fixed point X coords of tri verts
63 /// @param vYi: fixed point Y coords of tri verts
64 template <typename SIMD_T>
65 INLINE static void FPToFixedPoint(const typename SIMD_T::Vec4 *const tri, typename SIMD_T::Integer(&vXi)[3], typename SIMD_T::Integer(&vYi)[3])
66 {
67 vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
68 vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
69 vXi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].x);
70 vYi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].y);
71 vXi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].x);
72 vYi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].y);
73 }
74
75 //////////////////////////////////////////////////////////////////////////
76 /// @brief Calculate bounding box for current triangle
77 /// @tparam CT: ConservativeRastFETraits type
78 /// @param vX: fixed point X position for triangle verts
79 /// @param vY: fixed point Y position for triangle verts
80 /// @param bbox: fixed point bbox
81 /// *Note*: expects vX, vY to be in the correct precision for the type
82 /// of rasterization. This avoids unnecessary FP->fixed conversions.
83 template <typename SIMD_T, typename CT>
84 INLINE void calcBoundingBoxIntVertical(const typename SIMD_T::Integer(&vX)[3], const typename SIMD_T::Integer(&vY)[3], SIMDBBOX_T<SIMD_T> &bbox)
85 {
86 typename SIMD_T::Integer vMinX = vX[0];
87
88 vMinX = SIMD_T::min_epi32(vMinX, vX[1]);
89 vMinX = SIMD_T::min_epi32(vMinX, vX[2]);
90
91 typename SIMD_T::Integer vMaxX = vX[0];
92
93 vMaxX = SIMD_T::max_epi32(vMaxX, vX[1]);
94 vMaxX = SIMD_T::max_epi32(vMaxX, vX[2]);
95
96 typename SIMD_T::Integer vMinY = vY[0];
97
98 vMinY = SIMD_T::min_epi32(vMinY, vY[1]);
99 vMinY = SIMD_T::min_epi32(vMinY, vY[2]);
100
101 typename SIMD_T::Integer vMaxY = vY[0];
102
103 vMaxY = SIMD_T::max_epi32(vMaxY, vY[1]);
104 vMaxY = SIMD_T::max_epi32(vMaxY, vY[2]);
105
106 if (CT::BoundingBoxOffsetT::value != 0)
107 {
108 /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
109 /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
110
111 const typename SIMD_T::Integer value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
112
113 vMinX = SIMD_T::sub_epi32(vMinX, value);
114 vMaxX = SIMD_T::add_epi32(vMaxX, value);
115 vMinY = SIMD_T::sub_epi32(vMinY, value);
116 vMaxY = SIMD_T::add_epi32(vMaxY, value);
117 }
118
119 bbox.xmin = vMinX;
120 bbox.xmax = vMaxX;
121 bbox.ymin = vMinY;
122 bbox.ymax = vMaxY;
123 }
124
125 //////////////////////////////////////////////////////////////////////////
126 /// @brief Gather scissor rect data based on per-prim viewport indices.
127 /// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
128 /// @param pViewportIndex - array of per-primitive vewport indexes.
129 /// @param scisXmin - output vector of per-prmitive scissor rect Xmin data.
130 /// @param scisYmin - output vector of per-prmitive scissor rect Ymin data.
131 /// @param scisXmax - output vector of per-prmitive scissor rect Xmax data.
132 /// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
133 //
134 /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
135 static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
136 simdscalari &scisXmin, simdscalari &scisYmin, simdscalari &scisXmax, simdscalari &scisYmax)
137 {
138 scisXmin = _simd_set_epi32(
139 pScissorsInFixedPoint[pViewportIndex[7]].xmin,
140 pScissorsInFixedPoint[pViewportIndex[6]].xmin,
141 pScissorsInFixedPoint[pViewportIndex[5]].xmin,
142 pScissorsInFixedPoint[pViewportIndex[4]].xmin,
143 pScissorsInFixedPoint[pViewportIndex[3]].xmin,
144 pScissorsInFixedPoint[pViewportIndex[2]].xmin,
145 pScissorsInFixedPoint[pViewportIndex[1]].xmin,
146 pScissorsInFixedPoint[pViewportIndex[0]].xmin);
147 scisYmin = _simd_set_epi32(
148 pScissorsInFixedPoint[pViewportIndex[7]].ymin,
149 pScissorsInFixedPoint[pViewportIndex[6]].ymin,
150 pScissorsInFixedPoint[pViewportIndex[5]].ymin,
151 pScissorsInFixedPoint[pViewportIndex[4]].ymin,
152 pScissorsInFixedPoint[pViewportIndex[3]].ymin,
153 pScissorsInFixedPoint[pViewportIndex[2]].ymin,
154 pScissorsInFixedPoint[pViewportIndex[1]].ymin,
155 pScissorsInFixedPoint[pViewportIndex[0]].ymin);
156 scisXmax = _simd_set_epi32(
157 pScissorsInFixedPoint[pViewportIndex[7]].xmax,
158 pScissorsInFixedPoint[pViewportIndex[6]].xmax,
159 pScissorsInFixedPoint[pViewportIndex[5]].xmax,
160 pScissorsInFixedPoint[pViewportIndex[4]].xmax,
161 pScissorsInFixedPoint[pViewportIndex[3]].xmax,
162 pScissorsInFixedPoint[pViewportIndex[2]].xmax,
163 pScissorsInFixedPoint[pViewportIndex[1]].xmax,
164 pScissorsInFixedPoint[pViewportIndex[0]].xmax);
165 scisYmax = _simd_set_epi32(
166 pScissorsInFixedPoint[pViewportIndex[7]].ymax,
167 pScissorsInFixedPoint[pViewportIndex[6]].ymax,
168 pScissorsInFixedPoint[pViewportIndex[5]].ymax,
169 pScissorsInFixedPoint[pViewportIndex[4]].ymax,
170 pScissorsInFixedPoint[pViewportIndex[3]].ymax,
171 pScissorsInFixedPoint[pViewportIndex[2]].ymax,
172 pScissorsInFixedPoint[pViewportIndex[1]].ymax,
173 pScissorsInFixedPoint[pViewportIndex[0]].ymax);
174 }
175
176 static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
177 simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax)
178 {
179 scisXmin = _simd16_set_epi32(
180 pScissorsInFixedPoint[pViewportIndex[15]].xmin,
181 pScissorsInFixedPoint[pViewportIndex[14]].xmin,
182 pScissorsInFixedPoint[pViewportIndex[13]].xmin,
183 pScissorsInFixedPoint[pViewportIndex[12]].xmin,
184 pScissorsInFixedPoint[pViewportIndex[11]].xmin,
185 pScissorsInFixedPoint[pViewportIndex[10]].xmin,
186 pScissorsInFixedPoint[pViewportIndex[9]].xmin,
187 pScissorsInFixedPoint[pViewportIndex[8]].xmin,
188 pScissorsInFixedPoint[pViewportIndex[7]].xmin,
189 pScissorsInFixedPoint[pViewportIndex[6]].xmin,
190 pScissorsInFixedPoint[pViewportIndex[5]].xmin,
191 pScissorsInFixedPoint[pViewportIndex[4]].xmin,
192 pScissorsInFixedPoint[pViewportIndex[3]].xmin,
193 pScissorsInFixedPoint[pViewportIndex[2]].xmin,
194 pScissorsInFixedPoint[pViewportIndex[1]].xmin,
195 pScissorsInFixedPoint[pViewportIndex[0]].xmin);
196
197 scisYmin = _simd16_set_epi32(
198 pScissorsInFixedPoint[pViewportIndex[15]].ymin,
199 pScissorsInFixedPoint[pViewportIndex[14]].ymin,
200 pScissorsInFixedPoint[pViewportIndex[13]].ymin,
201 pScissorsInFixedPoint[pViewportIndex[12]].ymin,
202 pScissorsInFixedPoint[pViewportIndex[11]].ymin,
203 pScissorsInFixedPoint[pViewportIndex[10]].ymin,
204 pScissorsInFixedPoint[pViewportIndex[9]].ymin,
205 pScissorsInFixedPoint[pViewportIndex[8]].ymin,
206 pScissorsInFixedPoint[pViewportIndex[7]].ymin,
207 pScissorsInFixedPoint[pViewportIndex[6]].ymin,
208 pScissorsInFixedPoint[pViewportIndex[5]].ymin,
209 pScissorsInFixedPoint[pViewportIndex[4]].ymin,
210 pScissorsInFixedPoint[pViewportIndex[3]].ymin,
211 pScissorsInFixedPoint[pViewportIndex[2]].ymin,
212 pScissorsInFixedPoint[pViewportIndex[1]].ymin,
213 pScissorsInFixedPoint[pViewportIndex[0]].ymin);
214
215 scisXmax = _simd16_set_epi32(
216 pScissorsInFixedPoint[pViewportIndex[15]].xmax,
217 pScissorsInFixedPoint[pViewportIndex[14]].xmax,
218 pScissorsInFixedPoint[pViewportIndex[13]].xmax,
219 pScissorsInFixedPoint[pViewportIndex[12]].xmax,
220 pScissorsInFixedPoint[pViewportIndex[11]].xmax,
221 pScissorsInFixedPoint[pViewportIndex[10]].xmax,
222 pScissorsInFixedPoint[pViewportIndex[9]].xmax,
223 pScissorsInFixedPoint[pViewportIndex[8]].xmax,
224 pScissorsInFixedPoint[pViewportIndex[7]].xmax,
225 pScissorsInFixedPoint[pViewportIndex[6]].xmax,
226 pScissorsInFixedPoint[pViewportIndex[5]].xmax,
227 pScissorsInFixedPoint[pViewportIndex[4]].xmax,
228 pScissorsInFixedPoint[pViewportIndex[3]].xmax,
229 pScissorsInFixedPoint[pViewportIndex[2]].xmax,
230 pScissorsInFixedPoint[pViewportIndex[1]].xmax,
231 pScissorsInFixedPoint[pViewportIndex[0]].xmax);
232
233 scisYmax = _simd16_set_epi32(
234 pScissorsInFixedPoint[pViewportIndex[15]].ymax,
235 pScissorsInFixedPoint[pViewportIndex[14]].ymax,
236 pScissorsInFixedPoint[pViewportIndex[13]].ymax,
237 pScissorsInFixedPoint[pViewportIndex[12]].ymax,
238 pScissorsInFixedPoint[pViewportIndex[11]].ymax,
239 pScissorsInFixedPoint[pViewportIndex[10]].ymax,
240 pScissorsInFixedPoint[pViewportIndex[9]].ymax,
241 pScissorsInFixedPoint[pViewportIndex[8]].ymax,
242 pScissorsInFixedPoint[pViewportIndex[7]].ymax,
243 pScissorsInFixedPoint[pViewportIndex[6]].ymax,
244 pScissorsInFixedPoint[pViewportIndex[5]].ymax,
245 pScissorsInFixedPoint[pViewportIndex[4]].ymax,
246 pScissorsInFixedPoint[pViewportIndex[3]].ymax,
247 pScissorsInFixedPoint[pViewportIndex[2]].ymax,
248 pScissorsInFixedPoint[pViewportIndex[1]].ymax,
249 pScissorsInFixedPoint[pViewportIndex[0]].ymax);
250 }