1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief AVX implementation for primitive assembly.
26 * N primitives are assembled at a time, where N is the SIMD width.
27 * A state machine, that is specific for a given topology, drives the
28 * assembly of vertices into triangles.
30 ******************************************************************************/
35 #if (KNOB_SIMD_WIDTH == 8)
37 bool PaTriList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
38 bool PaTriList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
39 bool PaTriList2(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
40 void PaTriListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
42 bool PaTriStrip0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
43 bool PaTriStrip1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
44 void PaTriStripSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
46 bool PaTriFan0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
47 bool PaTriFan1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
48 void PaTriFanSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
50 bool PaQuadList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
51 bool PaQuadList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
52 void PaQuadListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
54 bool PaLineLoop0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
55 bool PaLineLoop1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
57 bool PaLineList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
58 bool PaLineList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
59 void PaLineListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t index
, __m128 verts
[]);
61 bool PaLineStrip0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
62 bool PaLineStrip1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
63 void PaLineStripSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 lineverts
[]);
65 bool PaPoints0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
66 void PaPointsSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
68 bool PaRectList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
69 bool PaRectList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
70 bool PaRectList2(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[]);
71 void PaRectListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[]);
73 template <uint32_t TotalControlPoints
>
74 void PaPatchListSingle(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
76 // We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
77 // KNOB_SIMD_WIDTH * 1 patch. This function is called once per attribute.
78 // Each attribute has 4 components.
80 /// @todo Optimize this
82 float* pOutVec
= (float*)verts
;
84 for (uint32_t cp
= 0; cp
< TotalControlPoints
; ++cp
)
86 uint32_t input_cp
= primIndex
* TotalControlPoints
+ cp
;
87 uint32_t input_vec
= input_cp
/ KNOB_SIMD_WIDTH
;
88 uint32_t input_lane
= input_cp
% KNOB_SIMD_WIDTH
;
90 // Loop over all components of the attribute
91 for (uint32_t i
= 0; i
< 4; ++i
)
93 const float* pInputVec
= (const float*)(&PaGetSimdVector(pa
, input_vec
, slot
)[i
]);
94 pOutVec
[cp
* 4 + i
] = pInputVec
[input_lane
];
99 template<uint32_t TotalControlPoints
, uint32_t CurrentControlPoints
= 1>
100 static bool PaPatchList(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
104 PaPatchList
<TotalControlPoints
, CurrentControlPoints
+ 1>,
105 PaPatchListSingle
<TotalControlPoints
>);
110 template<uint32_t TotalControlPoints
>
111 static bool PaPatchListTerm(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
113 // We have an input of KNOB_SIMD_WIDTH * TotalControlPoints and we output
114 // KNOB_SIMD_WIDTH * 1 patch. This function is called once per attribute.
115 // Each attribute has 4 components.
117 /// @todo Optimize this
119 // Loop over all components of the attribute
120 for (uint32_t i
= 0; i
< 4; ++i
)
122 for (uint32_t cp
= 0; cp
< TotalControlPoints
; ++cp
)
124 float vec
[KNOB_SIMD_WIDTH
];
125 for (uint32_t lane
= 0; lane
< KNOB_SIMD_WIDTH
; ++lane
)
127 uint32_t input_cp
= lane
* TotalControlPoints
+ cp
;
128 uint32_t input_vec
= input_cp
/ KNOB_SIMD_WIDTH
;
129 uint32_t input_lane
= input_cp
% KNOB_SIMD_WIDTH
;
131 const float* pInputVec
= (const float*)(&PaGetSimdVector(pa
, input_vec
, slot
)[i
]);
132 vec
[lane
] = pInputVec
[input_lane
];
134 verts
[cp
][i
] = _simd_loadu_ps(vec
);
140 PaPatchList
<TotalControlPoints
>,
141 PaPatchListSingle
<TotalControlPoints
>,
149 #define PA_PATCH_LIST_TERMINATOR(N) \
150 template<> bool PaPatchList<N, N>(PA_STATE_OPT& pa, uint32_t slot, simdvector verts[])\
151 { return PaPatchListTerm<N>(pa, slot, verts); }
152 PA_PATCH_LIST_TERMINATOR(1)
153 PA_PATCH_LIST_TERMINATOR(2)
154 PA_PATCH_LIST_TERMINATOR(3)
155 PA_PATCH_LIST_TERMINATOR(4)
156 PA_PATCH_LIST_TERMINATOR(5)
157 PA_PATCH_LIST_TERMINATOR(6)
158 PA_PATCH_LIST_TERMINATOR(7)
159 PA_PATCH_LIST_TERMINATOR(8)
160 PA_PATCH_LIST_TERMINATOR(9)
161 PA_PATCH_LIST_TERMINATOR(10)
162 PA_PATCH_LIST_TERMINATOR(11)
163 PA_PATCH_LIST_TERMINATOR(12)
164 PA_PATCH_LIST_TERMINATOR(13)
165 PA_PATCH_LIST_TERMINATOR(14)
166 PA_PATCH_LIST_TERMINATOR(15)
167 PA_PATCH_LIST_TERMINATOR(16)
168 PA_PATCH_LIST_TERMINATOR(17)
169 PA_PATCH_LIST_TERMINATOR(18)
170 PA_PATCH_LIST_TERMINATOR(19)
171 PA_PATCH_LIST_TERMINATOR(20)
172 PA_PATCH_LIST_TERMINATOR(21)
173 PA_PATCH_LIST_TERMINATOR(22)
174 PA_PATCH_LIST_TERMINATOR(23)
175 PA_PATCH_LIST_TERMINATOR(24)
176 PA_PATCH_LIST_TERMINATOR(25)
177 PA_PATCH_LIST_TERMINATOR(26)
178 PA_PATCH_LIST_TERMINATOR(27)
179 PA_PATCH_LIST_TERMINATOR(28)
180 PA_PATCH_LIST_TERMINATOR(29)
181 PA_PATCH_LIST_TERMINATOR(30)
182 PA_PATCH_LIST_TERMINATOR(31)
183 PA_PATCH_LIST_TERMINATOR(32)
184 #undef PA_PATCH_LIST_TERMINATOR
186 bool PaTriList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
188 SetNextPaState(pa
, PaTriList1
, PaTriListSingle0
);
189 return false; // Not enough vertices to assemble 4 or 8 triangles.
192 bool PaTriList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
194 SetNextPaState(pa
, PaTriList2
, PaTriListSingle0
);
195 return false; // Not enough vertices to assemble 8 triangles.
198 bool PaTriList2(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
200 #if KNOB_ARCH == KNOB_ARCH_AVX
202 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
203 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
);
204 simdvector
& c
= PaGetSimdVector(pa
, 2, slot
);
207 // Tri Pattern - provoking vertex is always v0
208 // v0 -> 0 3 6 9 12 15 18 21
209 // v1 -> 1 4 7 10 13 16 19 22
210 // v2 -> 2 5 8 11 14 17 20 23
212 for (int i
= 0; i
< 4; ++i
)
214 simdvector
& v0
= verts
[0];
215 v0
[i
] = _simd_blend_ps(a
[i
], b
[i
], 0x92);
216 v0
[i
] = _simd_blend_ps(v0
[i
], c
[i
], 0x24);
217 v0
[i
] = _mm256_permute_ps(v0
[i
], 0x6C);
218 s
= _mm256_permute2f128_ps(v0
[i
], v0
[i
], 0x21);
219 v0
[i
] = _simd_blend_ps(v0
[i
], s
, 0x44);
221 simdvector
& v1
= verts
[1];
222 v1
[i
] = _simd_blend_ps(a
[i
], b
[i
], 0x24);
223 v1
[i
] = _simd_blend_ps(v1
[i
], c
[i
], 0x49);
224 v1
[i
] = _mm256_permute_ps(v1
[i
], 0xB1);
225 s
= _mm256_permute2f128_ps(v1
[i
], v1
[i
], 0x21);
226 v1
[i
] = _simd_blend_ps(v1
[i
], s
, 0x66);
228 simdvector
& v2
= verts
[2];
229 v2
[i
] = _simd_blend_ps(a
[i
], b
[i
], 0x49);
230 v2
[i
] = _simd_blend_ps(v2
[i
], c
[i
], 0x92);
231 v2
[i
] = _mm256_permute_ps(v2
[i
], 0xC6);
232 s
= _mm256_permute2f128_ps(v2
[i
], v2
[i
], 0x21);
233 v2
[i
] = _simd_blend_ps(v2
[i
], s
, 0x22);
236 #elif KNOB_ARCH >= KNOB_ARCH_AVX2
238 simdvector
&a
= PaGetSimdVector(pa
, 0, slot
);
239 simdvector
&b
= PaGetSimdVector(pa
, 1, slot
);
240 simdvector
&c
= PaGetSimdVector(pa
, 2, slot
);
242 // v0 -> a0 a3 a6 b1 b4 b7 c2 c5
243 // v1 -> a1 a4 a7 b2 b5 c0 c3 c6
244 // v2 -> a2 a5 b0 b3 b6 c1 c4 c7
246 const simdscalari perm0
= _simd_set_epi32(5, 2, 7, 4, 1, 6, 3, 0);
247 const simdscalari perm1
= _simd_set_epi32(6, 3, 0, 5, 2, 7, 4, 1);
248 const simdscalari perm2
= _simd_set_epi32(7, 4, 1, 6, 3, 0, 5, 2);
250 simdvector
&v0
= verts
[0];
251 simdvector
&v1
= verts
[1];
252 simdvector
&v2
= verts
[2];
254 for (int i
= 0; i
< 4; ++i
)
256 v0
[i
] = _simd_blend_ps(_simd_blend_ps(a
[i
], b
[i
], 0x92), c
[i
], 0x24);
257 v0
[i
] = _mm256_permutevar8x32_ps(v0
[i
], perm0
);
259 v1
[i
] = _simd_blend_ps(_simd_blend_ps(a
[i
], b
[i
], 0x24), c
[i
], 0x49);
260 v1
[i
] = _mm256_permutevar8x32_ps(v1
[i
], perm1
);
262 v2
[i
] = _simd_blend_ps(_simd_blend_ps(a
[i
], b
[i
], 0x49), c
[i
], 0x92);
263 v2
[i
] = _mm256_permutevar8x32_ps(v2
[i
], perm2
);
268 SetNextPaState(pa
, PaTriList0
, PaTriListSingle0
, 0, KNOB_SIMD_WIDTH
, true);
272 void PaTriListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
274 // We have 12 simdscalars contained within 3 simdvectors which
275 // hold at least 8 triangles worth of data. We want to assemble a single
276 // triangle with data in horizontal form.
277 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
278 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
);
279 simdvector
& c
= PaGetSimdVector(pa
, 2, slot
);
281 // Convert from vertical to horizontal.
282 // Tri Pattern - provoking vertex is always v0
283 // v0 -> 0 3 6 9 12 15 18 21
284 // v1 -> 1 4 7 10 13 16 19 22
285 // v2 -> 2 5 8 11 14 17 20 23
289 verts
[0] = swizzleLane0(a
);
290 verts
[1] = swizzleLane1(a
);
291 verts
[2] = swizzleLane2(a
);
294 verts
[0] = swizzleLane3(a
);
295 verts
[1] = swizzleLane4(a
);
296 verts
[2] = swizzleLane5(a
);
299 verts
[0] = swizzleLane6(a
);
300 verts
[1] = swizzleLane7(a
);
301 verts
[2] = swizzleLane0(b
);
304 verts
[0] = swizzleLane1(b
);
305 verts
[1] = swizzleLane2(b
);
306 verts
[2] = swizzleLane3(b
);
309 verts
[0] = swizzleLane4(b
);
310 verts
[1] = swizzleLane5(b
);
311 verts
[2] = swizzleLane6(b
);
314 verts
[0] = swizzleLane7(b
);
315 verts
[1] = swizzleLane0(c
);
316 verts
[2] = swizzleLane1(c
);
319 verts
[0] = swizzleLane2(c
);
320 verts
[1] = swizzleLane3(c
);
321 verts
[2] = swizzleLane4(c
);
324 verts
[0] = swizzleLane5(c
);
325 verts
[1] = swizzleLane6(c
);
326 verts
[2] = swizzleLane7(c
);
331 bool PaTriStrip0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
333 SetNextPaState(pa
, PaTriStrip1
, PaTriStripSingle0
);
334 return false; // Not enough vertices to assemble 8 triangles.
337 bool PaTriStrip1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
339 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
340 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
343 for(int i
= 0; i
< 4; ++i
)
345 simdscalar a0
= a
[i
];
346 simdscalar b0
= b
[i
];
348 // Tri Pattern - provoking vertex is always v0
352 simdvector
& v0
= verts
[0];
356 s
= _mm256_permute2f128_ps(a0
, b0
, 0x21);
358 s
= _simd_shuffle_ps(a0
, s
, _MM_SHUFFLE(1, 0, 3, 2));
360 simdvector
& v1
= verts
[1];
362 v1
[i
] = _simd_shuffle_ps(a0
, s
, _MM_SHUFFLE(3, 1, 3, 1));
364 simdvector
& v2
= verts
[2];
366 v2
[i
] = _simd_shuffle_ps(a0
, s
, _MM_SHUFFLE(2, 2, 2, 2));
369 SetNextPaState(pa
, PaTriStrip1
, PaTriStripSingle0
, 0, KNOB_SIMD_WIDTH
);
373 void PaTriStripSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
375 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
376 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
378 // Convert from vertical to horizontal.
379 // Tri Pattern - provoking vertex is always v0
386 verts
[0] = swizzleLane0(a
);
387 verts
[1] = swizzleLane1(a
);
388 verts
[2] = swizzleLane2(a
);
391 verts
[0] = swizzleLane1(a
);
392 verts
[1] = swizzleLane3(a
);
393 verts
[2] = swizzleLane2(a
);
396 verts
[0] = swizzleLane2(a
);
397 verts
[1] = swizzleLane3(a
);
398 verts
[2] = swizzleLane4(a
);
401 verts
[0] = swizzleLane3(a
);
402 verts
[1] = swizzleLane5(a
);
403 verts
[2] = swizzleLane4(a
);
406 verts
[0] = swizzleLane4(a
);
407 verts
[1] = swizzleLane5(a
);
408 verts
[2] = swizzleLane6(a
);
411 verts
[0] = swizzleLane5(a
);
412 verts
[1] = swizzleLane7(a
);
413 verts
[2] = swizzleLane6(a
);
416 verts
[0] = swizzleLane6(a
);
417 verts
[1] = swizzleLane7(a
);
418 verts
[2] = swizzleLane0(b
);
421 verts
[0] = swizzleLane7(a
);
422 verts
[1] = swizzleLane1(b
);
423 verts
[2] = swizzleLane0(b
);
428 bool PaTriFan0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
430 simdvector
& a
= PaGetSimdVector(pa
, pa
.cur
, slot
);
432 // Extract vertex 0 to every lane of first vector
433 for(int i
= 0; i
< 4; ++i
)
436 simdvector
& v0
= verts
[0];
437 v0
[i
] = _simd_shuffle_ps(a0
, a0
, _MM_SHUFFLE(0, 0, 0, 0));
438 v0
[i
] = _mm256_permute2f128_ps(v0
[i
], a0
, 0x00);
441 // store off leading vertex for attributes
442 simdvertex
* pVertex
= (simdvertex
*)pa
.pStreamBase
;
443 pa
.leadingVertex
= pVertex
[pa
.cur
];
445 SetNextPaState(pa
, PaTriFan1
, PaTriFanSingle0
);
446 return false; // Not enough vertices to assemble 8 triangles.
449 bool PaTriFan1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
451 simdvector
& leadVert
= pa
.leadingVertex
.attrib
[slot
];
452 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
453 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
456 // need to fill vectors 1/2 with new verts, and v0 with anchor vert.
457 for(int i
= 0; i
< 4; ++i
)
459 simdscalar a0
= a
[i
];
460 simdscalar b0
= b
[i
];
462 __m256 comp
= leadVert
[i
];
463 simdvector
& v0
= verts
[0];
464 v0
[i
] = _simd_shuffle_ps(comp
, comp
, _MM_SHUFFLE(0, 0, 0, 0));
465 v0
[i
] = _mm256_permute2f128_ps(v0
[i
], comp
, 0x00);
467 simdvector
& v2
= verts
[2];
468 s
= _mm256_permute2f128_ps(a0
, b0
, 0x21);
469 v2
[i
] = _simd_shuffle_ps(a0
, s
, _MM_SHUFFLE(1, 0, 3, 2));
471 simdvector
& v1
= verts
[1];
472 v1
[i
] = _simd_shuffle_ps(a0
, v2
[i
], _MM_SHUFFLE(2, 1, 2, 1));
475 SetNextPaState(pa
, PaTriFan1
, PaTriFanSingle0
, 0, KNOB_SIMD_WIDTH
);
479 void PaTriFanSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
481 // vert 0 from leading vertex
482 simdvector
& lead
= pa
.leadingVertex
.attrib
[slot
];
483 verts
[0] = swizzleLane0(lead
);
485 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
486 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
491 verts
[1] = swizzleLaneN(a
, primIndex
+ 1);
495 verts
[1] = swizzleLane0(b
);
501 verts
[2] = swizzleLaneN(a
, primIndex
+ 2);
505 verts
[2] = swizzleLaneN(b
, primIndex
- 6);
509 bool PaQuadList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
511 SetNextPaState(pa
, PaQuadList1
, PaQuadListSingle0
);
512 return false; // Not enough vertices to assemble 8 triangles.
515 bool PaQuadList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
517 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
518 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
);
521 for(int i
= 0; i
< 4; ++i
)
523 simdscalar a0
= a
[i
];
524 simdscalar b0
= b
[i
];
526 s1
= _mm256_permute2f128_ps(a0
, b0
, 0x20);
527 s2
= _mm256_permute2f128_ps(a0
, b0
, 0x31);
529 simdvector
& v0
= verts
[0];
530 v0
[i
] = _simd_shuffle_ps(s1
, s2
, _MM_SHUFFLE(0, 0, 0, 0));
532 simdvector
& v1
= verts
[1];
533 v1
[i
] = _simd_shuffle_ps(s1
, s2
, _MM_SHUFFLE(2, 1, 2, 1));
535 simdvector
& v2
= verts
[2];
536 v2
[i
] = _simd_shuffle_ps(s1
, s2
, _MM_SHUFFLE(3, 2, 3, 2));
539 SetNextPaState(pa
, PaQuadList0
, PaQuadListSingle0
, 0, KNOB_SIMD_WIDTH
, true);
543 void PaQuadListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
545 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
546 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
);
551 // triangle 0 - 0 1 2
552 verts
[0] = swizzleLane0(a
);
553 verts
[1] = swizzleLane1(a
);
554 verts
[2] = swizzleLane2(a
);
558 // triangle 1 - 0 2 3
559 verts
[0] = swizzleLane0(a
);
560 verts
[1] = swizzleLane2(a
);
561 verts
[2] = swizzleLane3(a
);
565 // triangle 2 - 4 5 6
566 verts
[0] = swizzleLane4(a
);
567 verts
[1] = swizzleLane5(a
);
568 verts
[2] = swizzleLane6(a
);
572 // triangle 3 - 4 6 7
573 verts
[0] = swizzleLane4(a
);
574 verts
[1] = swizzleLane6(a
);
575 verts
[2] = swizzleLane7(a
);
579 // triangle 4 - 8 9 10 (0 1 2)
580 verts
[0] = swizzleLane0(b
);
581 verts
[1] = swizzleLane1(b
);
582 verts
[2] = swizzleLane2(b
);
586 // triangle 1 - 0 2 3
587 verts
[0] = swizzleLane0(b
);
588 verts
[1] = swizzleLane2(b
);
589 verts
[2] = swizzleLane3(b
);
593 // triangle 2 - 4 5 6
594 verts
[0] = swizzleLane4(b
);
595 verts
[1] = swizzleLane5(b
);
596 verts
[2] = swizzleLane6(b
);
600 // triangle 3 - 4 6 7
601 verts
[0] = swizzleLane4(b
);
602 verts
[1] = swizzleLane6(b
);
603 verts
[2] = swizzleLane7(b
);
608 void PaLineLoopSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t lineIndex
, __m128 verts
[])
610 PaLineStripSingle0(pa
, slot
, lineIndex
, verts
);
612 if (pa
.numPrimsComplete
+ lineIndex
== pa
.numPrims
- 1) {
613 simdvector
&start
= PaGetSimdVector(pa
, pa
.first
, slot
);
614 verts
[1] = swizzleLane0(start
);
618 bool PaLineLoop0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
620 SetNextPaState(pa
, PaLineLoop1
, PaLineLoopSingle0
);
624 bool PaLineLoop1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
626 PaLineStrip1(pa
, slot
, verts
);
628 if (pa
.numPrimsComplete
+ KNOB_SIMD_WIDTH
> pa
.numPrims
- 1) {
629 // loop reconnect now
630 int lane
= pa
.numPrims
- pa
.numPrimsComplete
- 1;
631 simdvector
&start
= PaGetSimdVector(pa
, pa
.first
, slot
);
632 for (int i
= 0; i
< 4; i
++) {
633 float *startVtx
= (float *)&(start
[i
]);
634 float *targetVtx
= (float *)&(verts
[1][i
]);
635 targetVtx
[lane
] = startVtx
[0];
639 SetNextPaState(pa
, PaLineLoop1
, PaLineLoopSingle0
, 0, KNOB_SIMD_WIDTH
);
644 bool PaLineList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
646 SetNextPaState(pa
, PaLineList1
, PaLineListSingle0
);
647 return false; // Not enough vertices to assemble 8 lines
650 bool PaLineList1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
652 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
653 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
);
654 /// @todo: verify provoking vertex is correct
655 // Line list 0 1 2 3 4 5 6 7
656 // 8 9 10 11 12 13 14 15
659 // 0 2 4 6 8 10 12 14
660 // 1 3 5 7 9 11 13 15
662 for (uint32_t i
= 0; i
< 4; ++i
)
665 __m256 vALowBLow
= _mm256_permute2f128_ps(a
.v
[i
], b
.v
[i
], 0x20);
666 // 4 5 6 7 12 13 14 15
667 __m256 vAHighBHigh
= _mm256_permute2f128_ps(a
.v
[i
], b
.v
[i
], 0x31);
669 // 0 2 4 6 8 10 12 14
670 verts
[0].v
[i
] = _mm256_shuffle_ps(vALowBLow
, vAHighBHigh
, _MM_SHUFFLE(2, 0, 2, 0));
671 // 1 3 5 7 9 11 13 15
672 verts
[1].v
[i
] = _mm256_shuffle_ps(vALowBLow
, vAHighBHigh
, _MM_SHUFFLE(3, 1, 3, 1));
675 SetNextPaState(pa
, PaLineList0
, PaLineListSingle0
, 0, KNOB_SIMD_WIDTH
, true);
679 void PaLineListSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
681 simdvector
&a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
682 simdvector
&b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
687 verts
[0] = swizzleLane0(a
);
688 verts
[1] = swizzleLane1(a
);
691 verts
[0] = swizzleLane2(a
);
692 verts
[1] = swizzleLane3(a
);
695 verts
[0] = swizzleLane4(a
);
696 verts
[1] = swizzleLane5(a
);
699 verts
[0] = swizzleLane6(a
);
700 verts
[1] = swizzleLane7(a
);
703 verts
[0] = swizzleLane0(b
);
704 verts
[1] = swizzleLane1(b
);
707 verts
[0] = swizzleLane2(b
);
708 verts
[1] = swizzleLane3(b
);
711 verts
[0] = swizzleLane4(b
);
712 verts
[1] = swizzleLane5(b
);
715 verts
[0] = swizzleLane6(b
);
716 verts
[1] = swizzleLane7(b
);
721 bool PaLineStrip0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
723 SetNextPaState(pa
, PaLineStrip1
, PaLineStripSingle0
);
724 return false; // Not enough vertices to assemble 8 lines
727 bool PaLineStrip1(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
729 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
730 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
732 /// @todo: verify provoking vertex is correct
733 // Line list 0 1 2 3 4 5 6 7
734 // 8 9 10 11 12 13 14 15
742 for(uint32_t i
= 0; i
< 4; ++i
)
745 __m256 vPermA
= _mm256_permute_ps(a
.v
[i
], 0x39); // indices hi->low 00 11 10 01 (0 3 2 1)
747 __m256 vAHighBLow
= _mm256_permute2f128_ps(a
.v
[i
], b
.v
[i
], 0x21);
750 __m256 vPermB
= _mm256_permute_ps(vAHighBLow
, 0); // indices hi->low (0 0 0 0)
752 verts
[1].v
[i
] = _mm256_blend_ps(vPermA
, vPermB
, 0x88);
755 SetNextPaState(pa
, PaLineStrip1
, PaLineStripSingle0
, 0, KNOB_SIMD_WIDTH
);
759 void PaLineStripSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t lineIndex
, __m128 verts
[])
761 simdvector
& a
= PaGetSimdVector(pa
, pa
.prev
, slot
);
762 simdvector
& b
= PaGetSimdVector(pa
, pa
.cur
, slot
);
767 verts
[0] = swizzleLane0(a
);
768 verts
[1] = swizzleLane1(a
);
771 verts
[0] = swizzleLane1(a
);
772 verts
[1] = swizzleLane2(a
);
775 verts
[0] = swizzleLane2(a
);
776 verts
[1] = swizzleLane3(a
);
779 verts
[0] = swizzleLane3(a
);
780 verts
[1] = swizzleLane4(a
);
783 verts
[0] = swizzleLane4(a
);
784 verts
[1] = swizzleLane5(a
);
787 verts
[0] = swizzleLane5(a
);
788 verts
[1] = swizzleLane6(a
);
791 verts
[0] = swizzleLane6(a
);
792 verts
[1] = swizzleLane7(a
);
795 verts
[0] = swizzleLane7(a
);
796 verts
[1] = swizzleLane0(b
);
801 bool PaPoints0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
803 simdvector
& a
= PaGetSimdVector(pa
, pa
.cur
, slot
);
805 verts
[0] = a
; // points only have 1 vertex.
807 SetNextPaState(pa
, PaPoints0
, PaPointsSingle0
, 0, KNOB_SIMD_WIDTH
, true);
811 void PaPointsSingle0(PA_STATE_OPT
& pa
, uint32_t slot
, uint32_t primIndex
, __m128 verts
[])
813 simdvector
&a
= PaGetSimdVector(pa
, pa
.cur
, slot
);
817 verts
[0] = swizzleLane0(a
);
820 verts
[0] = swizzleLane1(a
);
823 verts
[0] = swizzleLane2(a
);
826 verts
[0] = swizzleLane3(a
);
829 verts
[0] = swizzleLane4(a
);
832 verts
[0] = swizzleLane5(a
);
835 verts
[0] = swizzleLane6(a
);
838 verts
[0] = swizzleLane7(a
);
843 //////////////////////////////////////////////////////////////////////////
844 /// @brief State 1 for RECT_LIST topology.
845 /// There is not enough to assemble 8 triangles.
846 bool PaRectList0(PA_STATE_OPT
& pa
, uint32_t slot
, simdvector verts
[])
848 SetNextPaState(pa
, PaRectList1
, PaRectListSingle0
);
852 //////////////////////////////////////////////////////////////////////////
853 /// @brief State 1 for RECT_LIST topology.
854 /// Rect lists has the following format.
856 /// v2 o---o v5 o---o v8 o---o v11 o---o
857 /// | \ | | \ | | \ | | \ |
858 /// v1 o---o v4 o---o v7 o---o v10 o---o
861 /// Only 3 vertices of the rectangle are supplied. The 4th vertex is implied.
863 /// tri0 = { v0, v1, v2 } tri1 = { v0, v2, w } <-- w = v0 - v1 + v2
864 /// tri2 = { v3, v4, v5 } tri3 = { v3, v5, x } <-- x = v3 - v4 + v5
867 /// PA outputs 3 simdvectors for each of the triangle vertices v0, v1, v2
868 /// where v0 contains all the first vertices for 8 triangles.
871 /// verts[0] = { v0, v0, v3, v3, v6, v6, v9, v9 }
872 /// verts[1] = { v1, v2, v4, v5, v7, v8, v10, v11 }
873 /// verts[2] = { v2, w, v5, x, v8, y, v11, z }
875 /// @param pa - State for PA state machine.
876 /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
877 /// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
883 // SIMD vectors a and b are the last two vertical outputs from the vertex shader.
884 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
); // a[] = { v0, v1, v2, v3, v4, v5, v6, v7 }
885 simdvector
& b
= PaGetSimdVector(pa
, 1, slot
); // b[] = { v8, v9, v10, v11, v12, v13, v14, v15 }
887 __m256 tmp0
, tmp1
, tmp2
;
889 // Loop over each component in the simdvector.
890 for(int i
= 0; i
< 4; ++i
)
892 simdvector
& v0
= verts
[0]; // verts[0] needs to be { v0, v0, v3, v3, v6, v6, v9, v9 }
893 tmp0
= _mm256_permute2f128_ps(b
[i
], b
[i
], 0x01); // tmp0 = { v12, v13, v14, v15, v8, v9, v10, v11 }
894 v0
[i
] = _mm256_blend_ps(a
[i
], tmp0
, 0x20); // v0 = { v0, *, *, v3, *, v9, v6, * } where * is don't care.
895 tmp1
= _mm256_permute_ps(v0
[i
], 0xF0); // tmp1 = { v0, v0, v3, v3, *, *, *, * }
896 v0
[i
] = _mm256_permute_ps(v0
[i
], 0x5A); // v0 = { *, *, *, *, v6, v6, v9, v9 }
897 v0
[i
] = _mm256_blend_ps(tmp1
, v0
[i
], 0xF0); // v0 = { v0, v0, v3, v3, v6, v6, v9, v9 }
899 /// NOTE This is a bit expensive due to conflicts between vertices in 'a' and 'b'.
900 /// AVX2 should make this much cheaper.
901 simdvector
& v1
= verts
[1]; // verts[1] needs to be { v1, v2, v4, v5, v7, v8, v10, v11 }
902 v1
[i
] = _mm256_permute_ps(a
[i
], 0x09); // v1 = { v1, v2, *, *, *, *, *, * }
903 tmp1
= _mm256_permute_ps(a
[i
], 0x43); // tmp1 = { *, *, *, *, v7, *, v4, v5 }
904 tmp2
= _mm256_blend_ps(v1
[i
], tmp1
, 0xF0); // tmp2 = { v1, v2, *, *, v7, *, v4, v5 }
905 tmp1
= _mm256_permute2f128_ps(tmp2
, tmp2
, 0x1); // tmp1 = { v7, *, v4, v5, * *, *, * }
906 v1
[i
] = _mm256_permute_ps(tmp0
, 0xE0); // v1 = { *, *, *, *, *, v8, v10, v11 }
907 v1
[i
] = _mm256_blend_ps(tmp2
, v1
[i
], 0xE0); // v1 = { v1, v2, *, *, v7, v8, v10, v11 }
908 v1
[i
] = _mm256_blend_ps(v1
[i
], tmp1
, 0x0C); // v1 = { v1, v2, v4, v5, v7, v8, v10, v11 }
910 // verts[2] = { v2, w, v5, x, v8, y, v11, z }
911 simdvector
& v2
= verts
[2]; // verts[2] needs to be { v2, w, v5, x, v8, y, v11, z }
912 v2
[i
] = _mm256_permute_ps(tmp0
, 0x30); // v2 = { *, *, *, *, v8, *, v11, * }
913 tmp1
= _mm256_permute_ps(tmp2
, 0x31); // tmp1 = { v2, *, v5, *, *, *, *, * }
914 v2
[i
] = _mm256_blend_ps(tmp1
, v2
[i
], 0xF0);
916 // Need to compute 4th implied vertex for the rectangle.
917 tmp2
= _mm256_sub_ps(v0
[i
], v1
[i
]);
918 tmp2
= _mm256_add_ps(tmp2
, v2
[i
]); // tmp2 = { w, *, x, *, y, *, z, * }
919 tmp2
= _mm256_permute_ps(tmp2
, 0xA0); // tmp2 = { *, w, *, x, *, y, *, z }
920 v2
[i
] = _mm256_blend_ps(v2
[i
], tmp2
, 0xAA); // v2 = { v2, w, v5, x, v8, y, v11, z }
923 SetNextPaState(pa
, PaRectList1
, PaRectListSingle0
, 0, KNOB_SIMD_WIDTH
, true);
927 //////////////////////////////////////////////////////////////////////////
928 /// @brief State 2 for RECT_LIST topology.
929 /// Not implemented unless there is a use case for more then 8 rects.
930 /// @param pa - State for PA state machine.
931 /// @param slot - Index into VS output which is either a position (slot 0) or attribute.
932 /// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
938 SWR_ASSERT(0); // Is rect list used for anything other then clears?
939 SetNextPaState(pa
, PaRectList0
, PaRectListSingle0
, 0, KNOB_SIMD_WIDTH
, true);
943 //////////////////////////////////////////////////////////////////////////
944 /// @brief This procedure is called by the Binner to assemble the attributes.
945 /// Unlike position, which is stored vertically, the attributes are
946 /// stored horizontally. The outputs from the VS, labeled as 'a' and
947 /// 'b' are vertical. This function needs to transpose the lanes
948 /// containing the vertical attribute data into horizontal form.
949 /// @param pa - State for PA state machine.
950 /// @param slot - Index into VS output for a given attribute.
951 /// @param primIndex - Binner processes each triangle individually.
952 /// @param verts - triangle output for binner. SOA - Array of v0 for 8 triangles, followed by v1, etc.
953 void PaRectListSingle0(
959 // We have 12 simdscalars contained within 3 simdvectors which
960 // hold at least 8 triangles worth of data. We want to assemble a single
961 // triangle with data in horizontal form.
962 simdvector
& a
= PaGetSimdVector(pa
, 0, slot
);
964 // Convert from vertical to horizontal.
968 verts
[0] = swizzleLane0(a
);
969 verts
[1] = swizzleLane1(a
);
970 verts
[2] = swizzleLane2(a
);
973 verts
[0] = swizzleLane0(a
);
974 verts
[1] = swizzleLane2(a
);
975 verts
[2] = _mm_blend_ps(verts
[0], verts
[1], 0x2);
988 PA_STATE_OPT::PA_STATE_OPT(DRAW_CONTEXT
*in_pDC
, uint32_t in_numPrims
, uint8_t* pStream
, uint32_t in_streamSizeInVerts
,
989 bool in_isStreaming
, PRIMITIVE_TOPOLOGY topo
) : PA_STATE(in_pDC
, pStream
, in_streamSizeInVerts
), numPrims(in_numPrims
), numPrimsComplete(0), numSimdPrims(0),
990 cur(0), prev(0), first(0), counter(0), reset(false), pfnPaFunc(nullptr), isStreaming(in_isStreaming
)
992 const API_STATE
& state
= GetApiState(pDC
);
994 this->binTopology
= topo
== TOP_UNKNOWN
? state
.topology
: topo
;
996 switch (this->binTopology
)
998 case TOP_TRIANGLE_LIST
:
999 this->pfnPaFunc
= PaTriList0
;
1001 case TOP_TRIANGLE_STRIP
:
1002 this->pfnPaFunc
= PaTriStrip0
;
1004 case TOP_TRIANGLE_FAN
:
1005 this->pfnPaFunc
= PaTriFan0
;
1008 this->pfnPaFunc
= PaQuadList0
;
1009 this->numPrims
= in_numPrims
* 2; // Convert quad primitives into triangles
1011 case TOP_QUAD_STRIP
:
1012 // quad strip pattern when decomposed into triangles is the same as verts strips
1013 this->pfnPaFunc
= PaTriStrip0
;
1014 this->numPrims
= in_numPrims
* 2; // Convert quad primitives into triangles
1017 this->pfnPaFunc
= PaLineList0
;
1018 this->numPrims
= in_numPrims
;
1020 case TOP_LINE_STRIP
:
1021 this->pfnPaFunc
= PaLineStrip0
;
1022 this->numPrims
= in_numPrims
;
1025 this->pfnPaFunc
= PaLineLoop0
;
1026 this->numPrims
= in_numPrims
;
1028 case TOP_POINT_LIST
:
1029 // use point binner and rasterizer if supported
1030 this->pfnPaFunc
= PaPoints0
;
1031 this->numPrims
= in_numPrims
;
1034 this->pfnPaFunc
= PaRectList0
;
1035 this->numPrims
= in_numPrims
* 2;
1038 case TOP_PATCHLIST_1
:
1039 this->pfnPaFunc
= PaPatchList
<1>;
1041 case TOP_PATCHLIST_2
:
1042 this->pfnPaFunc
= PaPatchList
<2>;
1044 case TOP_PATCHLIST_3
:
1045 this->pfnPaFunc
= PaPatchList
<3>;
1047 case TOP_PATCHLIST_4
:
1048 this->pfnPaFunc
= PaPatchList
<4>;
1050 case TOP_PATCHLIST_5
:
1051 this->pfnPaFunc
= PaPatchList
<5>;
1053 case TOP_PATCHLIST_6
:
1054 this->pfnPaFunc
= PaPatchList
<6>;
1056 case TOP_PATCHLIST_7
:
1057 this->pfnPaFunc
= PaPatchList
<7>;
1059 case TOP_PATCHLIST_8
:
1060 this->pfnPaFunc
= PaPatchList
<8>;
1062 case TOP_PATCHLIST_9
:
1063 this->pfnPaFunc
= PaPatchList
<9>;
1065 case TOP_PATCHLIST_10
:
1066 this->pfnPaFunc
= PaPatchList
<10>;
1068 case TOP_PATCHLIST_11
:
1069 this->pfnPaFunc
= PaPatchList
<11>;
1071 case TOP_PATCHLIST_12
:
1072 this->pfnPaFunc
= PaPatchList
<12>;
1074 case TOP_PATCHLIST_13
:
1075 this->pfnPaFunc
= PaPatchList
<13>;
1077 case TOP_PATCHLIST_14
:
1078 this->pfnPaFunc
= PaPatchList
<14>;
1080 case TOP_PATCHLIST_15
:
1081 this->pfnPaFunc
= PaPatchList
<15>;
1083 case TOP_PATCHLIST_16
:
1084 this->pfnPaFunc
= PaPatchList
<16>;
1086 case TOP_PATCHLIST_17
:
1087 this->pfnPaFunc
= PaPatchList
<17>;
1089 case TOP_PATCHLIST_18
:
1090 this->pfnPaFunc
= PaPatchList
<18>;
1092 case TOP_PATCHLIST_19
:
1093 this->pfnPaFunc
= PaPatchList
<19>;
1095 case TOP_PATCHLIST_20
:
1096 this->pfnPaFunc
= PaPatchList
<20>;
1098 case TOP_PATCHLIST_21
:
1099 this->pfnPaFunc
= PaPatchList
<21>;
1101 case TOP_PATCHLIST_22
:
1102 this->pfnPaFunc
= PaPatchList
<22>;
1104 case TOP_PATCHLIST_23
:
1105 this->pfnPaFunc
= PaPatchList
<23>;
1107 case TOP_PATCHLIST_24
:
1108 this->pfnPaFunc
= PaPatchList
<24>;
1110 case TOP_PATCHLIST_25
:
1111 this->pfnPaFunc
= PaPatchList
<25>;
1113 case TOP_PATCHLIST_26
:
1114 this->pfnPaFunc
= PaPatchList
<26>;
1116 case TOP_PATCHLIST_27
:
1117 this->pfnPaFunc
= PaPatchList
<27>;
1119 case TOP_PATCHLIST_28
:
1120 this->pfnPaFunc
= PaPatchList
<28>;
1122 case TOP_PATCHLIST_29
:
1123 this->pfnPaFunc
= PaPatchList
<29>;
1125 case TOP_PATCHLIST_30
:
1126 this->pfnPaFunc
= PaPatchList
<30>;
1128 case TOP_PATCHLIST_31
:
1129 this->pfnPaFunc
= PaPatchList
<31>;
1131 case TOP_PATCHLIST_32
:
1132 this->pfnPaFunc
= PaPatchList
<32>;
1140 this->pfnPaFuncReset
= this->pfnPaFunc
;
1142 // simdscalari id8 = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
1143 // simdscalari id4 = _mm256_set_epi32(0, 0, 1, 1, 2, 2, 3, 3);
1144 simdscalari id8
= _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
1145 simdscalari id4
= _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
1147 switch(this->binTopology
)
1149 case TOP_TRIANGLE_LIST
:
1150 case TOP_TRIANGLE_STRIP
:
1151 case TOP_TRIANGLE_FAN
:
1152 case TOP_LINE_STRIP
:
1155 this->primIDIncr
= 8;
1159 case TOP_QUAD_STRIP
:
1161 this->primIDIncr
= 4;
1164 case TOP_POINT_LIST
:
1165 this->primIDIncr
= 8;
1168 case TOP_PATCHLIST_1
:
1169 case TOP_PATCHLIST_2
:
1170 case TOP_PATCHLIST_3
:
1171 case TOP_PATCHLIST_4
:
1172 case TOP_PATCHLIST_5
:
1173 case TOP_PATCHLIST_6
:
1174 case TOP_PATCHLIST_7
:
1175 case TOP_PATCHLIST_8
:
1176 case TOP_PATCHLIST_9
:
1177 case TOP_PATCHLIST_10
:
1178 case TOP_PATCHLIST_11
:
1179 case TOP_PATCHLIST_12
:
1180 case TOP_PATCHLIST_13
:
1181 case TOP_PATCHLIST_14
:
1182 case TOP_PATCHLIST_15
:
1183 case TOP_PATCHLIST_16
:
1184 case TOP_PATCHLIST_17
:
1185 case TOP_PATCHLIST_18
:
1186 case TOP_PATCHLIST_19
:
1187 case TOP_PATCHLIST_20
:
1188 case TOP_PATCHLIST_21
:
1189 case TOP_PATCHLIST_22
:
1190 case TOP_PATCHLIST_23
:
1191 case TOP_PATCHLIST_24
:
1192 case TOP_PATCHLIST_25
:
1193 case TOP_PATCHLIST_26
:
1194 case TOP_PATCHLIST_27
:
1195 case TOP_PATCHLIST_28
:
1196 case TOP_PATCHLIST_29
:
1197 case TOP_PATCHLIST_30
:
1198 case TOP_PATCHLIST_31
:
1199 case TOP_PATCHLIST_32
:
1200 // Always run KNOB_SIMD_WIDTH number of patches at a time.
1201 this->primIDIncr
= 8;