1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
25 * @brief Definitions for Frontend which handles vertex processing,
26 * primitive assembly, clipping, binning, etc.
28 ******************************************************************************/
31 #include <type_traits>
#if ENABLE_AVX512_SIMD16
// TODO: this belongs in state.h alongside the simdvector definition, but there is a llvm codegen issue
// NOTE(review): the member below is invalid outside a struct; restored the
// enclosing simd16vertex definition (a SIMD16-wide vertex: one simd16vector
// per vertex attribute slot).
struct simd16vertex
{
    simd16vector attrib[SWR_VTX_NUM_SLOTS];
};
#endif
//////////////////////////////////////////////////////////////////////////
/// @brief Calculates the A and B coefficients for the 3 edges of the triangle.
///
/// Maths for edge equations: standard form of a line in 2d is
///     Ax + By + C = 0
/// For the edge between two verts, A is the y-delta and B is the negated
/// x-delta (sign folded into the operand order below).
/// @param vX - x coords of the 3 verts in lanes 0..2 (lane 3 is don't-care)
/// @param vY - y coords of the 3 verts in lanes 0..2
/// @param vA - out: (y0-y1, y1-y2, y2-y0, 0)
/// @param vB - out: (x1-x0, x2-x1, x0-x2, 0)
inline
void triangleSetupAB(const __m128 vX, const __m128 vY, __m128& vA, __m128& vB)
{
    // rotate y by one vertex: vYrot = (y1, y2, y0, y3)
    const __m128 vYrot = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
    // A = y - yrot = (y0-y1, y1-y2, y2-y0, 0)
    vA = _mm_sub_ps(vY, vYrot);

    // rotate x by one vertex: vXrot = (x1, x2, x0, x3)
    const __m128 vXrot = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));
    // B = xrot - x = (x1-x0, x2-x1, x0-x2, 0)
    vB = _mm_sub_ps(vXrot, vX);
}
74 void triangleSetupABVertical(const simdscalar vX
[3], const simdscalar vY
[3], simdscalar (&vA
)[3], simdscalar (&vB
)[3])
76 // generate edge equations
79 vA
[0] = _simd_sub_ps(vY
[0], vY
[1]);
80 vA
[1] = _simd_sub_ps(vY
[1], vY
[2]);
81 vA
[2] = _simd_sub_ps(vY
[2], vY
[0]);
83 vB
[0] = _simd_sub_ps(vX
[1], vX
[0]);
84 vB
[1] = _simd_sub_ps(vX
[2], vX
[1]);
85 vB
[2] = _simd_sub_ps(vX
[0], vX
[2]);
//////////////////////////////////////////////////////////////////////////
/// @brief Integer version of triangleSetupAB: A and B edge coefficients
///        from fixed-point vertex positions.
/// @param vX - x coords of the 3 verts in lanes 0..2 (lane 3 is don't-care)
/// @param vY - y coords of the 3 verts in lanes 0..2
/// @param vA - out: (y0-y1, y1-y2, y2-y0, 0)
/// @param vB - out: (x1-x0, x2-x1, x0-x2, 0)
inline
void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i& vA, __m128i& vB)
{
    // generate edge equations
    // rotate y by one vertex and subtract: A = y - yrot
    const __m128i vYrot = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
    vA = _mm_sub_epi32(vY, vYrot);

    // rotate x by one vertex and subtract: B = xrot - x
    const __m128i vXrot = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
    vB = _mm_sub_epi32(vXrot, vX);
}
103 void triangleSetupABIntVertical(const simdscalari vX
[3], const simdscalari vY
[3], simdscalari (&vA
)[3], simdscalari (&vB
)[3])
107 vA
[0] = _simd_sub_epi32(vY
[0], vY
[1]);
108 vA
[1] = _simd_sub_epi32(vY
[1], vY
[2]);
109 vA
[2] = _simd_sub_epi32(vY
[2], vY
[0]);
111 vB
[0] = _simd_sub_epi32(vX
[1], vX
[0]);
112 vB
[1] = _simd_sub_epi32(vX
[2], vX
[1]);
113 vB
[2] = _simd_sub_epi32(vX
[0], vX
[2]);
#if ENABLE_AVX512_SIMD16
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 overload of triangleSetupABIntVertical: edge coefficients
///        for 16 triangles at once.
/// @param vX - x coords, one simd16 vector per vertex (vX[0..2])
/// @param vY - y coords, one simd16 vector per vertex (vY[0..2])
/// @param vA - out: A coefficients of the 3 edges
/// @param vB - out: B coefficients of the 3 edges
inline
void triangleSetupABIntVertical(const simd16scalari vX[3], const simd16scalari vY[3], simd16scalari (&vA)[3], simd16scalari (&vB)[3])
{
    // A = y deltas around the triangle
    vA[0] = _simd16_sub_epi32(vY[0], vY[1]);
    vA[1] = _simd16_sub_epi32(vY[1], vY[2]);
    vA[2] = _simd16_sub_epi32(vY[2], vY[0]);

    // B = negated x deltas (sign folded into operand order)
    vB[0] = _simd16_sub_epi32(vX[1], vX[0]);
    vB[1] = _simd16_sub_epi32(vX[2], vX[1]);
    vB[2] = _simd16_sub_epi32(vX[0], vX[2]);
}
#endif
132 // Calculate the determinant of the triangle
133 // 2 vectors between the 3 points: P, Q
134 // Px = x0-x2, Py = y0-y2
135 // Qx = x1-x2, Qy = y1-y2
137 // det = | | = PxQy - PyQx
139 // simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
140 // try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
141 // : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
142 // : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
143 // : B[2]*A[1] - A[2]*B[1]
145 float calcDeterminantInt(const __m128i vA
, const __m128i vB
)
147 // vAShuf = [A1, A0, A2, A0]
148 __m128i vAShuf
= _mm_shuffle_epi32(vA
, _MM_SHUFFLE(0, 2, 0, 1));
149 // vBShuf = [B2, B0, B1, B0]
150 __m128i vBShuf
= _mm_shuffle_epi32(vB
, _MM_SHUFFLE(0, 1, 0, 2));
151 // vMul = [A1*B2, B1*A2]
152 __m128i vMul
= _mm_mul_epi32(vAShuf
, vBShuf
);
154 // shuffle upper to lower
155 // vMul2 = [B1*A2, B1*A2]
156 __m128i vMul2
= _mm_shuffle_epi32(vMul
, _MM_SHUFFLE(3, 2, 3, 2));
157 //vMul = [A1*B2 - B1*A2]
158 vMul
= _mm_sub_epi64(vMul
, vMul2
);
161 _mm_store_sd((double*)&result
, _mm_castsi128_pd(vMul
));
163 double dResult
= (double)result
;
164 dResult
= dResult
* (1.0 / FIXED_POINT16_SCALE
);
166 return (float)dResult
;
170 void calcDeterminantIntVertical(const simdscalari vA
[3], const simdscalari vB
[3], simdscalari
*pvDet
)
172 // refer to calcDeterminantInt comment for calculation explanation
174 simdscalari vA1Lo
= _simd_unpacklo_epi32(vA
[1], vA
[1]); // 0 0 1 1 4 4 5 5
175 simdscalari vA1Hi
= _simd_unpackhi_epi32(vA
[1], vA
[1]); // 2 2 3 3 6 6 7 7
177 simdscalari vB2Lo
= _simd_unpacklo_epi32(vB
[2], vB
[2]);
178 simdscalari vB2Hi
= _simd_unpackhi_epi32(vB
[2], vB
[2]);
180 simdscalari vA1B2Lo
= _simd_mul_epi32(vA1Lo
, vB2Lo
); // 0 1 4 5
181 simdscalari vA1B2Hi
= _simd_mul_epi32(vA1Hi
, vB2Hi
); // 2 3 6 7
184 simdscalari vA2Lo
= _simd_unpacklo_epi32(vA
[2], vA
[2]);
185 simdscalari vA2Hi
= _simd_unpackhi_epi32(vA
[2], vA
[2]);
187 simdscalari vB1Lo
= _simd_unpacklo_epi32(vB
[1], vB
[1]);
188 simdscalari vB1Hi
= _simd_unpackhi_epi32(vB
[1], vB
[1]);
190 simdscalari vA2B1Lo
= _simd_mul_epi32(vA2Lo
, vB1Lo
);
191 simdscalari vA2B1Hi
= _simd_mul_epi32(vA2Hi
, vB1Hi
);
194 simdscalari detLo
= _simd_sub_epi64(vA1B2Lo
, vA2B1Lo
);
195 simdscalari detHi
= _simd_sub_epi64(vA1B2Hi
, vA2B1Hi
);
197 // shuffle 0 1 4 5 -> 0 1 2 3
198 simdscalari vResultLo
= _simd_permute2f128_si(detLo
, detHi
, 0x20);
199 simdscalari vResultHi
= _simd_permute2f128_si(detLo
, detHi
, 0x31);
201 pvDet
[0] = vResultLo
;
202 pvDet
[1] = vResultHi
;
#if ENABLE_AVX512_SIMD16
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 SoA determinant: computes A[1]*B[2] - A[2]*B[1] as 64-bit
///        integers for 16 triangles.
/// @param vA - A edge coefficients, one simd16 vector per edge
/// @param vB - B edge coefficients, one simd16 vector per edge
/// @param pvDet - out: two simd16 vectors holding the 64-bit determinants
inline
void calcDeterminantIntVertical(const simd16scalari vA[3], const simd16scalari vB[3], simd16scalari *pvDet)
{
    // refer to calcDeterminantInt comment for calculation explanation

    // BUGFIX(review): the SIMD8 fallback and the "native" SIMD16 path both
    // appeared unconditionally, so the disabled native path would have
    // recomputed and overwritten pvDet; restored the #if 1/#else selection
    // implied by the TODO below.
#if 1
    // TODO: get the native SIMD16 version working..

    // split into two SIMD8 halves and reuse the simdscalari implementation
    simdscalari vA_lo[3];
    simdscalari vA_hi[3];
    simdscalari vB_lo[3];
    simdscalari vB_hi[3];

    for (uint32_t i = 0; i < 3; i += 1)
    {
        vA_lo[i] = _simd16_extract_si(vA[i], 0);
        vA_hi[i] = _simd16_extract_si(vA[i], 1);
        vB_lo[i] = _simd16_extract_si(vB[i], 0);
        vB_hi[i] = _simd16_extract_si(vB[i], 1);
    }

    // each SIMD8 call fills the two simdscalari halves of one simd16scalari
    calcDeterminantIntVertical(vA_lo, vB_lo, reinterpret_cast<simdscalari *>(&pvDet[0]));
    calcDeterminantIntVertical(vA_hi, vB_hi, reinterpret_cast<simdscalari *>(&pvDet[1]));
#else
    simd16scalari vA1Lo = _simd16_unpacklo_epi32(vA[1], vA[1]); // 0 0 1 1 4 4 5 5 8 8 9 9 C C D D
    simd16scalari vA1Hi = _simd16_unpackhi_epi32(vA[1], vA[1]); // 2 2 3 3 6 6 7 7 A A B B E E F F

    simd16scalari vB2Lo = _simd16_unpacklo_epi32(vB[2], vB[2]);
    simd16scalari vB2Hi = _simd16_unpackhi_epi32(vB[2], vB[2]);

    simd16scalari vA1B2Lo = _simd16_mul_epi32(vA1Lo, vB2Lo); // 0 1 4 5 8 9 C D
    simd16scalari vA1B2Hi = _simd16_mul_epi32(vA1Hi, vB2Hi); // 2 3 6 7 A B E F

    simd16scalari vA2Lo = _simd16_unpacklo_epi32(vA[2], vA[2]);
    simd16scalari vA2Hi = _simd16_unpackhi_epi32(vA[2], vA[2]);

    simd16scalari vB1Lo = _simd16_unpacklo_epi32(vB[1], vB[1]);
    simd16scalari vB1Hi = _simd16_unpackhi_epi32(vB[1], vB[1]);

    simd16scalari vA2B1Lo = _simd16_mul_epi32(vA2Lo, vB1Lo);
    simd16scalari vA2B1Hi = _simd16_mul_epi32(vA2Hi, vB1Hi);

    simd16scalari detLo = _simd16_sub_epi64(vA1B2Lo, vA2B1Lo);
    simd16scalari detHi = _simd16_sub_epi64(vA1B2Hi, vA2B1Hi);

    // shuffle 0 1 4 5 -> 0 1 2 3
    simd16scalari vResultLo = _simd16_permute2f128_si(detLo, detHi, 0x20);
    simd16scalari vResultHi = _simd16_permute2f128_si(detLo, detHi, 0x31);

    pvDet[0] = vResultLo;
    pvDet[1] = vResultHi;
#endif
}
#endif
//////////////////////////////////////////////////////////////////////////
/// @brief Computes the C coefficient of the edge equations so that
///        A*x + B*y + C = 0 at the triangle's verts: C = -A*x - B*y.
/// @param vX - vertex x coords
/// @param vY - vertex y coords
/// @param vA - A edge coefficients
/// @param vB - B edge coefficients
/// @param vC - out: C edge coefficients
inline
void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
{
    // vC = -(A*x) - B*y
    vC = _mm_mul_ps(vA, vX);
    const __m128 vBy = _mm_mul_ps(vB, vY);
    vC = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
    vC = _mm_sub_ps(vC, vBy);
}
275 void viewportTransform(__m128
&vX
, __m128
&vY
, __m128
&vZ
, const SWR_VIEWPORT_MATRIX
&vpMatrix
)
277 vX
= _mm_mul_ps(vX
, _mm_set1_ps(vpMatrix
.m00
));
278 vX
= _mm_add_ps(vX
, _mm_set1_ps(vpMatrix
.m30
));
280 vY
= _mm_mul_ps(vY
, _mm_set1_ps(vpMatrix
.m11
));
281 vY
= _mm_add_ps(vY
, _mm_set1_ps(vpMatrix
.m31
));
283 vZ
= _mm_mul_ps(vZ
, _mm_set1_ps(vpMatrix
.m22
));
284 vZ
= _mm_add_ps(vZ
, _mm_set1_ps(vpMatrix
.m32
));
287 template<uint32_t NumVerts
>
289 void viewportTransform(simdvector
*v
, const SWR_VIEWPORT_MATRICES
& vpMatrices
)
291 simdscalar m00
= _simd_load1_ps(&vpMatrices
.m00
[0]);
292 simdscalar m30
= _simd_load1_ps(&vpMatrices
.m30
[0]);
293 simdscalar m11
= _simd_load1_ps(&vpMatrices
.m11
[0]);
294 simdscalar m31
= _simd_load1_ps(&vpMatrices
.m31
[0]);
295 simdscalar m22
= _simd_load1_ps(&vpMatrices
.m22
[0]);
296 simdscalar m32
= _simd_load1_ps(&vpMatrices
.m32
[0]);
298 for (uint32_t i
= 0; i
< NumVerts
; ++i
)
300 v
[i
].x
= _simd_fmadd_ps(v
[i
].x
, m00
, m30
);
301 v
[i
].y
= _simd_fmadd_ps(v
[i
].y
, m11
, m31
);
302 v
[i
].z
= _simd_fmadd_ps(v
[i
].z
, m22
, m32
);
#if USE_SIMD16_FRONTEND
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 overload: applies viewport 0's transform to the x/y/z
///        components of NumVerts simd16 vertex positions in place.
/// @tparam NumVerts - number of simd16vector positions in v
/// @param v - in/out vertex positions
/// @param vpMatrices - viewport matrices (SoA); only index 0 is read
template<uint32_t NumVerts>
inline
void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES& vpMatrices)
{
    // broadcast the first viewport's scale/translate terms once
    const simd16scalar m00 = _simd16_broadcast_ss(&vpMatrices.m00[0]);
    const simd16scalar m30 = _simd16_broadcast_ss(&vpMatrices.m30[0]);
    const simd16scalar m11 = _simd16_broadcast_ss(&vpMatrices.m11[0]);
    const simd16scalar m31 = _simd16_broadcast_ss(&vpMatrices.m31[0]);
    const simd16scalar m22 = _simd16_broadcast_ss(&vpMatrices.m22[0]);
    const simd16scalar m32 = _simd16_broadcast_ss(&vpMatrices.m32[0]);

    for (uint32_t i = 0; i < NumVerts; ++i)
    {
        // component' = component * scale + translate
        v[i].x = _simd16_fmadd_ps(v[i].x, m00, m30);
        v[i].y = _simd16_fmadd_ps(v[i].y, m11, m31);
        v[i].z = _simd16_fmadd_ps(v[i].z, m22, m32);
    }
}
#endif
327 template<uint32_t NumVerts
>
329 void viewportTransform(simdvector
*v
, const SWR_VIEWPORT_MATRICES
& vpMatrices
, simdscalari vViewportIdx
)
331 // perform a gather of each matrix element based on the viewport array indexes
332 simdscalar m00
= _simd_i32gather_ps(&vpMatrices
.m00
[0], vViewportIdx
, 4);
333 simdscalar m30
= _simd_i32gather_ps(&vpMatrices
.m30
[0], vViewportIdx
, 4);
334 simdscalar m11
= _simd_i32gather_ps(&vpMatrices
.m11
[0], vViewportIdx
, 4);
335 simdscalar m31
= _simd_i32gather_ps(&vpMatrices
.m31
[0], vViewportIdx
, 4);
336 simdscalar m22
= _simd_i32gather_ps(&vpMatrices
.m22
[0], vViewportIdx
, 4);
337 simdscalar m32
= _simd_i32gather_ps(&vpMatrices
.m32
[0], vViewportIdx
, 4);
339 for (uint32_t i
= 0; i
< NumVerts
; ++i
)
341 v
[i
].x
= _simd_fmadd_ps(v
[i
].x
, m00
, m30
);
342 v
[i
].y
= _simd_fmadd_ps(v
[i
].y
, m11
, m31
);
343 v
[i
].z
= _simd_fmadd_ps(v
[i
].z
, m22
, m32
);
#if USE_SIMD16_FRONTEND
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 overload of the per-lane viewport transform: each lane
///        selects its own viewport matrix via vViewportIdx.
/// @tparam NumVerts - number of simd16vector positions in v
/// @param v - in/out vertex positions
/// @param vpMatrices - viewport matrices (SoA)
/// @param vViewportIdx - per-lane viewport array index
template<uint32_t NumVerts>
inline
void viewportTransform(simd16vector *v, const SWR_VIEWPORT_MATRICES& vpMatrices, simd16scalari vViewportIdx)
{
    // perform a gather of each matrix element based on the viewport array indexes
    const simd16scalar m00 = _simd16_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
    const simd16scalar m30 = _simd16_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 4);
    const simd16scalar m11 = _simd16_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 4);
    const simd16scalar m31 = _simd16_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 4);
    const simd16scalar m22 = _simd16_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 4);
    const simd16scalar m32 = _simd16_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 4);

    for (uint32_t i = 0; i < NumVerts; ++i)
    {
        // component' = component * scale + translate
        v[i].x = _simd16_fmadd_ps(v[i].x, m00, m30);
        v[i].y = _simd16_fmadd_ps(v[i].y, m11, m31);
        v[i].z = _simd16_fmadd_ps(v[i].z, m22, m32);
    }
}
#endif
370 void calcBoundingBoxInt(const __m128i
&vX
, const __m128i
&vY
, SWR_RECT
&bbox
)
372 // Need horizontal fp min here
373 __m128i vX1
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 2, 0, 1));
374 __m128i vX2
= _mm_shuffle_epi32(vX
, _MM_SHUFFLE(3, 0, 1, 2));
376 __m128i vY1
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 2, 0, 1));
377 __m128i vY2
= _mm_shuffle_epi32(vY
, _MM_SHUFFLE(3, 0, 1, 2));
380 __m128i vMinX
= _mm_min_epi32(vX
, vX1
);
381 vMinX
= _mm_min_epi32(vMinX
, vX2
);
383 __m128i vMaxX
= _mm_max_epi32(vX
, vX1
);
384 vMaxX
= _mm_max_epi32(vMaxX
, vX2
);
386 __m128i vMinY
= _mm_min_epi32(vY
, vY1
);
387 vMinY
= _mm_min_epi32(vMinY
, vY2
);
389 __m128i vMaxY
= _mm_max_epi32(vY
, vY1
);
390 vMaxY
= _mm_max_epi32(vMaxY
, vY2
);
392 bbox
.xmin
= _mm_extract_epi32(vMinX
, 0);
393 bbox
.xmax
= _mm_extract_epi32(vMaxX
, 0);
394 bbox
.ymin
= _mm_extract_epi32(vMinY
, 0);
395 bbox
.ymax
= _mm_extract_epi32(vMaxY
, 0);
399 bool CanUseSimplePoints(DRAW_CONTEXT
*pDC
)
401 const API_STATE
& state
= GetApiState(pDC
);
403 return (state
.rastState
.sampleCount
== SWR_MULTISAMPLE_1X
&&
404 state
.rastState
.pointSize
== 1.0f
&&
405 !state
.rastState
.pointParam
&&
406 !state
.rastState
.pointSpriteEnable
);
//////////////////////////////////////////////////////////////////////////
/// @brief Returns true if any lane of the vector is NaN.
/// @param vec - 4 floats to test
inline
bool vHasNaN(const __m128& vec)
{
    // a NaN compares unordered with itself, so cmpunord(vec, vec) sets a
    // lane's mask exactly when that lane is NaN
    const __m128 result = _mm_cmpunord_ps(vec, vec);
    const int32_t mask = _mm_movemask_ps(result);
    // BUGFIX(review): the function computed mask but had no return statement;
    // restored the result.
    return (mask != 0);
}
// Number of primitives produced by numElements vertices/indices of the given
// topology (implementation elsewhere in the frontend).
uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);

// Number of vertices making up one primitive of the given topology;
// includeAdjVerts selects whether adjacency vertices are counted.
uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
421 // ProcessDraw front-end function. All combinations of parameter values are available
422 PFN_FE_WORK_FUNC
GetProcessDrawFunc(
424 bool IsCutIndexEnabled
,
425 bool HasTessellation
,
426 bool HasGeometryShader
,
428 bool HasRasterization
);
// Front-end work functions for non-draw operations. Each receives the global
// context, the draw context for the operation, the id of the worker executing
// it, and an operation-specific pUserData payload.
void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
436 PFN_PROCESS_PRIMS
GetBinTrianglesFunc(bool IsConservative
);
437 #if USE_SIMD16_FRONTEND
438 PFN_PROCESS_PRIMS_SIMD16
GetBinTrianglesFunc_simd16(bool IsConservative
);
struct PA_STATE_BASE;  // forward decl

// Point/line binning entry points. prims[3] carries SoA vertex data for up to
// one SIMD-width of primitives; primMask selects the live lanes, and
// primID/viewportIdx supply per-lane primitive id and viewport array index.
void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
#if USE_SIMD16_FRONTEND
// SIMD16-wide variants of the point/line binners; same contract as the
// simdvector versions above but operating on 16 lanes per call.
void SIMDAPI BinPoints_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);
void SIMDAPI BinLines_simd16(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simd16vector prims[3], uint32_t primMask, simd16scalari primID, simd16scalari viewportIdx);