swr/rast: simdlib cleanup, clipper stack space fixes
mesa.git: src/gallium/drivers/swr/rasterizer/common/simdlib_interface.hpp
/****************************************************************************
 * Copyright (C) 2017 Intel Corporation. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ****************************************************************************/
#pragma once
#if 0
//===========================================================================
// Placeholder name representing any of the SIMD4, SIMD256, or SIMD16 structures.
//===========================================================================
struct SIMD256 // or SIMD4 or SIMD16
{
    //=======================================================================
    // SIMD Types
    //
    // These typedefs are examples; each width (SIMD4, SIMD256, SIMD16) maps
    // these same names onto its own underlying base types.
    using Float   = __m256;  // Packed single-precision float vector
    using Double  = __m256d; // Packed double-precision float vector
    using Integer = __m256i; // Packed integer vector (mutable element widths)
    using Mask    = uint8_t; // Integer representing mask bits

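    // Example (illustrative sketch, not part of the interface): because every
    // width exposes the same type names and member functions, helpers can be
    // written once as templates over the SIMD structure. "SIMD_T" and "MulAdd"
    // are hypothetical names used only for this sketch.
    //
    //     template <typename SIMD_T>
    //     typename SIMD_T::Float MulAdd(typename SIMD_T::Float x,
    //                                   typename SIMD_T::Float scale,
    //                                   typename SIMD_T::Float bias)
    //     {
    //         // Same source compiles for 4-, 8-, or 16-wide vectors.
    //         return SIMD_T::add_ps(SIMD_T::mul_ps(x, scale), bias);
    //     }
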
    //=======================================================================
    // Standard interface
    // (available in both SIMD256 and SIMD16 widths)
    //=======================================================================

    //-----------------------------------------------------------------------
    // Single precision floating point arithmetic operations
    //-----------------------------------------------------------------------
    static Float add_ps(Float a, Float b);             // return a + b
    static Float div_ps(Float a, Float b);             // return a / b
    static Float fmadd_ps(Float a, Float b, Float c);  // return (a * b) + c
    static Float fmsub_ps(Float a, Float b, Float c);  // return (a * b) - c
    static Float max_ps(Float a, Float b);             // return (a > b) ? a : b
    static Float min_ps(Float a, Float b);             // return (a < b) ? a : b
    static Float mul_ps(Float a, Float b);             // return a * b
    static Float rcp_ps(Float a);                      // return 1.0f / a
    static Float rsqrt_ps(Float a);                    // return 1.0f / sqrt(a)
    static Float sub_ps(Float a, Float b);              // return a - b

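    // Example (illustrative sketch, not part of the interface): a per-lane
    // linear interpolation built from the operations above. The helper name
    // "Lerp" is hypothetical; SIMD256 is the placeholder structure name from
    // this file.
    //
    //     inline SIMD256::Float Lerp(SIMD256::Float a, SIMD256::Float b, SIMD256::Float t)
    //     {
    //         // a + t * (b - a), with the final multiply-add fused into one operation
    //         return SIMD256::fmadd_ps(t, SIMD256::sub_ps(b, a), a);
    //     }
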
    enum class RoundMode
    {
        TO_NEAREST_INT = 0x00, // Round to nearest integer (ties round to even)
        TO_NEG_INF     = 0x01, // Round to negative infinity
        TO_POS_INF     = 0x02, // Round to positive infinity
        TO_ZERO        = 0x03, // Round toward 0, a.k.a. truncate
        CUR_DIRECTION  = 0x04, // Round in the direction set in the MXCSR register

        RAISE_EXC      = 0x00, // Do not suppress precision (inexact) exceptions
        NO_EXC         = 0x08, // Suppress precision exceptions

        NINT        = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
        NINT_NOEXC  = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
        FLOOR       = static_cast<int>(TO_NEG_INF)     | static_cast<int>(RAISE_EXC),
        FLOOR_NOEXC = static_cast<int>(TO_NEG_INF)     | static_cast<int>(NO_EXC),
        CEIL        = static_cast<int>(TO_POS_INF)     | static_cast<int>(RAISE_EXC),
        CEIL_NOEXC  = static_cast<int>(TO_POS_INF)     | static_cast<int>(NO_EXC),
        TRUNC       = static_cast<int>(TO_ZERO)        | static_cast<int>(RAISE_EXC),
        TRUNC_NOEXC = static_cast<int>(TO_ZERO)        | static_cast<int>(NO_EXC),
        RINT        = static_cast<int>(CUR_DIRECTION)  | static_cast<int>(RAISE_EXC),
        NEARBYINT   = static_cast<int>(CUR_DIRECTION)  | static_cast<int>(NO_EXC),
    };

    // return round_func(a)
    //
    // round_func is chosen based on the RMT template parameter. See the
    // documentation for the RoundMode enumeration above.
    template <RoundMode RMT>
    static Float round_ps(Float a); // return round(a)

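    // Example (illustrative sketch, not part of the interface): per-lane
    // fractional part built on round_ps. "Frac" is a hypothetical helper name.
    //
    //     inline SIMD256::Float Frac(SIMD256::Float x)
    //     {
    //         // x - floor(x); FLOOR rounds toward negative infinity
    //         SIMD256::Float flr = SIMD256::round_ps<SIMD256::RoundMode::FLOOR>(x);
    //         return SIMD256::sub_ps(x, flr);
    //     }
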
    //-----------------------------------------------------------------------
    // Integer (various width) arithmetic operations
    //-----------------------------------------------------------------------
    static Integer abs_epi32(Integer a);             // return absolute_value(a) (int32)
    static Integer add_epi32(Integer a, Integer b);  // return a + b (int32)
    static Integer add_epi8(Integer a, Integer b);   // return a + b (int8)
    static Integer adds_epu8(Integer a, Integer b);  // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8)
    static Integer max_epi32(Integer a, Integer b);  // return (a > b) ? a : b (int32)
    static Integer max_epu32(Integer a, Integer b);  // return (a > b) ? a : b (uint32)
    static Integer min_epi32(Integer a, Integer b);  // return (a < b) ? a : b (int32)
    static Integer min_epu32(Integer a, Integer b);  // return (a < b) ? a : b (uint32)
    static Integer mul_epi32(Integer a, Integer b);  // return a * b (int32)

    // return (a * b) & 0xFFFFFFFF
    //
    // Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit
    // integers, and store the low 32 bits of the intermediate integers in dst.
    static Integer mullo_epi32(Integer a, Integer b);

    static Integer sub_epi32(Integer a, Integer b);  // return a - b (int32)
    static Integer sub_epi64(Integer a, Integer b);  // return a - b (int64)
    static Integer subs_epu8(Integer a, Integer b);  // return (b > a) ? 0 : (a - b) (uint8)

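    // Example (illustrative sketch, not part of the interface): a 32-bit
    // integer multiply-accumulate that keeps only the low 32 bits of each
    // product. "MulAdd32" is a hypothetical helper name.
    //
    //     inline SIMD256::Integer MulAdd32(SIMD256::Integer a, SIMD256::Integer b, SIMD256::Integer c)
    //     {
    //         // (a * b) + c per 32-bit lane, product truncated to 32 bits
    //         return SIMD256::add_epi32(SIMD256::mullo_epi32(a, b), c);
    //     }
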
    //-----------------------------------------------------------------------
    // Logical operations
    //-----------------------------------------------------------------------
    static Float   and_ps(Float a, Float b);         // return a & b (float treated as int)
    static Integer and_si(Integer a, Integer b);     // return a & b (int)
    static Float   andnot_ps(Float a, Float b);      // return (~a) & b (float treated as int)
    static Integer andnot_si(Integer a, Integer b);  // return (~a) & b (int)
    static Float   or_ps(Float a, Float b);          // return a | b (float treated as int)
    static Integer or_si(Integer a, Integer b);      // return a | b (int)
    static Float   xor_ps(Float a, Float b);         // return a ^ b (float treated as int)
    static Integer xor_si(Integer a, Integer b);     // return a ^ b (int)

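    // Example (illustrative sketch, not part of the interface): a branchless
    // per-lane select built from the logical operations, for masks produced by
    // the comparison operations below (all-ones or all-zeros per lane).
    // "SelectPS" is a hypothetical helper name.
    //
    //     inline SIMD256::Float SelectPS(SIMD256::Float mask, SIMD256::Float ifSet, SIMD256::Float ifClear)
    //     {
    //         // (mask & ifSet) | (~mask & ifClear)
    //         return SIMD256::or_ps(SIMD256::and_ps(mask, ifSet),
    //                               SIMD256::andnot_ps(mask, ifClear));
    //     }
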
    //-----------------------------------------------------------------------
    // Shift operations
    //-----------------------------------------------------------------------
    template <int ImmT>
    static Integer slli_epi32(Integer a);            // return a << ImmT
    static Integer sllv_epi32(Integer a, Integer b); // return a << b
    template <int ImmT>
    static Integer srai_epi32(Integer a);            // return a >> ImmT (int32)
    template <int ImmT>
    static Integer srli_epi32(Integer a);            // return a >> ImmT (uint32)
    template <int ImmT>                              // for each 128-bit lane:
    static Integer srli_si(Integer a);               //     return a >> (ImmT * 8) (uint)
    template <int ImmT>
    static Float srlisi_ps(Float a);                 // same as srli_si, but with Float cast to int
    static Integer srlv_epi32(Integer a, Integer b); // return a >> b (uint32)

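    // Example (illustrative sketch, not part of the interface): extracting an
    // 8-bit field from each 32-bit lane with an immediate shift and a mask.
    // "ExtractByte2" is a hypothetical helper name.
    //
    //     inline SIMD256::Integer ExtractByte2(SIMD256::Integer packed)
    //     {
    //         // (packed >> 16) & 0xFF per 32-bit lane
    //         return SIMD256::and_si(SIMD256::srli_epi32<16>(packed),
    //                                SIMD256::set1_epi32(0xFF));
    //     }
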
    //-----------------------------------------------------------------------
    // Conversion operations
    //-----------------------------------------------------------------------
    static Float   castpd_ps(Double a);       // return *(Float*)(&a)
    static Integer castps_si(Float a);        // return *(Integer*)(&a)
    static Double  castsi_pd(Integer a);      // return *(Double*)(&a)
    static Double  castps_pd(Float a);        // return *(Double*)(&a)
    static Float   castsi_ps(Integer a);      // return *(Float*)(&a)
    static Float   cvtepi32_ps(Integer a);    // return (float)a  (int32 --> float)
    static Integer cvtepu8_epi16(Integer a);  // return (int16)a  (uint8 --> int16)
    static Integer cvtepu8_epi32(Integer a);  // return (int32)a  (uint8 --> int32)
    static Integer cvtepu16_epi32(Integer a); // return (int32)a  (uint16 --> int32)
    static Integer cvtepu16_epi64(Integer a); // return (int64)a  (uint16 --> int64)
    static Integer cvtepu32_epi64(Integer a); // return (int64)a  (uint32 --> int64)
    static Integer cvtps_epi32(Float a);      // return (int32)a  (float --> int32)
    static Integer cvttps_epi32(Float a);     // return (int32)a  (rnd_to_zero(float) --> int32)

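    // Example (illustrative sketch, not part of the interface): quantizing
    // normalized floats to 8-bit integers. cvtps_epi32 rounds using the
    // current rounding mode, while cvttps_epi32 truncates toward zero.
    // "QuantizeUnorm8" is a hypothetical helper name.
    //
    //     inline SIMD256::Integer QuantizeUnorm8(SIMD256::Float x01)
    //     {
    //         // round(x * 255.0f) per lane
    //         SIMD256::Float scaled = SIMD256::mul_ps(x01, SIMD256::set1_ps(255.0f));
    //         return SIMD256::cvtps_epi32(scaled);
    //     }
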
    //-----------------------------------------------------------------------
    // Comparison operations
    //-----------------------------------------------------------------------

    // Comparison types used with cmp_ps:
    //  - ordered comparisons are always false if either operand is NaN
    //  - unordered comparisons are always true if either operand is NaN
    //  - signaling comparisons raise an exception if either operand is NaN
    //  - non-signaling comparisons never raise an exception
    //
    // Ordered:   return !isnan(a) && !isnan(b) && (a cmp b)
    // Unordered: return  isnan(a) ||  isnan(b) || (a cmp b)
    enum class CompareType
    {
        EQ_OQ    = 0x00, // Equal (ordered, non-signaling)
        LT_OS    = 0x01, // Less-than (ordered, signaling)
        LE_OS    = 0x02, // Less-than-or-equal (ordered, signaling)
        UNORD_Q  = 0x03, // Unordered (non-signaling)
        NEQ_UQ   = 0x04, // Not-equal (unordered, non-signaling)
        NLT_US   = 0x05, // Not-less-than (unordered, signaling)
        NLE_US   = 0x06, // Not-less-than-or-equal (unordered, signaling)
        ORD_Q    = 0x07, // Ordered (non-signaling)
        EQ_UQ    = 0x08, // Equal (unordered, non-signaling)
        NGE_US   = 0x09, // Not-greater-than-or-equal (unordered, signaling)
        NGT_US   = 0x0A, // Not-greater-than (unordered, signaling)
        FALSE_OQ = 0x0B, // False (ordered, non-signaling)
        NEQ_OQ   = 0x0C, // Not-equal (ordered, non-signaling)
        GE_OS    = 0x0D, // Greater-than-or-equal (ordered, signaling)
        GT_OS    = 0x0E, // Greater-than (ordered, signaling)
        TRUE_UQ  = 0x0F, // True (unordered, non-signaling)
        EQ_OS    = 0x10, // Equal (ordered, signaling)
        LT_OQ    = 0x11, // Less-than (ordered, non-signaling)
        LE_OQ    = 0x12, // Less-than-or-equal (ordered, non-signaling)
        UNORD_S  = 0x13, // Unordered (signaling)
        NEQ_US   = 0x14, // Not-equal (unordered, signaling)
        NLT_UQ   = 0x15, // Not-less-than (unordered, non-signaling)
        NLE_UQ   = 0x16, // Not-less-than-or-equal (unordered, non-signaling)
        ORD_S    = 0x17, // Ordered (signaling)
        EQ_US    = 0x18, // Equal (unordered, signaling)
        NGE_UQ   = 0x19, // Not-greater-than-or-equal (unordered, non-signaling)
        NGT_UQ   = 0x1A, // Not-greater-than (unordered, non-signaling)
        FALSE_OS = 0x1B, // False (ordered, signaling)
        NEQ_OS   = 0x1C, // Not-equal (ordered, signaling)
        GE_OQ    = 0x1D, // Greater-than-or-equal (ordered, non-signaling)
        GT_OQ    = 0x1E, // Greater-than (ordered, non-signaling)
        TRUE_US  = 0x1F, // True (unordered, signaling)
    };

    // return a (CmpTypeT) b (float)
    //
    // See the documentation for CompareType above for valid values of CmpTypeT.
    template <CompareType CmpTypeT>
    static Float cmp_ps(Float a, Float b);            // return a (CmpTypeT) b (see above)
    static Float cmpgt_ps(Float a, Float b);          // return cmp_ps<CompareType::GT_OQ>(a, b)
    static Float cmple_ps(Float a, Float b);          // return cmp_ps<CompareType::LE_OQ>(a, b)
    static Float cmplt_ps(Float a, Float b);          // return cmp_ps<CompareType::LT_OQ>(a, b)
    static Float cmpneq_ps(Float a, Float b);         // return cmp_ps<CompareType::NEQ_OQ>(a, b)
    static Float cmpeq_ps(Float a, Float b);          // return cmp_ps<CompareType::EQ_OQ>(a, b)
    static Float cmpge_ps(Float a, Float b);          // return cmp_ps<CompareType::GE_OQ>(a, b)
    static Integer cmpeq_epi8(Integer a, Integer b);  // return a == b (int8)
    static Integer cmpeq_epi16(Integer a, Integer b); // return a == b (int16)
    static Integer cmpeq_epi32(Integer a, Integer b); // return a == b (int32)
    static Integer cmpeq_epi64(Integer a, Integer b); // return a == b (int64)
    static Integer cmpgt_epi8(Integer a, Integer b);  // return a > b (int8)
    static Integer cmpgt_epi16(Integer a, Integer b); // return a > b (int16)
    static Integer cmpgt_epi32(Integer a, Integer b); // return a > b (int32)
    static Integer cmpgt_epi64(Integer a, Integer b); // return a > b (int64)
    static Integer cmplt_epi32(Integer a, Integer b); // return a < b (int32)
    static bool testz_ps(Float a, Float b);           // return all_lanes_zero(a & b) ? 1 : 0 (float)
    static bool testz_si(Integer a, Integer b);       // return all_lanes_zero(a & b) ? 1 : 0 (int)

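    // Example (illustrative sketch, not part of the interface): using a
    // comparison mask with testz_ps to skip work when no lane qualifies.
    // "AnyLaneNegative" is a hypothetical helper name.
    //
    //     inline bool AnyLaneNegative(SIMD256::Float x)
    //     {
    //         SIMD256::Float neg = SIMD256::cmplt_ps(x, SIMD256::setzero_ps());
    //         // testz_ps returns true only when (neg & neg) is zero in every lane
    //         return !SIMD256::testz_ps(neg, neg);
    //     }
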
    //-----------------------------------------------------------------------
    // Blend / shuffle / permute operations
    //-----------------------------------------------------------------------
    template <int ImmT>
    static Float blend_ps(Float a, Float b);                       // return (ImmT & (1 << i)) ? b[i] : a[i] (float)
    static Integer blendv_epi32(Integer a, Integer b, Float mask); // return mask[i] ? b[i] : a[i] (int)
    static Float blendv_ps(Float a, Float b, Float mask);          // return mask[i] ? b[i] : a[i] (float)
    static Float broadcast_ss(float const* p);                     // return *p (all elements in vector get same value)
    static Integer packs_epi16(Integer a, Integer b);  // See documentation for _mm256_packs_epi16 and _mm512_packs_epi16
    static Integer packs_epi32(Integer a, Integer b);  // See documentation for _mm256_packs_epi32 and _mm512_packs_epi32
    static Integer packus_epi16(Integer a, Integer b); // See documentation for _mm256_packus_epi16 and _mm512_packus_epi16
    static Integer packus_epi32(Integer a, Integer b); // See documentation for _mm256_packus_epi32 and _mm512_packus_epi32
    static Integer permute_epi32(Integer a, Integer swiz); // return a[swiz[i]] for each 32-bit lane i (int32)
    static Float permute_ps(Float a, Integer swiz);         // return a[swiz[i]] for each 32-bit lane i (float)
    template <int SwizT>
    static Integer shuffle_epi32(Integer a, Integer b);
    template <int SwizT>
    static Integer shuffle_epi64(Integer a, Integer b);
    static Integer shuffle_epi8(Integer a, Integer b);
    template <int SwizT>
    static Double shuffle_pd(Double a, Double b);
    template <int SwizT>
    static Float shuffle_ps(Float a, Float b);
    static Integer unpackhi_epi16(Integer a, Integer b);
    static Integer unpackhi_epi32(Integer a, Integer b);
    static Integer unpackhi_epi64(Integer a, Integer b);
    static Integer unpackhi_epi8(Integer a, Integer b);
    static Double unpackhi_pd(Double a, Double b);
    static Float unpackhi_ps(Float a, Float b);
    static Integer unpacklo_epi16(Integer a, Integer b);
    static Integer unpacklo_epi32(Integer a, Integer b);
    static Integer unpacklo_epi64(Integer a, Integer b);
    static Integer unpacklo_epi8(Integer a, Integer b);
    static Double unpacklo_pd(Double a, Double b);
    static Float unpacklo_ps(Float a, Float b);

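    // Example (illustrative sketch, not part of the interface): blendv_ps as a
    // per-lane select driven by a comparison mask, here raising lanes below a
    // floor value. "ClampBelow" is a hypothetical helper name; the same result
    // could also be obtained with max_ps.
    //
    //     inline SIMD256::Float ClampBelow(SIMD256::Float x, SIMD256::Float floorVal)
    //     {
    //         SIMD256::Float tooSmall = SIMD256::cmplt_ps(x, floorVal);
    //         // where tooSmall is set, take floorVal; elsewhere keep x
    //         return SIMD256::blendv_ps(x, floorVal, tooSmall);
    //     }
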
    //-----------------------------------------------------------------------
    // Load / store operations
    //-----------------------------------------------------------------------
    enum class ScaleFactor
    {
        SF_1, // No scaling
        SF_2, // Scale offset by 2
        SF_4, // Scale offset by 4
        SF_8, // Scale offset by 8
    };

    template <ScaleFactor ScaleT = ScaleFactor::SF_1>
    static Float i32gather_ps(float const* p, Integer idx); // return *(float*)(((int8*)p) + (idx * ScaleT))
    static Float load1_ps(float const* p);     // return *p (broadcast 1 value to all elements)
    static Float load_ps(float const* p);      // return *p (loads SIMD width elements from memory)
    static Integer load_si(Integer const* p);  // return *p
    static Float loadu_ps(float const* p);     // return *p (same as load_ps but allows unaligned memory)
    static Integer loadu_si(Integer const* p); // return *p (same as load_si but allows unaligned memory)

    // for each element: (mask & (1 << 31)) ? (i32gather_ps<ScaleT>(p, idx), mask = 0) : old
    template <ScaleFactor ScaleT>
    static Float mask_i32gather_ps(Float old, float const* p, Integer idx, Float mask);

    static void maskstore_ps(float* p, Integer mask, Float src);
    static int movemask_epi8(Integer a);
    static int movemask_pd(Double a);
    static int movemask_ps(Float a);
    static Integer set1_epi32(int i); // return i (all elements are same value)
    static Integer set1_epi8(char i); // return i (all elements are same value)
    static Float set1_ps(float f);    // return f (all elements are same value)
    static Float setzero_ps();        // return 0 (float)
    static Integer setzero_si();      // return 0 (integer)
    static void store_ps(float* p, Float a);     // *p = a (stores all elements contiguously in memory)
    static void store_si(Integer* p, Integer a); // *p = a
    static void stream_ps(float* p, Float a);    // *p = a (same as store_ps, but uses a non-temporal hint so the data is not kept in cache)

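    // Example (illustrative sketch, not part of the interface): gathering one
    // float per lane from an array of 4-byte elements, scaling each index by
    // sizeof(float) with SF_4. "GatherFloats" and "pBase" are hypothetical names.
    //
    //     inline SIMD256::Float GatherFloats(const float* pBase, SIMD256::Integer idx)
    //     {
    //         // loads pBase[idx[i]] into lane i
    //         return SIMD256::i32gather_ps<SIMD256::ScaleFactor::SF_4>(pBase, idx);
    //     }
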
    //=======================================================================
    // Legacy interface (available only in SIMD256 width)
    //=======================================================================

    static Float broadcast_ps(__m128 const* p);
    template <int ImmT>
    static __m128d extractf128_pd(Double a);
    template <int ImmT>
    static __m128 extractf128_ps(Float a);
    template <int ImmT>
    static __m128i extractf128_si(Integer a);
    template <int ImmT>
    static Double insertf128_pd(Double a, __m128d b);
    template <int ImmT>
    static Float insertf128_ps(Float a, __m128 b);
    template <int ImmT>
    static Integer insertf128_si(Integer a, __m128i b);
    static Integer loadu2_si(__m128i const* phi, __m128i const* plo);
    template <int ImmT>
    static Double permute2f128_pd(Double a, Double b);
    template <int ImmT>
    static Float permute2f128_ps(Float a, Float b);
    template <int ImmT>
    static Integer permute2f128_si(Integer a, Integer b);
    static Integer set_epi32(int i7, int i6, int i5, int i4, int i3, int i2, int i1, int i0);
    static void storeu2_si(__m128i* phi, __m128i* plo, Integer src);

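    // Example (illustrative sketch, not part of the interface): the legacy
    // helpers operate on 128-bit halves of a 256-bit register, e.g. swapping
    // the low and high halves. "SwapHalves" is a hypothetical helper name;
    // permute2f128_ps<0x01>(a, a) would produce the same result in one step.
    //
    //     inline SIMD256::Float SwapHalves(SIMD256::Float a)
    //     {
    //         __m128 lo = SIMD256::extractf128_ps<0>(a);
    //         __m128 hi = SIMD256::extractf128_ps<1>(a);
    //         // reinsert the halves in exchanged positions
    //         SIMD256::Float r = SIMD256::insertf128_ps<0>(a, hi);
    //         return SIMD256::insertf128_ps<1>(r, lo);
    //     }
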
    //=======================================================================
    // Advanced masking interface (currently available only in SIMD16 width)
    //=======================================================================

    //=======================================================================
    // Extended Utility Functions (common to SIMD256 and SIMD16)
    //=======================================================================

    //-----------------------------------------------------------------------
    // Extended Types
    //-----------------------------------------------------------------------

    // Vec4, an SOA SIMD set of 4-dimensional vectors
    union Vec4
    {
        Vec4() = default;
        Vec4(Float in)
        {
            s.x = in;
            s.y = in;
            s.z = in;
            s.w = in;
        }
        Vec4(Float x, Float y, Float z, Float w)
        {
            s.x = x;
            s.y = y;
            s.z = z;
            s.w = w;
        }

        Float   v[4];
        Integer vi[4];
        struct
        {
            Float x;
            Float y;
            Float z;
            Float w;
        } s;
        Float&       operator[](const int i)       { return v[i]; }
        Float const& operator[](const int i) const { return v[i]; }
    };

    //-----------------------------------------------------------------------
    // Extended Functions
    //-----------------------------------------------------------------------
    static void vec4_set1_ps(Vec4& r, const float* p);                // r[0] = set1(p[0]), r[1] = set1(p[1]), ...
    static void vec4_set1_vps(Vec4& r, Float s);                      // r[0] = s, r[1] = s, ...
    static Float vec4_dp3_ps(const Vec4& v0, const Vec4& v1);         // return dp3(v0, v1)
    static Float vec4_dp4_ps(const Vec4& v0, const Vec4& v1);         // return dp4(v0, v1)
    static Float vec4_rcp_length_ps(const Vec4& v);                   // return 1.0f / sqrt(dp4(v, v))
    static void vec4_normalize_ps(Vec4& r, const Vec4& v);            // r = v * rcp_length(v)
    static void vec4_mul_ps(Vec4& r, const Vec4& v, Float s);         // r = v * set1_vps(s)
    static void vec4_mul_ps(Vec4& r, const Vec4& v0, const Vec4& v1); // r = v0 * v1
    static void vec4_add_ps(Vec4& r, const Vec4& v0, const Vec4& v1); // r = v0 + v1
    static void vec4_min_ps(Vec4& r, const Vec4& v0, Float s);        // r = (v0 < s) ? v0 : s
    static void vec4_max_ps(Vec4& r, const Vec4& v0, Float s);        // r = (v0 > s) ? v0 : s

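    // Example (illustrative sketch, not part of the interface): the Vec4
    // helpers treat each component as one SIMD register, so a whole packet of
    // N.L lighting terms can be computed at once. "DiffuseTerm" is a
    // hypothetical helper name.
    //
    //     inline SIMD256::Float DiffuseTerm(const SIMD256::Vec4& normal,
    //                                       const SIMD256::Vec4& lightDir)
    //     {
    //         // dot(normal.xyz, lightDir.xyz) per lane, clamped at zero
    //         SIMD256::Float nDotL = SIMD256::vec4_dp3_ps(normal, lightDir);
    //         return SIMD256::max_ps(nDotL, SIMD256::setzero_ps());
    //     }
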
    // Matrix4x4 * Vector4
    //   result.s.x = (m00 * v.s.x) + (m01 * v.s.y) + (m02 * v.s.z) + (m03 * v.s.w)
    //   result.s.y = (m10 * v.s.x) + (m11 * v.s.y) + (m12 * v.s.z) + (m13 * v.s.w)
    //   result.s.z = (m20 * v.s.x) + (m21 * v.s.y) + (m22 * v.s.z) + (m23 * v.s.w)
    //   result.s.w = (m30 * v.s.x) + (m31 * v.s.y) + (m32 * v.s.z) + (m33 * v.s.w)
    static void mat4x4_vec4_multiply(
        Vec4& result,
        const float* pMatrix,
        const Vec4& v);

    // Matrix4x4 * Vector3 - Direction vector where w = 0.
    //   result.s.x = (m00 * v.s.x) + (m01 * v.s.y) + (m02 * v.s.z) + (m03 * 0)
    //   result.s.y = (m10 * v.s.x) + (m11 * v.s.y) + (m12 * v.s.z) + (m13 * 0)
    //   result.s.z = (m20 * v.s.x) + (m21 * v.s.y) + (m22 * v.s.z) + (m23 * 0)
    //   result.s.w = (m30 * v.s.x) + (m31 * v.s.y) + (m32 * v.s.z) + (m33 * 0)
    static void mat3x3_vec3_w0_multiply(
        Vec4& result,
        const float* pMatrix,
        const Vec4& v);

    // Matrix4x4 * Vector3 - Position vector where w = 1.
    //   result.s.x = (m00 * v.s.x) + (m01 * v.s.y) + (m02 * v.s.z) + (m03 * 1)
    //   result.s.y = (m10 * v.s.x) + (m11 * v.s.y) + (m12 * v.s.z) + (m13 * 1)
    //   result.s.z = (m20 * v.s.x) + (m21 * v.s.y) + (m22 * v.s.z) + (m23 * 1)
    //   result.s.w = (m30 * v.s.x) + (m31 * v.s.y) + (m32 * v.s.z) + (m33 * 1)
    static void mat4x4_vec3_w1_multiply(
        Vec4& result,
        const float* pMatrix,
        const Vec4& v);

    // Matrix4x3 * Vector3 - Position vector where w = 1.
    //   result.s.x = (m00 * v.s.x) + (m01 * v.s.y) + (m02 * v.s.z) + (m03 * 1)
    //   result.s.y = (m10 * v.s.x) + (m11 * v.s.y) + (m12 * v.s.z) + (m13 * 1)
    //   result.s.z = (m20 * v.s.x) + (m21 * v.s.y) + (m22 * v.s.z) + (m23 * 1)
    //   result.s.w = 1
    static void mat4x3_vec3_w1_multiply(
        Vec4& result,
        const float* pMatrix,
        const Vec4& v);
};
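
// Example usage (illustrative sketch, not part of the interface): transforming
// a packet of SOA object-space positions to clip space with the matrix helpers
// above. "TransformToClip" and "pWorldViewProj" are hypothetical names; the
// matrix layout is whatever mat4x4_vec3_w1_multiply expects.
//
//     inline void TransformToClip(SIMD256::Vec4&       clipPos,
//                                 const float*         pWorldViewProj,
//                                 const SIMD256::Vec4& objPos)
//     {
//         // Treats each position as (x, y, z, 1) and writes all four
//         // clip-space components, one SIMD register per component.
//         SIMD256::mat4x4_vec3_w1_multiply(clipPos, pWorldViewProj, objPos);
//     }
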
#endif // #if 0