0738d02332104e1f8c9f2d4e1f30b8fa7f7e6e63
[mesa.git] / src / gallium / drivers / swr / rasterizer / jitter / builder_misc.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file builder_misc.cpp
24 *
25 * @brief Implementation for miscellaneous builder functions
26 *
27 * Notes:
28 *
29 ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"

#include <cstdarg>
#include <cstring>
35
36 namespace SwrJit
37 {
38 void __cdecl CallPrint(const char* fmt, ...);
39
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to a 16-bit
///        float bit pattern (1 sign bit, 5 exponent bits, 10 mantissa bits).
///
///        Conversion policy implemented below:
///        - NaN maps to a fixed NaN pattern with the sign bit set.
///        - +/-infinity maps to +/-infinity.
///        - Finite values too large for float16 clamp to the largest finite
///          float16 (they never become infinity).
///        - Values too small for a float16 denormal flush to signed zero.
///        - Everything else truncates (round toward zero), except that a
///          value only one float32 lsb below the next float16 step is
///          rounded up.
/// @param val - 32-bit float
/// @return float16 bit pattern in the low 16 bits
/// @todo Maybe move this outside of this file into a header?
static uint16_t ConvertFloat32ToFloat16(float val)
{
    // Read the raw bits with memcpy; casting the pointer violates aliasing rules.
    uint32_t uf;
    std::memcpy(&uf, &val, sizeof(uf));

    // Extract the sign, exponent, and mantissa
    uint32_t sign = (uf & 0x80000000) >> 31;
    uint32_t exp  = (uf & 0x7F800000) >> 23;
    uint32_t mant = uf & 0x007FFFFF;

    // Check for out of range
    if (std::isnan(val))
    {
        exp  = 0x1F;
        mant = 0x200;
        sign = 1; // set the sign bit for NANs
    }
    else if (std::isinf(val))
    {
        exp  = 0x1f;
        mant = 0x0;
    }
    else if (exp > (0x70 + 0x1E)) // Too big to represent -> max representable value
    {
        exp  = 0x1E;
        mant = 0x3FF;
    }
    else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
    {
        // Make the implicit leading one explicit, then shift it into place:
        // one position for every exponent step up to and including 0x70.
        mant |= 0x00800000;
        mant >>= (0x71 - exp);
        exp = 0;
        mant = mant >> 13;
    }
    else if (exp < 0x66) // Too small to represent -> Zero
    {
        exp  = 0;
        mant = 0;
    }
    else
    {
        // Saves bits that will be shifted off for rounding
        const uint32_t roundBits = mant & 0x1FFFu;
        // convert exponent and mantissa to 16 bit format
        exp  = exp - 0x70;
        mant = mant >> 13;

        // Essentially RTZ, but round up if off by only 1 lsb
        if (roundBits == 0x1FFFu)
        {
            mant++;
            // check for overflow
            if ((mant & 0xC00u) != 0)
                exp++;
            // make sure only the needed bits are used
            mant &= 0x3FF;

            // Rounding up from the largest exponent would otherwise yield the
            // infinity encoding; clamp to the max finite value like the
            // "too big" case above does.
            if (exp > 0x1E)
            {
                exp  = 0x1E;
                mant = 0x3FF;
            }
        }
    }

    uint32_t tmpVal = (sign << 15) | (exp << 10) | mant;
    return (uint16_t)tmpVal;
}
110
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 16-bit float to a 32-bit single precision
///        float.
///
///        Only the low 16 bits of val are examined; every access below masks
///        the input, so stray upper bits do not affect the result.
///        Any float16 NaN converts to one fixed float32 quiet NaN pattern
///        (the payload is not preserved); the sign bit is kept.
/// @param val - 16-bit float bit pattern
/// @return the value as a float32
/// @todo Maybe move this outside of this file into a header?
static float ConvertFloat16ToFloat32(uint32_t val)
{
    uint32_t result;
    if ((val & 0x7fff) == 0)
    {
        // signed zero
        result = ((uint32_t)(val & 0x8000)) << 16;
    }
    else if ((val & 0x7c00) == 0x7c00)
    {
        // infinity (zero mantissa) or NaN (non-zero mantissa)
        result = ((val & 0x3ff) == 0) ? 0x7f800000 : 0x7fc00000;
        result |= ((uint32_t)val & 0x8000) << 16;
    }
    else
    {
        uint32_t sign = (val & 0x8000) << 16;
        uint32_t mant = (val & 0x3ff) << 13;
        uint32_t exp  = (val >> 10) & 0x1f;
        if ((exp == 0) && (mant != 0)) // Adjust exponent and mantissa for denormals
        {
            // Normalize: shift until the leading one reaches the implicit-bit
            // position. exp is unsigned and wraps below zero here; the rebias
            // below is done modulo 256, so the wrap is harmless.
            mant <<= 1;
            while (mant < (0x400 << 13))
            {
                exp--;
                mant <<= 1;
            }
            mant &= (0x3ff << 13);
        }
        // rebias from float16 (15) to float32 (127)
        exp    = ((exp - 15 + 127) & 0xff) << 23;
        result = sign | exp | mant;
    }

    // Reinterpret the bits with memcpy; casting the pointer violates aliasing rules.
    float out;
    std::memcpy(&out, &result, sizeof(out));
    return out;
}
149
150 Constant *Builder::C(bool i)
151 {
152 return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0));
153 }
154
155 Constant *Builder::C(char i)
156 {
157 return ConstantInt::get(IRB()->getInt8Ty(), i);
158 }
159
160 Constant *Builder::C(uint8_t i)
161 {
162 return ConstantInt::get(IRB()->getInt8Ty(), i);
163 }
164
165 Constant *Builder::C(int i)
166 {
167 return ConstantInt::get(IRB()->getInt32Ty(), i);
168 }
169
170 Constant *Builder::C(int64_t i)
171 {
172 return ConstantInt::get(IRB()->getInt64Ty(), i);
173 }
174
175 Constant *Builder::C(uint16_t i)
176 {
177 return ConstantInt::get(mInt16Ty,i);
178 }
179
180 Constant *Builder::C(uint32_t i)
181 {
182 return ConstantInt::get(IRB()->getInt32Ty(), i);
183 }
184
185 Constant *Builder::C(float i)
186 {
187 return ConstantFP::get(IRB()->getFloatTy(), i);
188 }
189
190 Constant *Builder::PRED(bool pred)
191 {
192 return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
193 }
194
195 Value *Builder::VIMMED1(int i)
196 {
197 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
198 }
199
200 Value *Builder::VIMMED1_16(int i)
201 {
202 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
203 }
204
205 Value *Builder::VIMMED1(uint32_t i)
206 {
207 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
208 }
209
210 Value *Builder::VIMMED1_16(uint32_t i)
211 {
212 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
213 }
214
215 Value *Builder::VIMMED1(float i)
216 {
217 return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
218 }
219
220 Value *Builder::VIMMED1_16(float i)
221 {
222 return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
223 }
224
225 Value *Builder::VIMMED1(bool i)
226 {
227 return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
228 }
229
230 Value *Builder::VIMMED1_16(bool i)
231 {
232 return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
233 }
234
235 Value *Builder::VUNDEF_IPTR()
236 {
237 return UndefValue::get(VectorType::get(mInt32PtrTy,mVWidth));
238 }
239
240 Value *Builder::VUNDEF(Type* t)
241 {
242 return UndefValue::get(VectorType::get(t, mVWidth));
243 }
244
245 Value *Builder::VUNDEF_I()
246 {
247 return UndefValue::get(VectorType::get(mInt32Ty, mVWidth));
248 }
249
250 Value *Builder::VUNDEF_I_16()
251 {
252 return UndefValue::get(VectorType::get(mInt32Ty, mVWidth16));
253 }
254
255 Value *Builder::VUNDEF_F()
256 {
257 return UndefValue::get(VectorType::get(mFP32Ty, mVWidth));
258 }
259
260 Value *Builder::VUNDEF_F_16()
261 {
262 return UndefValue::get(VectorType::get(mFP32Ty, mVWidth16));
263 }
264
265 Value *Builder::VUNDEF(Type *ty, uint32_t size)
266 {
267 return UndefValue::get(VectorType::get(ty, size));
268 }
269
270 Value *Builder::VBROADCAST(Value *src, const llvm::Twine& name)
271 {
272 // check if src is already a vector
273 if (src->getType()->isVectorTy())
274 {
275 return src;
276 }
277
278 return VECTOR_SPLAT(mVWidth, src, name);
279 }
280
281 Value *Builder::VBROADCAST_16(Value *src)
282 {
283 // check if src is already a vector
284 if (src->getType()->isVectorTy())
285 {
286 return src;
287 }
288
289 return VECTOR_SPLAT(mVWidth16, src);
290 }
291
292 uint32_t Builder::IMMED(Value* v)
293 {
294 SWR_ASSERT(isa<ConstantInt>(v));
295 ConstantInt *pValConst = cast<ConstantInt>(v);
296 return pValConst->getZExtValue();
297 }
298
299 int32_t Builder::S_IMMED(Value* v)
300 {
301 SWR_ASSERT(isa<ConstantInt>(v));
302 ConstantInt *pValConst = cast<ConstantInt>(v);
303 return pValConst->getSExtValue();
304 }
305
306 CallInst *Builder::CALL(Value *Callee, const std::initializer_list<Value*> &argsList, const llvm::Twine& name)
307 {
308 std::vector<Value*> args;
309 for (auto arg : argsList)
310 args.push_back(arg);
311 return CALLA(Callee, args, name);
312 }
313
314 CallInst *Builder::CALL(Value *Callee, Value* arg)
315 {
316 std::vector<Value*> args;
317 args.push_back(arg);
318 return CALLA(Callee, args);
319 }
320
321 CallInst *Builder::CALL2(Value *Callee, Value* arg1, Value* arg2)
322 {
323 std::vector<Value*> args;
324 args.push_back(arg1);
325 args.push_back(arg2);
326 return CALLA(Callee, args);
327 }
328
329 CallInst *Builder::CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3)
330 {
331 std::vector<Value*> args;
332 args.push_back(arg1);
333 args.push_back(arg2);
334 args.push_back(arg3);
335 return CALLA(Callee, args);
336 }
337
338 //////////////////////////////////////////////////////////////////////////
339 Value *Builder::DEBUGTRAP()
340 {
341 Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::debugtrap);
342 return CALL(func);
343 }
344
345 Value *Builder::VRCP(Value *va, const llvm::Twine& name)
346 {
347 return FDIV(VIMMED1(1.0f), va, name); // 1 / a
348 }
349
350 Value *Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY)
351 {
352 Value* vOut = FMADDPS(vA, vX, vC);
353 vOut = FMADDPS(vB, vY, vOut);
354 return vOut;
355 }
356
//////////////////////////////////////////////////////////////////////////
/// @brief insert a JIT call to CallPrint
/// - outputs formatted string to both stdout and VS output window
/// - DEBUG builds only
/// Usage example:
///   PRINT("index %d = 0x%p\n",{C(lane), pIndex});
///   where C(lane) creates a constant value to print, and pIndex is the Value*
///   result from a GEP, printing out the pointer to memory
///
/// The format string is rewritten before it is emitted: each '%' is paired
/// with the next entry of printArgs, in order. Vector arguments are expanded
/// into one format specifier and one extracted element per lane; %x / %X are
/// rewritten to a "0x%08..." form; float arguments printed with %f are
/// extended to double.
/// @param printStr - constant string to print, which includes format specifiers
/// @param printArgs - initializer list of Value*'s to print to std out
/// @return the emitted call to CallPrint
CallInst *Builder::PRINT(const std::string &printStr,const std::initializer_list<Value*> &printArgs)
{
    // push the arguments to CallPrint into a vector
    std::vector<Value*> printCallArgs;
    // save room for the format string. we still need to modify it for vectors
    printCallArgs.resize(1);

    // search through the format string for special processing
    size_t pos = 0;
    // work on a copy; the caller's string is left untouched
    std::string tempStr(printStr);
    pos = tempStr.find('%', pos);
    auto v = printArgs.begin();

    // walk '%' occurrences and arguments in lock step; stops when either runs out
    while ((pos != std::string::npos) && (v != printArgs.end()))
    {
        Value* pArg = *v;
        Type* pType = pArg->getType();

        if (pType->isVectorTy())
        {
            Type* pContainedType = pType->getContainedType(0);

            // NOTE(review): a vector argument whose specifier matches none of
            // the three cases below gets no call argument pushed, yet the
            // argument iterator still advances - confirm this is intended.
            if (toupper(tempStr[pos + 1]) == 'X')
            {
                // turn "%x" into "0x%08X " for lane 0 ...
                tempStr[pos] = '0';
                tempStr[pos + 1] = 'x';
                tempStr.insert(pos + 2, "%08X ");
                // ... and step past the 7 characters just written
                pos += 7;

                printCallArgs.push_back(VEXTRACT(pArg, C(0)));

                // remaining lanes: one more "0x%08X " and one extracted element each
                std::string vectorFormatStr;
                for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i)
                {
                    vectorFormatStr += "0x%08X ";
                    printCallArgs.push_back(VEXTRACT(pArg, C(i)));
                }

                tempStr.insert(pos, vectorFormatStr);
                pos += vectorFormatStr.size();
            }
            else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy()))
            {
                // insert a "%f " in front of the original specifier for every
                // lane but the last; the original "%f" serves the last lane
                uint32_t i = 0;
                for (; i < (pArg->getType()->getVectorNumElements()) - 1; i++)
                {
                    tempStr.insert(pos, std::string("%f "));
                    pos += 3;
                    printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
                }
                printCallArgs.push_back(FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
            }
            else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
            {
                // same expansion as the %f case, without the extension to double
                uint32_t i = 0;
                for (; i < (pArg->getType()->getVectorNumElements()) - 1; i++)
                {
                    tempStr.insert(pos, std::string("%d "));
                    pos += 3;
                    printCallArgs.push_back(VEXTRACT(pArg, C(i)));
                }
                printCallArgs.push_back(VEXTRACT(pArg, C(i)));
            }
        }
        else
        {
            if (toupper(tempStr[pos + 1]) == 'X')
            {
                // "%x" becomes "0x%08x"; the original x / X character is kept
                tempStr[pos] = '0';
                tempStr.insert(pos + 1, "x%08");
                printCallArgs.push_back(pArg);
                // move the cursor past the '%' that was just inserted
                pos += 3;
            }
            // for %f we need to cast float Values to doubles so that they print out correctly
            else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy()))
            {
                printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext)));
                pos++;
            }
            else
            {
                // any other specifier: pass the value through unchanged
                printCallArgs.push_back(pArg);
            }
        }

        // advance to the next argument
        v++;
        pos = tempStr.find('%', ++pos);
    }

    // create global variable constant string
    Constant *constString = ConstantDataArray::getString(JM()->mContext,tempStr,true);
    GlobalVariable *gvPtr = new GlobalVariable(constString->getType(),true,GlobalValue::InternalLinkage,constString,"printStr");
    // the global is added to the current module's global list
    JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);

    // get a pointer to the first character in the constant string array
    std::vector<Constant*> geplist{C(0),C(0)};
    Constant *strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr,geplist,false);

    // insert the pointer to the format string in the argument vector
    printCallArgs[0] = strGEP;

    // get pointer to CallPrint function and insert decl into the module if needed
    // (declared as a variadic function: void CallPrint(i8*, ...))
    std::vector<Type*> args;
    args.push_back(PointerType::get(mInt8Ty,0));
    FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext),args,true);
    Function *callPrintFn = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));

    // if we haven't yet added the symbol to the symbol table
    if((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
    {
        sys::DynamicLibrary::AddSymbol("CallPrint", (void *)&CallPrint);
    }

    // insert a call to CallPrint
    return CALLA(callPrintFn,printCallArgs);
}
484
485 //////////////////////////////////////////////////////////////////////////
486 /// @brief Wrapper around PRINT with initializer list.
487 CallInst* Builder::PRINT(const std::string &printStr)
488 {
489 return PRINT(printStr, {});
490 }
491
492 Value *Builder::EXTRACT_16(Value *x, uint32_t imm)
493 {
494 if (imm == 0)
495 {
496 return VSHUFFLE(x, UndefValue::get(x->getType()), { 0, 1, 2, 3, 4, 5, 6, 7 });
497 }
498 else
499 {
500 return VSHUFFLE(x, UndefValue::get(x->getType()), { 8, 9, 10, 11, 12, 13, 14, 15 });
501 }
502 }
503
504 Value *Builder::JOIN_16(Value *a, Value *b)
505 {
506 return VSHUFFLE(a, b, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
507 }
508
509 //////////////////////////////////////////////////////////////////////////
510 /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
511 Value *Builder::MASK(Value *vmask)
512 {
513 Value *src = BITCAST(vmask, mSimdInt32Ty);
514 return ICMP_SLT(src, VIMMED1(0));
515 }
516
517 Value *Builder::MASK_16(Value *vmask)
518 {
519 Value *src = BITCAST(vmask, mSimd16Int32Ty);
520 return ICMP_SLT(src, VIMMED1_16(0));
521 }
522
523 //////////////////////////////////////////////////////////////////////////
524 /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
525 Value *Builder::VMASK(Value *mask)
526 {
527 return S_EXT(mask, mSimdInt32Ty);
528 }
529
530 Value *Builder::VMASK_16(Value *mask)
531 {
532 return S_EXT(mask, mSimd16Int32Ty);
533 }
534
535 //////////////////////////////////////////////////////////////////////////
536 /// @brief Generate a VPSHUFB operation in LLVM IR. If not
537 /// supported on the underlying platform, emulate it
538 /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
539 /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
540 /// Byte masks in lower 128 lane of b selects 8 bit values from lower
541 /// 128bits of a, and vice versa for the upper lanes. If the mask
542 /// value is negative, '0' is inserted.
543 Value *Builder::PSHUFB(Value* a, Value* b)
544 {
545 Value* res;
546 // use avx2 pshufb instruction if available
547 if(JM()->mArch.AVX2())
548 {
549 res = VPSHUFB(a, b);
550 }
551 else
552 {
553 Constant* cB = dyn_cast<Constant>(b);
554 // number of 8 bit elements in b
555 uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements();
556 // output vector
557 Value* vShuf = UndefValue::get(VectorType::get(mInt8Ty, numElms));
558
559 // insert an 8 bit value from the high and low lanes of a per loop iteration
560 numElms /= 2;
561 for(uint32_t i = 0; i < numElms; i++)
562 {
563 ConstantInt* cLow128b = cast<ConstantInt>(cB->getAggregateElement(i));
564 ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
565
566 // extract values from constant mask
567 char valLow128bLane = (char)(cLow128b->getSExtValue());
568 char valHigh128bLane = (char)(cHigh128b->getSExtValue());
569
570 Value* insertValLow128b;
571 Value* insertValHigh128b;
572
573 // if the mask value is negative, insert a '0' in the respective output position
574 // otherwise, lookup the value at mask position (bits 3..0 of the respective mask byte) in a and insert in output vector
575 insertValLow128b = (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
576 insertValHigh128b = (valHigh128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
577
578 vShuf = VINSERT(vShuf, insertValLow128b, i);
579 vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
580 }
581 res = vShuf;
582 }
583 return res;
584 }
585
586 //////////////////////////////////////////////////////////////////////////
587 /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
588 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
589 /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
590 /// lower 8 values are used.
591 Value *Builder::PMOVSXBD(Value* a)
592 {
593 // VPMOVSXBD output type
594 Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
595 // Extract 8 values from 128bit lane and sign extend
596 return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
597 }
598
599 //////////////////////////////////////////////////////////////////////////
600 /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
601 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
602 /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
603 Value *Builder::PMOVSXWD(Value* a)
604 {
605 // VPMOVSXWD output type
606 Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
607 // Extract 8 values from 128bit lane and sign extend
608 return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
609 }
610
611 //////////////////////////////////////////////////////////////////////////
612 /// @brief Generate a VPERMD operation (shuffle 32 bit integer values
613 /// across 128 bit lanes) in LLVM IR. If not supported on the underlying
614 /// platform, emulate it
615 /// @param a - 256bit SIMD lane(8x32bit) of integer values.
616 /// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
617 Value *Builder::PERMD(Value* a, Value* idx)
618 {
619 Value* res;
620 // use avx2 permute instruction if available
621 if(JM()->mArch.AVX2())
622 {
623 res = VPERMD(a, idx);
624 }
625 else
626 {
627 if (isa<Constant>(idx))
628 {
629 res = VSHUFFLE(a, a, idx);
630 }
631 else
632 {
633 res = VUNDEF_I();
634 for (uint32_t l = 0; l < JM()->mVWidth; ++l)
635 {
636 Value* pIndex = VEXTRACT(idx, C(l));
637 Value* pVal = VEXTRACT(a, pIndex);
638 res = VINSERT(res, pVal, C(l));
639 }
640 }
641 }
642 return res;
643 }
644
645 //////////////////////////////////////////////////////////////////////////
646 /// @brief Generate a VPERMPS operation (shuffle 32 bit float values
647 /// across 128 bit lanes) in LLVM IR. If not supported on the underlying
648 /// platform, emulate it
649 /// @param a - 256bit SIMD lane(8x32bit) of float values.
650 /// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
651 Value *Builder::PERMPS(Value* a, Value* idx)
652 {
653 Value* res;
654 // use avx2 permute instruction if available
655 if (JM()->mArch.AVX2())
656 {
657 // llvm 3.6.0 swapped the order of the args to vpermd
658 res = VPERMPS(idx, a);
659 }
660 else
661 {
662 if (isa<Constant>(idx))
663 {
664 res = VSHUFFLE(a, a, idx);
665 }
666 else
667 {
668 res = VUNDEF_F();
669 for (uint32_t l = 0; l < JM()->mVWidth; ++l)
670 {
671 Value* pIndex = VEXTRACT(idx, C(l));
672 Value* pVal = VEXTRACT(a, pIndex);
673 res = VINSERT(res, pVal, C(l));
674 }
675 }
676 }
677
678 return res;
679 }
680
681 //////////////////////////////////////////////////////////////////////////
682 /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
683 /// in LLVM IR. If not supported on the underlying platform, emulate it
684 /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
685 Value *Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
686 {
687 if (JM()->mArch.F16C())
688 {
689 return VCVTPH2PS(a, name);
690 }
691 else
692 {
693 FunctionType* pFuncTy = FunctionType::get(mFP32Ty, mInt16Ty);
694 Function* pCvtPh2Ps = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy));
695
696 if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr)
697 {
698 sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)&ConvertFloat16ToFloat32);
699 }
700
701 Value* pResult = UndefValue::get(mSimdFP32Ty);
702 for (uint32_t i = 0; i < mVWidth; ++i)
703 {
704 Value* pSrc = VEXTRACT(a, C(i));
705 Value* pConv = CALL(pCvtPh2Ps, std::initializer_list<Value*>{pSrc});
706 pResult = VINSERT(pResult, pConv, C(i));
707 }
708
709 pResult->setName(name);
710 return pResult;
711 }
712 }
713
714 //////////////////////////////////////////////////////////////////////////
715 /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
716 /// in LLVM IR. If not supported on the underlying platform, emulate it
717 /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
718 Value *Builder::CVTPS2PH(Value* a, Value* rounding)
719 {
720 if (JM()->mArch.F16C())
721 {
722 return VCVTPS2PH(a, rounding);
723 }
724 else
725 {
726 // call scalar C function for now
727 FunctionType* pFuncTy = FunctionType::get(mInt16Ty, mFP32Ty);
728 Function* pCvtPs2Ph = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
729
730 if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
731 {
732 sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", (void *)&ConvertFloat32ToFloat16);
733 }
734
735 Value* pResult = UndefValue::get(mSimdInt16Ty);
736 for (uint32_t i = 0; i < mVWidth; ++i)
737 {
738 Value* pSrc = VEXTRACT(a, C(i));
739 Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
740 pResult = VINSERT(pResult, pConv, C(i));
741 }
742
743 return pResult;
744 }
745 }
746
747 Value *Builder::PMAXSD(Value* a, Value* b)
748 {
749 Value* cmp = ICMP_SGT(a, b);
750 return SELECT(cmp, a, b);
751 }
752
753 Value *Builder::PMINSD(Value* a, Value* b)
754 {
755 Value* cmp = ICMP_SLT(a, b);
756 return SELECT(cmp, a, b);
757 }
758
759 // Helper function to create alloca in entry block of function
760 Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
761 {
762 auto saveIP = IRB()->saveIP();
763 IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
764 pFunc->getEntryBlock().begin());
765 Value* pAlloca = ALLOCA(pType);
766 if (saveIP.isSet()) IRB()->restoreIP(saveIP);
767 return pAlloca;
768 }
769
770 Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
771 {
772 auto saveIP = IRB()->saveIP();
773 IRB()->SetInsertPoint(&pFunc->getEntryBlock(),
774 pFunc->getEntryBlock().begin());
775 Value* pAlloca = ALLOCA(pType, pArraySize);
776 if (saveIP.isSet()) IRB()->restoreIP(saveIP);
777 return pAlloca;
778 }
779
780 Value* Builder::VABSPS(Value* a)
781 {
782 Value* asInt = BITCAST(a, mSimdInt32Ty);
783 Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
784 return result;
785 }
786
787 Value *Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
788 {
789 Value *lowCmp = ICMP_SLT(src, low);
790 Value *ret = SELECT(lowCmp, low, src);
791
792 Value *highCmp = ICMP_SGT(ret, high);
793 ret = SELECT(highCmp, high, ret, name);
794
795 return ret;
796 }
797
798 Value *Builder::FCLAMP(Value* src, Value* low, Value* high)
799 {
800 Value *lowCmp = FCMP_OLT(src, low);
801 Value *ret = SELECT(lowCmp, low, src);
802
803 Value *highCmp = FCMP_OGT(ret, high);
804 ret = SELECT(highCmp, high, ret);
805
806 return ret;
807 }
808
809 Value *Builder::FCLAMP(Value* src, float low, float high)
810 {
811 Value* result = VMAXPS(src, VIMMED1(low));
812 result = VMINPS(result, VIMMED1(high));
813
814 return result;
815 }
816
817 Value *Builder::FMADDPS(Value* a, Value* b, Value* c)
818 {
819 Value* vOut;
820 // use FMADs if available
821 if(JM()->mArch.AVX2())
822 {
823 vOut = VFMADDPS(a, b, c);
824 }
825 else
826 {
827 vOut = FADD(FMUL(a, b), c);
828 }
829 return vOut;
830 }
831
832 Value* Builder::POPCNT(Value* a)
833 {
834 Function* pCtPop = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::ctpop, { a->getType() });
835 return CALL(pCtPop, std::initializer_list<Value*>{a});
836 }
837
838 //////////////////////////////////////////////////////////////////////////
839 /// @brief C functions called by LLVM IR
840 //////////////////////////////////////////////////////////////////////////
841
842 //////////////////////////////////////////////////////////////////////////
843 /// @brief called in JIT code, inserted by PRINT
844 /// output to both stdout and visual studio debug console
845 void __cdecl CallPrint(const char* fmt, ...)
846 {
847 va_list args;
848 va_start(args, fmt);
849 vprintf(fmt, args);
850
851 #if defined( _WIN32 )
852 char strBuf[1024];
853 vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
854 OutputDebugStringA(strBuf);
855 #endif
856
857 va_end(args);
858 }
859
860 Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
861 {
862 bool flag = !imm8->isZeroValue();
863 SmallVector<Constant*,8> idx;
864 for (unsigned i = 0; i < mVWidth / 2; i++) {
865 idx.push_back(C(flag ? i + mVWidth / 2 : i));
866 }
867 return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
868 }
869
870 Value *Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
871 {
872 bool flag = !imm8->isZeroValue();
873 SmallVector<Constant*,8> idx;
874 for (unsigned i = 0; i < mVWidth; i++) {
875 idx.push_back(C(i));
876 }
877 Value *inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
878
879 SmallVector<Constant*,8> idx2;
880 for (unsigned i = 0; i < mVWidth / 2; i++) {
881 idx2.push_back(C(flag ? i : i + mVWidth));
882 }
883 for (unsigned i = mVWidth / 2; i < mVWidth; i++) {
884 idx2.push_back(C(flag ? i + mVWidth / 2 : i));
885 }
886 return VSHUFFLE(a, inter, ConstantVector::get(idx2));
887 }
888
889 // rdtsc buckets macros
890 void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
891 {
892 // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
893 // buckets framework when single threaded
894 if (KNOB_SINGLE_THREADED)
895 {
896 std::vector<Type*> args{
897 PointerType::get(mInt32Ty, 0), // pBucketMgr
898 mInt32Ty // id
899 };
900
901 FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
902 Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
903 if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == nullptr)
904 {
905 sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", (void*)&BucketManager_StartBucket);
906 }
907
908 CALL(pFunc, { pBucketMgr, pId });
909 }
910 }
911
912 void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
913 {
914 // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
915 // buckets framework when single threaded
916 if (KNOB_SINGLE_THREADED)
917 {
918 std::vector<Type*> args{
919 PointerType::get(mInt32Ty, 0), // pBucketMgr
920 mInt32Ty // id
921 };
922
923 FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
924 Function* pFunc = cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
925 if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == nullptr)
926 {
927 sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", (void*)&BucketManager_StopBucket);
928 }
929
930 CALL(pFunc, { pBucketMgr, pId });
931 }
932 }
933
934 uint32_t Builder::GetTypeSize(Type* pType)
935 {
936 if (pType->isStructTy())
937 {
938 uint32_t numElems = pType->getStructNumElements();
939 Type* pElemTy = pType->getStructElementType(0);
940 return numElems * GetTypeSize(pElemTy);
941 }
942
943 if (pType->isArrayTy())
944 {
945 uint32_t numElems = pType->getArrayNumElements();
946 Type* pElemTy = pType->getArrayElementType();
947 return numElems * GetTypeSize(pElemTy);
948 }
949
950 if (pType->isIntegerTy())
951 {
952 uint32_t bitSize = pType->getIntegerBitWidth();
953 return bitSize / 8;
954 }
955
956 if (pType->isFloatTy())
957 {
958 return 4;
959 }
960
961 if (pType->isHalfTy())
962 {
963 return 2;
964 }
965
966 if (pType->isDoubleTy())
967 {
968 return 8;
969 }
970
971 SWR_ASSERT(false, "Unimplemented type.");
972 return 0;
973 }
974 }