1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * @file builder_misc.cpp
25 * @brief Implementation for miscellaneous builder functions
29 ******************************************************************************/
30 #include "jit_pch.hpp"
32 #include "common/rdtsc_buckets.h"
// Forward declaration of the variadic print helper. PRINT() further down takes
// its address and registers it under the symbol name "CallPrint".
38 void __cdecl
CallPrint(const char* fmt
, ...);
40 //////////////////////////////////////////////////////////////////////////
41 /// @brief Convert an IEEE 754 32-bit single precision float to an
42 /// 16 bit float with 5 exponent bits and a variable
43 /// number of mantissa bits.
44 /// @param val - 32-bit float
45 /// @todo Maybe move this outside of this file into a header?
// Scalar float32 -> float16 conversion working directly on the bit patterns.
// The returned value is packed as sign (bit 15) | exponent (bits 14..10) | mantissa.
// NOTE(review): several branch bodies are not visible in this listing; the
// comments below only describe the statements that are shown.
46 static uint16_t ConvertFloat32ToFloat16(float val
)
48 uint32_t sign
, exp
, mant
;
51 // Extract the sign, exponent, and mantissa
// The float's storage is read back as a 32-bit integer so the fields can be masked out.
52 uint32_t uf
= *(uint32_t*)&val
;
// bit 31 -> sign
53 sign
= (uf
& 0x80000000) >> 31;
// bits 30..23 -> biased float32 exponent
54 exp
= (uf
& 0x7F800000) >> 23;
// bits 22..0 -> float32 mantissa
55 mant
= uf
& 0x007FFFFF;
57 // Check for out of range
62 sign
= 1; // set the sign bit for NANs
64 else if (std::isinf(val
))
// 0x70 == 127 - 15; presumably the difference between the float32 and float16
// exponent biases, so 0x70 + 0x1E is the largest exponent that still maps to a
// finite half value - TODO confirm against the hidden branch bodies.
69 else if (exp
> (0x70 + 0x1E)) // Too big to represent -> max representable value
74 else if ((exp
<= 0x70) && (exp
>= 0x66)) // It's a denorm
// Each pass of this loop header shifts the mantissa right by one bit and bumps
// the exponent, until the exponent moves past 0x70.
77 for (; exp
<= 0x70; mant
>>= 1, exp
++)
82 else if (exp
< 0x66) // Too small to represent -> Zero
89 // Saves bits that will be shifted off for rounding
// (the low 13 bits of the float32 mantissa)
90 roundBits
= mant
& 0x1FFFu
;
91 // convert exponent and mantissa to 16 bit format
95 // Essentially RTZ, but round up if off by only 1 lsb
96 if (roundBits
== 0x1FFFu
)
// 0xC00 tests bits 10 and 11, i.e. whether the mantissa no longer fits in 10 bits.
100 if ((mant
& 0xC00u
) != 0)
102 // make sure only the needed bits are used
// Assemble the 16-bit pattern: sign at bit 15, exponent at bit 10, mantissa below.
107 uint32_t tmpVal
= (sign
<< 15) | (exp
<< 10) | mant
;
108 return (uint16_t)tmpVal
;
111 //////////////////////////////////////////////////////////////////////////
112 /// @brief Convert an IEEE 754 16-bit float to an 32-bit single precision
114 /// @param val - 16-bit float
115 /// @todo Maybe move this outside of this file into a header?
// Scalar float16 -> float32 conversion working on the bit patterns.
// val is declared uint32_t, but every visible use masks it down to bits 15..0,
// so only the low 16 bits influence the result.
// NOTE(review): the declaration of `result` and the body of the denormal loop
// are not visible in this listing.
116 static float ConvertFloat16ToFloat32(uint32_t val
)
// Exponent and mantissa all zero: signed zero, so only the sign bit is carried
// over (bit 15 moved up to bit 31).
119 if ((val
& 0x7fff) == 0)
121 result
= ((uint32_t)(val
& 0x8000)) << 16;
// Exponent field all ones: infinity or NaN.
123 else if ((val
& 0x7c00) == 0x7c00)
// A zero mantissa selects the float32 infinity pattern, anything else the
// 0x7fc00000 NaN pattern; the sign is OR'd in afterwards.
125 result
= ((val
& 0x3ff) == 0) ? 0x7f800000 : 0x7fc00000;
126 result
|= ((uint32_t)val
& 0x8000) << 16;
// Remaining values: split the fields and move them to their float32 positions.
130 uint32_t sign
= (val
& 0x8000) << 16;
131 uint32_t mant
= (val
& 0x3ff) << 13;
132 uint32_t exp
= (val
>> 10) & 0x1f;
133 if ((exp
== 0) && (mant
!= 0)) // Adjust exponent and mantissa for denormals
// Loops while the mantissa is still below the implicit-one position (bit 10 of
// the half mantissa, shifted into float32 position).
136 while (mant
< (0x400 << 13))
// Drop everything above the 10 stored mantissa bits.
141 mant
&= (0x3ff << 13);
// Re-bias the exponent from 15 to 127 and place it in bits 30..23.
143 exp
= ((exp
- 15 + 127) & 0xff) << 23;
144 result
= sign
| exp
| mant
;
// Reinterpret the assembled 32-bit pattern as a float.
147 return *(float*)&result
;
150 Constant
*Builder::C(bool i
)
152 return ConstantInt::get(IRB()->getInt1Ty(), (i
? 1 : 0));
155 Constant
*Builder::C(char i
)
157 return ConstantInt::get(IRB()->getInt8Ty(), i
);
160 Constant
*Builder::C(uint8_t i
)
162 return ConstantInt::get(IRB()->getInt8Ty(), i
);
165 Constant
*Builder::C(int i
)
167 return ConstantInt::get(IRB()->getInt32Ty(), i
);
170 Constant
*Builder::C(int64_t i
)
172 return ConstantInt::get(IRB()->getInt64Ty(), i
);
175 Constant
*Builder::C(uint16_t i
)
177 return ConstantInt::get(mInt16Ty
,i
);
180 Constant
*Builder::C(uint32_t i
)
182 return ConstantInt::get(IRB()->getInt32Ty(), i
);
185 Constant
*Builder::C(float i
)
187 return ConstantFP::get(IRB()->getFloatTy(), i
);
190 Constant
*Builder::PRED(bool pred
)
192 return ConstantInt::get(IRB()->getInt1Ty(), (pred
? 1 : 0));
195 Value
*Builder::VIMMED1(int i
)
197 return ConstantVector::getSplat(mVWidth
, cast
<ConstantInt
>(C(i
)));
200 Value
*Builder::VIMMED1_16(int i
)
202 return ConstantVector::getSplat(mVWidth16
, cast
<ConstantInt
>(C(i
)));
205 Value
*Builder::VIMMED1(uint32_t i
)
207 return ConstantVector::getSplat(mVWidth
, cast
<ConstantInt
>(C(i
)));
210 Value
*Builder::VIMMED1_16(uint32_t i
)
212 return ConstantVector::getSplat(mVWidth16
, cast
<ConstantInt
>(C(i
)));
215 Value
*Builder::VIMMED1(float i
)
217 return ConstantVector::getSplat(mVWidth
, cast
<ConstantFP
>(C(i
)));
220 Value
*Builder::VIMMED1_16(float i
)
222 return ConstantVector::getSplat(mVWidth16
, cast
<ConstantFP
>(C(i
)));
225 Value
*Builder::VIMMED1(bool i
)
227 return ConstantVector::getSplat(mVWidth
, cast
<ConstantInt
>(C(i
)));
230 Value
*Builder::VIMMED1_16(bool i
)
232 return ConstantVector::getSplat(mVWidth16
, cast
<ConstantInt
>(C(i
)));
235 Value
*Builder::VUNDEF_IPTR()
237 return UndefValue::get(VectorType::get(mInt32PtrTy
,mVWidth
));
240 Value
*Builder::VUNDEF(Type
* t
)
242 return UndefValue::get(VectorType::get(t
, mVWidth
));
245 Value
*Builder::VUNDEF_I()
247 return UndefValue::get(VectorType::get(mInt32Ty
, mVWidth
));
250 Value
*Builder::VUNDEF_I_16()
252 return UndefValue::get(VectorType::get(mInt32Ty
, mVWidth16
));
255 Value
*Builder::VUNDEF_F()
257 return UndefValue::get(VectorType::get(mFP32Ty
, mVWidth
));
260 Value
*Builder::VUNDEF_F_16()
262 return UndefValue::get(VectorType::get(mFP32Ty
, mVWidth16
));
265 Value
*Builder::VUNDEF(Type
*ty
, uint32_t size
)
267 return UndefValue::get(VectorType::get(ty
, size
));
// Broadcasts src across mVWidth lanes with VECTOR_SPLAT, forwarding the name.
// NOTE(review): what happens when src already has a vector type is decided by
// the `if` below, whose body is not visible in this listing.
270 Value
*Builder::VBROADCAST(Value
*src
, const llvm::Twine
& name
)
272 // check if src is already a vector
273 if (src
->getType()->isVectorTy())
278 return VECTOR_SPLAT(mVWidth
, src
, name
);
// Broadcasts src across mVWidth16 lanes with VECTOR_SPLAT.
// NOTE(review): what happens when src already has a vector type is decided by
// the `if` below, whose body is not visible in this listing.
281 Value
*Builder::VBROADCAST_16(Value
*src
)
283 // check if src is already a vector
284 if (src
->getType()->isVectorTy())
289 return VECTOR_SPLAT(mVWidth16
, src
);
292 uint32_t Builder::IMMED(Value
* v
)
294 SWR_ASSERT(isa
<ConstantInt
>(v
));
295 ConstantInt
*pValConst
= cast
<ConstantInt
>(v
);
296 return pValConst
->getZExtValue();
299 int32_t Builder::S_IMMED(Value
* v
)
301 SWR_ASSERT(isa
<ConstantInt
>(v
));
302 ConstantInt
*pValConst
= cast
<ConstantInt
>(v
);
303 return pValConst
->getSExtValue();
// Emits a call to Callee through CALLA, taking the arguments as an initializer
// list and forwarding the name.
// NOTE(review): the body of the loop over argsList is not visible in this
// listing; presumably it copies each entry into `args` - TODO confirm.
306 CallInst
*Builder::CALL(Value
*Callee
, const std::initializer_list
<Value
*> &argsList
, const llvm::Twine
& name
)
308 std::vector
<Value
*> args
;
309 for (auto arg
: argsList
)
311 return CALLA(Callee
, args
, name
);
// Emits a call to Callee through CALLA using a std::vector argument list.
// NOTE(review): the statement that puts `arg` into `args` is not visible in
// this listing.
314 CallInst
*Builder::CALL(Value
*Callee
, Value
* arg
)
316 std::vector
<Value
*> args
;
318 return CALLA(Callee
, args
);
321 CallInst
*Builder::CALL2(Value
*Callee
, Value
* arg1
, Value
* arg2
)
323 std::vector
<Value
*> args
;
324 args
.push_back(arg1
);
325 args
.push_back(arg2
);
326 return CALLA(Callee
, args
);
329 CallInst
*Builder::CALL3(Value
*Callee
, Value
* arg1
, Value
* arg2
, Value
* arg3
)
331 std::vector
<Value
*> args
;
332 args
.push_back(arg1
);
333 args
.push_back(arg2
);
334 args
.push_back(arg3
);
335 return CALLA(Callee
, args
);
338 //////////////////////////////////////////////////////////////////////////
// Looks up the declaration of the llvm debugtrap intrinsic in the current module.
// NOTE(review): the statement that uses `func` and produces the return value is
// not visible in this listing.
339 Value
*Builder::DEBUGTRAP()
341 Function
*func
= Intrinsic::getDeclaration(JM()->mpCurrentModule
, Intrinsic::debugtrap
);
345 Value
*Builder::VRCP(Value
*va
, const llvm::Twine
& name
)
347 return FDIV(VIMMED1(1.0f
), va
, name
); // 1 / a
// Evaluates vA * vX + vB * vY + vC as two chained FMADDPS calls: the first
// produces vA * vX + vC, the second adds vB * vY on top of it.
// NOTE(review): the return statement is not visible in this listing.
350 Value
*Builder::VPLANEPS(Value
* vA
, Value
* vB
, Value
* vC
, Value
* &vX
, Value
* &vY
)
352 Value
* vOut
= FMADDPS(vA
, vX
, vC
);
353 vOut
= FMADDPS(vB
, vY
, vOut
);
357 //////////////////////////////////////////////////////////////////////////
358 /// @brief insert a JIT call to CallPrint
359 /// - outputs formatted string to both stdout and VS output window
360 /// - DEBUG builds only
362 /// PRINT("index %d = 0x%p\n",{C(lane), pIndex});
363 /// where C(lane) creates a constant value to print, and pIndex is the Value*
364 /// result from a GEP, printing out the pointer to memory
365 /// @param printStr - constant string to print, which includes format specifiers
366 /// @param printArgs - initializer list of Value*'s to print to std out
// Builds a call to the external CallPrint helper. The format string is copied
// and rewritten so that vector arguments are printed lane by lane, then stored
// in a module-level constant whose address becomes the first call argument.
// NOTE(review): several statements of this function (among them the
// declarations of `pos`, `pArg` and the inner `i`) are not visible in this
// listing; the comments below only describe what is shown.
367 CallInst
*Builder::PRINT(const std::string
&printStr
,const std::initializer_list
<Value
*> &printArgs
)
369 // push the arguments to CallPrint into a vector
370 std::vector
<Value
*> printCallArgs
;
371 // save room for the format string. we still need to modify it for vectors
372 printCallArgs
.resize(1);
374 // search through the format string for special processing
// Work on a private copy; the caller's string is const.
376 std::string
tempStr(printStr
);
377 pos
= tempStr
.find('%', pos
);
378 auto v
= printArgs
.begin();
// Walk format specifiers and arguments in lock step until either runs out.
380 while ((pos
!= std::string::npos
) && (v
!= printArgs
.end()))
383 Type
* pType
= pArg
->getType();
// Vector arguments are expanded: one format specifier and one extracted
// element per lane.
385 if (pType
->isVectorTy())
387 Type
* pContainedType
= pType
->getContainedType(0);
// %x / %X on a vector: rewrite the specifier and emit every lane as 8 hex digits.
389 if (toupper(tempStr
[pos
+ 1]) == 'X')
392 tempStr
[pos
+ 1] = 'x';
393 tempStr
.insert(pos
+ 2, "%08X ");
396 printCallArgs
.push_back(VEXTRACT(pArg
, C(0)));
// Lanes 1..N-1 get their own "0x%08X " specifier, collected here and spliced
// into the format string in one go.
398 std::string vectorFormatStr
;
399 for (uint32_t i
= 1; i
< pType
->getVectorNumElements(); ++i
)
401 vectorFormatStr
+= "0x%08X ";
402 printCallArgs
.push_back(VEXTRACT(pArg
, C(i
)));
405 tempStr
.insert(pos
, vectorFormatStr
);
406 pos
+= vectorFormatStr
.size();
// %f on a float vector: each lane is extended to double before being passed.
408 else if ((tempStr
[pos
+ 1] == 'f') && (pContainedType
->isFloatTy()))
// The loop stops one lane short; the last lane is pushed after the loop.
411 for (; i
< (pArg
->getType()->getVectorNumElements()) - 1; i
++)
413 tempStr
.insert(pos
, std::string("%f "));
415 printCallArgs
.push_back(FP_EXT(VEXTRACT(pArg
, C(i
)), Type::getDoubleTy(JM()->mContext
)));
417 printCallArgs
.push_back(FP_EXT(VEXTRACT(pArg
, C(i
)), Type::getDoubleTy(JM()->mContext
)));
// %d on an integer vector: same scheme as %f, without the extension to double.
419 else if ((tempStr
[pos
+ 1] == 'd') && (pContainedType
->isIntegerTy()))
422 for (; i
< (pArg
->getType()->getVectorNumElements()) - 1; i
++)
424 tempStr
.insert(pos
, std::string("%d "));
426 printCallArgs
.push_back(VEXTRACT(pArg
, C(i
)));
428 printCallArgs
.push_back(VEXTRACT(pArg
, C(i
)));
// Scalar %x / %X: extra text is spliced in right after the '%'.
433 if (toupper(tempStr
[pos
+ 1]) == 'X')
436 tempStr
.insert(pos
+ 1, "x%08");
437 printCallArgs
.push_back(pArg
);
440 // for %f we need to cast float Values to doubles so that they print out correctly
441 else if ((tempStr
[pos
+ 1] == 'f') && (pType
->isFloatTy()))
443 printCallArgs
.push_back(FP_EXT(pArg
, Type::getDoubleTy(JM()->mContext
)));
448 printCallArgs
.push_back(pArg
);
452 // advance to the next argument
454 pos
= tempStr
.find('%', ++pos
);
457 // create global variable constant string
458 Constant
*constString
= ConstantDataArray::getString(JM()->mContext
,tempStr
,true);
459 GlobalVariable
*gvPtr
= new GlobalVariable(constString
->getType(),true,GlobalValue::InternalLinkage
,constString
,"printStr");
// The new global is appended to the current module's global list.
460 JM()->mpCurrentModule
->getGlobalList().push_back(gvPtr
);
462 // get a pointer to the first character in the constant string array
463 std::vector
<Constant
*> geplist
{C(0),C(0)};
464 Constant
*strGEP
= ConstantExpr::getGetElementPtr(nullptr, gvPtr
,geplist
,false);
466 // insert the pointer to the format string in the argument vector
467 printCallArgs
[0] = strGEP
;
469 // get pointer to CallPrint function and insert decl into the module if needed
// Declared as a variadic function returning void whose only fixed parameter is
// a pointer to mInt8Ty.
470 std::vector
<Type
*> args
;
471 args
.push_back(PointerType::get(mInt8Ty
,0));
472 FunctionType
* callPrintTy
= FunctionType::get(Type::getVoidTy(JM()->mContext
),args
,true);
473 Function
*callPrintFn
= cast
<Function
>(JM()->mpCurrentModule
->getOrInsertFunction("CallPrint", callPrintTy
));
475 // if we haven't yet added the symbol to the symbol table
476 if((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
478 sys::DynamicLibrary::AddSymbol("CallPrint", (void *)&CallPrint
);
481 // insert a call to CallPrint
482 return CALLA(callPrintFn
,printCallArgs
);
485 //////////////////////////////////////////////////////////////////////////
486 /// @brief Wrapper around PRINT with initializer list.
487 CallInst
* Builder::PRINT(const std::string
&printStr
)
489 return PRINT(printStr
, {});
// Returns one 8-element half of x via VSHUFFLE: either elements 0..7 or
// elements 8..15.
// NOTE(review): the condition that picks between the two returns (presumably a
// test on imm) is not visible in this listing.
492 Value
*Builder::EXTRACT_16(Value
*x
, uint32_t imm
)
496 return VSHUFFLE(x
, UndefValue::get(x
->getType()), { 0, 1, 2, 3, 4, 5, 6, 7 });
500 return VSHUFFLE(x
, UndefValue::get(x
->getType()), { 8, 9, 10, 11, 12, 13, 14, 15 });
504 Value
*Builder::JOIN_16(Value
*a
, Value
*b
)
506 return VSHUFFLE(a
, b
, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 });
509 //////////////////////////////////////////////////////////////////////////
510 /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
511 Value
*Builder::MASK(Value
*vmask
)
513 Value
*src
= BITCAST(vmask
, mSimdInt32Ty
);
514 return ICMP_SLT(src
, VIMMED1(0));
517 Value
*Builder::MASK_16(Value
*vmask
)
519 Value
*src
= BITCAST(vmask
, mSimd16Int32Ty
);
520 return ICMP_SLT(src
, VIMMED1_16(0));
523 //////////////////////////////////////////////////////////////////////////
524 /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
525 Value
*Builder::VMASK(Value
*mask
)
527 return S_EXT(mask
, mSimdInt32Ty
);
530 Value
*Builder::VMASK_16(Value
*mask
)
532 return S_EXT(mask
, mSimd16Int32Ty
);
535 //////////////////////////////////////////////////////////////////////////
536 /// @brief Generate a VPSHUFB operation in LLVM IR. If not
537 /// supported on the underlying platform, emulate it
538 /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
539 /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
540 /// Byte masks in lower 128 lane of b selects 8 bit values from lower
541 /// 128bits of a, and vice versa for the upper lanes. If the mask
542 /// value is negative, '0' is inserted.
// NOTE(review): parts of this function (the AVX2 branch body, the declaration
// of the result and the return) are not visible in this listing; the comments
// below describe the emulation code that is shown.
543 Value
*Builder::PSHUFB(Value
* a
, Value
* b
)
546 // use avx2 pshufb instruction if available
547 if(JM()->mArch
.AVX2())
// The emulation reads the mask values at JIT-build time, so it relies on b
// being a Constant. NOTE(review): dyn_cast yields null for a non-constant b and
// no null check is visible before cB is dereferenced - confirm callers only
// pass constant masks.
553 Constant
* cB
= dyn_cast
<Constant
>(b
);
554 // number of 8 bit elements in b
555 uint32_t numElms
= cast
<VectorType
>(cB
->getType())->getNumElements();
557 Value
* vShuf
= UndefValue::get(VectorType::get(mInt8Ty
, numElms
));
559 // insert an 8 bit value from the high and low lanes of a per loop iteration
561 for(uint32_t i
= 0; i
< numElms
; i
++)
// Mask bytes for position i and for position i + numElms.
563 ConstantInt
* cLow128b
= cast
<ConstantInt
>(cB
->getAggregateElement(i
));
564 ConstantInt
* cHigh128b
= cast
<ConstantInt
>(cB
->getAggregateElement(i
+ numElms
));
566 // extract values from constant mask
567 char valLow128bLane
= (char)(cLow128b
->getSExtValue());
568 char valHigh128bLane
= (char)(cHigh128b
->getSExtValue());
570 Value
* insertValLow128b
;
571 Value
* insertValHigh128b
;
573 // if the mask value is negative, insert a '0' in the respective output position
574 // otherwise, lookup the value at mask position (bits 3..0 of the respective mask byte) in a and insert in output vector
// The high-lane lookup is offset by numElms so it reads from the upper part of a.
575 insertValLow128b
= (valLow128bLane
< 0) ? C((char)0) : VEXTRACT(a
, C((valLow128bLane
& 0xF)));
576 insertValHigh128b
= (valHigh128bLane
< 0) ? C((char)0) : VEXTRACT(a
, C((valHigh128bLane
& 0xF) + numElms
));
578 vShuf
= VINSERT(vShuf
, insertValLow128b
, i
);
579 vShuf
= VINSERT(vShuf
, insertValHigh128b
, (i
+ numElms
));
586 //////////////////////////////////////////////////////////////////////////
587 /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
588 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
589 /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values. Only
590 /// lower 8 values are used.
591 Value
*Builder::PMOVSXBD(Value
* a
)
593 // VPMOVSXBD output type
594 Type
* v8x32Ty
= VectorType::get(mInt32Ty
, 8);
595 // Extract 8 values from 128bit lane and sign extend
596 return S_EXT(VSHUFFLE(a
, a
, C
<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty
);
599 //////////////////////////////////////////////////////////////////////////
600 /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
601 /// bits)in LLVM IR. If not supported on the underlying platform, emulate it
602 /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
603 Value
*Builder::PMOVSXWD(Value
* a
)
605 // VPMOVSXWD output type
606 Type
* v8x32Ty
= VectorType::get(mInt32Ty
, 8);
607 // Extract 8 values from 128bit lane and sign extend
608 return S_EXT(VSHUFFLE(a
, a
, C
<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty
);
611 //////////////////////////////////////////////////////////////////////////
612 /// @brief Generate a VPERMD operation (shuffle 32 bit integer values
613 /// across 128 bit lanes) in LLVM IR. If not supported on the underlying
614 /// platform, emulate it
615 /// @param a - 256bit SIMD lane(8x32bit) of integer values.
616 /// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
// Three strategies are visible: the VPERMD wrapper when AVX2 is reported, a
// VSHUFFLE when idx is a Constant, and a per-lane extract/insert loop.
// NOTE(review): the declaration of `res`, the else/brace structure that
// separates the three paths and the return are not visible in this listing.
617 Value
*Builder::PERMD(Value
* a
, Value
* idx
)
620 // use avx2 permute instruction if available
621 if(JM()->mArch
.AVX2())
623 res
= VPERMD(a
, idx
);
// A constant index vector can be handed to VSHUFFLE directly.
627 if (isa
<Constant
>(idx
))
629 res
= VSHUFFLE(a
, a
, idx
);
// Per-lane path: for each lane l, read the index stored in idx[l], fetch that
// element of a and insert it at position l of the result.
634 for (uint32_t l
= 0; l
< JM()->mVWidth
; ++l
)
636 Value
* pIndex
= VEXTRACT(idx
, C(l
));
637 Value
* pVal
= VEXTRACT(a
, pIndex
);
638 res
= VINSERT(res
, pVal
, C(l
));
645 //////////////////////////////////////////////////////////////////////////
646 /// @brief Generate a VPERMPS operation (shuffle 32 bit float values
647 /// across 128 bit lanes) in LLVM IR. If not supported on the underlying
648 /// platform, emulate it
649 /// @param a - 256bit SIMD lane(8x32bit) of float values.
650 /// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
// Float counterpart of PERMD with the same three visible strategies: the
// VPERMPS wrapper when AVX2 is reported, a VSHUFFLE when idx is a Constant, and
// a per-lane extract/insert loop.
// NOTE(review): the declaration of `res`, the else/brace structure that
// separates the three paths and the return are not visible in this listing.
651 Value
*Builder::PERMPS(Value
* a
, Value
* idx
)
654 // use avx2 permute instruction if available
655 if (JM()->mArch
.AVX2())
// Note the operand order: idx is passed first here, unlike PERMD's VPERMD(a, idx).
657 // llvm 3.6.0 swapped the order of the args to vpermd
658 res
= VPERMPS(idx
, a
);
// A constant index vector can be handed to VSHUFFLE directly.
662 if (isa
<Constant
>(idx
))
664 res
= VSHUFFLE(a
, a
, idx
);
// Per-lane path: for each lane l, read the index stored in idx[l], fetch that
// element of a and insert it at position l of the result.
669 for (uint32_t l
= 0; l
< JM()->mVWidth
; ++l
)
671 Value
* pIndex
= VEXTRACT(idx
, C(l
));
672 Value
* pVal
= VEXTRACT(a
, pIndex
);
673 res
= VINSERT(res
, pVal
, C(l
));
681 //////////////////////////////////////////////////////////////////////////
682 /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
683 /// in LLVM IR. If not supported on the underlying platform, emulate it
684 /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
// Uses the VCVTPH2PS wrapper when F16C is reported; otherwise converts lane by
// lane through calls to the scalar ConvertFloat16ToFloat32 helper.
// NOTE(review): the final return is not visible in this listing.
685 Value
*Builder::CVTPH2PS(Value
* a
, const llvm::Twine
& name
)
687 if (JM()->mArch
.F16C())
689 return VCVTPH2PS(a
, name
);
// Fallback: declare the helper in the module as a function taking mInt16Ty and
// returning mFP32Ty. NOTE(review): the C helper itself is declared with a
// uint32_t parameter - confirm the two signatures are compatible on all targets.
693 FunctionType
* pFuncTy
= FunctionType::get(mFP32Ty
, mInt16Ty
);
694 Function
* pCvtPh2Ps
= cast
<Function
>(JM()->mpCurrentModule
->getOrInsertFunction("ConvertFloat16ToFloat32", pFuncTy
));
// Register the helper's address under its name, but only if it is not known yet.
696 if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat16ToFloat32") == nullptr)
698 sys::DynamicLibrary::AddSymbol("ConvertFloat16ToFloat32", (void *)&ConvertFloat16ToFloat32
);
// Convert each of the mVWidth lanes with one call and collect the results.
701 Value
* pResult
= UndefValue::get(mSimdFP32Ty
);
702 for (uint32_t i
= 0; i
< mVWidth
; ++i
)
704 Value
* pSrc
= VEXTRACT(a
, C(i
));
705 Value
* pConv
= CALL(pCvtPh2Ps
, std::initializer_list
<Value
*>{pSrc
});
706 pResult
= VINSERT(pResult
, pConv
, C(i
));
709 pResult
->setName(name
);
714 //////////////////////////////////////////////////////////////////////////
715 /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
716 /// in LLVM IR. If not supported on the underlying platform, emulate it
717 /// @param a - SIMD vector of float32 values to convert (one per lane).
// Uses the VCVTPS2PH wrapper when F16C is reported; otherwise converts lane by
// lane through calls to the scalar ConvertFloat32ToFloat16 helper. The rounding
// argument is only used on the F16C path in the code shown here.
// NOTE(review): the final return is not visible in this listing.
718 Value
*Builder::CVTPS2PH(Value
* a
, Value
* rounding
)
720 if (JM()->mArch
.F16C())
722 return VCVTPS2PH(a
, rounding
);
726 // call scalar C function for now
// The helper is declared in the module as taking mFP32Ty and returning mInt16Ty.
727 FunctionType
* pFuncTy
= FunctionType::get(mInt16Ty
, mFP32Ty
);
728 Function
* pCvtPs2Ph
= cast
<Function
>(JM()->mpCurrentModule
->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy
));
// Register the helper's address under its name, but only if it is not known yet.
730 if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
732 sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16", (void *)&ConvertFloat32ToFloat16
);
// Convert each of the mVWidth lanes with one call and collect the results.
735 Value
* pResult
= UndefValue::get(mSimdInt16Ty
);
736 for (uint32_t i
= 0; i
< mVWidth
; ++i
)
738 Value
* pSrc
= VEXTRACT(a
, C(i
));
739 Value
* pConv
= CALL(pCvtPs2Ph
, std::initializer_list
<Value
*>{pSrc
});
740 pResult
= VINSERT(pResult
, pConv
, C(i
));
747 Value
*Builder::PMAXSD(Value
* a
, Value
* b
)
749 Value
* cmp
= ICMP_SGT(a
, b
);
750 return SELECT(cmp
, a
, b
);
753 Value
*Builder::PMINSD(Value
* a
, Value
* b
)
755 Value
* cmp
= ICMP_SLT(a
, b
);
756 return SELECT(cmp
, a
, b
);
759 // Helper function to create alloca in entry block of function
// Emits an ALLOCA of pType at the beginning of pFunc's entry block and then
// puts the builder's insertion point back where it was.
// NOTE(review): the return statement is not visible in this listing.
760 Value
* Builder::CreateEntryAlloca(Function
* pFunc
, Type
* pType
)
// Remember the current insertion point so it can be restored afterwards.
762 auto saveIP
= IRB()->saveIP();
763 IRB()->SetInsertPoint(&pFunc
->getEntryBlock(),
764 pFunc
->getEntryBlock().begin());
765 Value
* pAlloca
= ALLOCA(pType
);
// Only restore when there actually was an insertion point to save.
766 if (saveIP
.isSet()) IRB()->restoreIP(saveIP
);
// Array variant: emits an ALLOCA of pType with pArraySize at the beginning of
// pFunc's entry block and then puts the builder's insertion point back.
// NOTE(review): the return statement is not visible in this listing.
770 Value
* Builder::CreateEntryAlloca(Function
* pFunc
, Type
* pType
, Value
* pArraySize
)
// Remember the current insertion point so it can be restored afterwards.
772 auto saveIP
= IRB()->saveIP();
773 IRB()->SetInsertPoint(&pFunc
->getEntryBlock(),
774 pFunc
->getEntryBlock().begin());
775 Value
* pAlloca
= ALLOCA(pType
, pArraySize
);
// Only restore when there actually was an insertion point to save.
776 if (saveIP
.isSet()) IRB()->restoreIP(saveIP
);
// Absolute value by bit manipulation: view the input as mSimdInt32Ty, AND every
// lane with 0x7fffffff (clears bit 31) and view the result as mSimdFP32Ty again.
// NOTE(review): the return statement is not visible in this listing.
780 Value
* Builder::VABSPS(Value
* a
)
782 Value
* asInt
= BITCAST(a
, mSimdInt32Ty
);
783 Value
* result
= BITCAST(AND(asInt
, VIMMED1(0x7fffffff)), mSimdFP32Ty
);
// Signed integer clamp built from two compare/select pairs; `name` is attached
// to the final select.
// NOTE(review): the return statement is not visible in this listing.
787 Value
*Builder::ICLAMP(Value
* src
, Value
* low
, Value
* high
, const llvm::Twine
& name
)
// Lower bound: take low wherever src < low.
789 Value
*lowCmp
= ICMP_SLT(src
, low
);
790 Value
*ret
= SELECT(lowCmp
, low
, src
);
// Upper bound, applied to the already lower-clamped value.
792 Value
*highCmp
= ICMP_SGT(ret
, high
);
793 ret
= SELECT(highCmp
, high
, ret
, name
);
// Floating point clamp built from two ordered compare/select pairs.
// NOTE(review): the return statement is not visible in this listing.
798 Value
*Builder::FCLAMP(Value
* src
, Value
* low
, Value
* high
)
// Lower bound: take low wherever src < low (ordered compare).
800 Value
*lowCmp
= FCMP_OLT(src
, low
);
801 Value
*ret
= SELECT(lowCmp
, low
, src
);
// Upper bound, applied to the already lower-clamped value.
803 Value
*highCmp
= FCMP_OGT(ret
, high
);
804 ret
= SELECT(highCmp
, high
, ret
);
// Clamp against immediate bounds: VMAXPS with a splat of `low`, then VMINPS
// with a splat of `high`.
// NOTE(review): the return statement is not visible in this listing.
809 Value
*Builder::FCLAMP(Value
* src
, float low
, float high
)
811 Value
* result
= VMAXPS(src
, VIMMED1(low
));
812 result
= VMINPS(result
, VIMMED1(high
));
// Computes a * b + c: through the VFMADDPS wrapper when AVX2 is reported, or as
// a separate FMUL followed by FADD.
// NOTE(review): the declaration of `vOut`, the else that separates the two
// assignments and the return are not visible in this listing.
817 Value
*Builder::FMADDPS(Value
* a
, Value
* b
, Value
* c
)
820 // use FMADs if available
821 if(JM()->mArch
.AVX2())
823 vOut
= VFMADDPS(a
, b
, c
);
827 vOut
= FADD(FMUL(a
, b
), c
);
832 Value
* Builder::POPCNT(Value
* a
)
834 Function
* pCtPop
= Intrinsic::getDeclaration(JM()->mpCurrentModule
, Intrinsic::ctpop
, { a
->getType() });
835 return CALL(pCtPop
, std::initializer_list
<Value
*>{a
});
838 //////////////////////////////////////////////////////////////////////////
839 /// @brief C functions called by LLVM IR
840 //////////////////////////////////////////////////////////////////////////
842 //////////////////////////////////////////////////////////////////////////
843 /// @brief called in JIT code, inserted by PRINT
844 /// output to both stdout and visual studio debug console
// Variadic print helper; PRINT() registers this function under the symbol name
// "CallPrint" so that generated code can call it.
// NOTE(review): only the Windows-specific statements are visible in this
// listing; the va_list setup, the declaration of strBuf and the rest of the
// body are not shown.
845 void __cdecl
CallPrint(const char* fmt
, ...)
851 #if defined( _WIN32 )
// Format into strBuf (truncating if it does not fit) and pass the text to
// OutputDebugStringA.
853 vsnprintf_s(strBuf
, _TRUNCATE
, fmt
, args
);
854 OutputDebugStringA(strBuf
);
860 Value
*Builder::VEXTRACTI128(Value
* a
, Constant
* imm8
)
862 bool flag
= !imm8
->isZeroValue();
863 SmallVector
<Constant
*,8> idx
;
864 for (unsigned i
= 0; i
< mVWidth
/ 2; i
++) {
865 idx
.push_back(C(flag
? i
+ mVWidth
/ 2 : i
));
867 return VSHUFFLE(a
, VUNDEF_I(), ConstantVector::get(idx
));
// Combines a with b using two VSHUFFLEs: b is first shuffled into `inter`, then
// a and inter are shuffled together with an index list that depends on imm8.
// NOTE(review): the body of the first loop (which fills idx) is not visible in
// this listing.
870 Value
*Builder::VINSERTI128(Value
* a
, Value
* b
, Constant
* imm8
)
// flag is true for any non-zero imm8.
872 bool flag
= !imm8
->isZeroValue();
873 SmallVector
<Constant
*,8> idx
;
874 for (unsigned i
= 0; i
< mVWidth
; i
++) {
877 Value
*inter
= VSHUFFLE(b
, VUNDEF_I(), ConstantVector::get(idx
));
// Indices below mVWidth select from a, indices >= mVWidth select from inter -
// assuming VSHUFFLE follows shufflevector index rules; TODO confirm.
879 SmallVector
<Constant
*,8> idx2
;
// First half of the result: a's own elements when flag is set, otherwise
// inter's first elements.
880 for (unsigned i
= 0; i
< mVWidth
/ 2; i
++) {
881 idx2
.push_back(C(flag
? i
: i
+ mVWidth
));
// Second half of the result: inter's first elements when flag is set, otherwise
// a's own elements.
883 for (unsigned i
= mVWidth
/ 2; i
< mVWidth
; i
++) {
884 idx2
.push_back(C(flag
? i
+ mVWidth
/ 2 : i
));
886 return VSHUFFLE(a
, inter
, ConstantVector::get(idx2
));
889 // rdtsc buckets macros
// Emits a call to the external BucketManager_StartBucket function with
// (pBucketMgr, pId), guarded by KNOB_SINGLE_THREADED.
// NOTE(review): part of the argument-type list is not visible in this listing.
890 void Builder::RDTSC_START(Value
* pBucketMgr
, Value
* pId
)
892 // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
893 // buckets framework when single threaded
894 if (KNOB_SINGLE_THREADED
)
896 std::vector
<Type
*> args
{
897 PointerType::get(mInt32Ty
, 0), // pBucketMgr
// Declared as a non-variadic function returning void.
901 FunctionType
* pFuncTy
= FunctionType::get(Type::getVoidTy(JM()->mContext
), args
, false);
902 Function
* pFunc
= cast
<Function
>(JM()->mpCurrentModule
->getOrInsertFunction("BucketManager_StartBucket", pFuncTy
));
// Register the function's address under its name, but only if it is not known yet.
903 if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") == nullptr)
905 sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket", (void*)&BucketManager_StartBucket
);
908 CALL(pFunc
, { pBucketMgr
, pId
});
// Emits a call to the external BucketManager_StopBucket function with
// (pBucketMgr, pId), guarded by KNOB_SINGLE_THREADED.
// NOTE(review): part of the argument-type list is not visible in this listing.
912 void Builder::RDTSC_STOP(Value
* pBucketMgr
, Value
* pId
)
914 // @todo due to an issue with thread local storage propagation in llvm, we can only safely call into
915 // buckets framework when single threaded
916 if (KNOB_SINGLE_THREADED
)
918 std::vector
<Type
*> args
{
919 PointerType::get(mInt32Ty
, 0), // pBucketMgr
// Declared as a non-variadic function returning void.
923 FunctionType
* pFuncTy
= FunctionType::get(Type::getVoidTy(JM()->mContext
), args
, false);
924 Function
* pFunc
= cast
<Function
>(JM()->mpCurrentModule
->getOrInsertFunction("BucketManager_StopBucket", pFuncTy
));
// Register the function's address under its name, but only if it is not known yet.
925 if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") == nullptr)
927 sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket", (void*)&BucketManager_StopBucket
);
930 CALL(pFunc
, { pBucketMgr
, pId
});
// Computes a size for pType by recursing through struct and array types down
// to integer / float / half / double leaves; any other type hits the assert at
// the end.
// NOTE(review): the return statements for the scalar cases are not visible in
// this listing, so the unit of the result is not stated here.
934 uint32_t Builder::GetTypeSize(Type
* pType
)
936 if (pType
->isStructTy())
// NOTE(review): the struct size is taken as element count times the size of
// element 0, which is only right when all members have the same size - confirm
// that callers only pass homogeneous structs.
938 uint32_t numElems
= pType
->getStructNumElements();
939 Type
* pElemTy
= pType
->getStructElementType(0);
940 return numElems
* GetTypeSize(pElemTy
);
943 if (pType
->isArrayTy())
// Arrays: element count times the size of the element type.
945 uint32_t numElems
= pType
->getArrayNumElements();
946 Type
* pElemTy
= pType
->getArrayElementType();
947 return numElems
* GetTypeSize(pElemTy
);
950 if (pType
->isIntegerTy())
952 uint32_t bitSize
= pType
->getIntegerBitWidth();
956 if (pType
->isFloatTy())
961 if (pType
->isHalfTy())
966 if (pType
->isDoubleTy())
// Reached only when none of the type checks above returned.
971 SWR_ASSERT(false, "Unimplemented type.");