+// Reports whether a format is "odd", i.e. needs the special gather handling.
+// Only the first component is inspected: the format is odd when that
+// component's bit width is anything other than 8, 16, 32 or 64.
+bool FetchJit::IsOddFormat(SWR_FORMAT format)
+{
+    const uint32_t firstCompBits = GetFormatInfo(format).bpc[0];
+
+    switch (firstCompBits)
+    {
+    case 8:
+    case 16:
+    case 32:
+    case 64:
+        // regular power-of-two component width
+        return false;
+    default:
+        return true;
+    }
+}
+
+// A format is uniform when every component has the same bit width and the
+// same type as component 0. Formats with zero or one component are uniform.
+bool FetchJit::IsUniformFormat(SWR_FORMAT format)
+{
+    const SWR_FORMAT_INFO& fmt = GetFormatInfo(format);
+    const uint32_t refBits = fmt.bpc[0];
+    const uint32_t refType = fmt.type[0];
+
+    // compare each remaining component against component 0
+    uint32_t comp = 1;
+    while (comp < fmt.numComps)
+    {
+        const bool sameBits = (refBits == fmt.bpc[comp]);
+        const bool sameType = (refType == fmt.type[comp]);
+        if (!(sameBits && sameType))
+        {
+            return false;
+        }
+        ++comp;
+    }
+    return true;
+}
+
+// Unpacks the components of packed pixels based on format.
+// Components are laid out starting at bit 0, in format order. For each one:
+//   - mask off everything but this component
+//   - shift the component down to the LSB
+//   - store it in result[] at the index given by the format's swizzle
+// result[] entries for components the format does not have are left untouched.
+// NOTE(review): the running bit offset is assumed to stay below 32, i.e. the
+// packed pixel fits in 32 bits - confirm against callers.
+void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[4])
+{
+    const SWR_FORMAT_INFO& info = GetFormatInfo(format);
+
+    uint32_t bitOffset = 0;
+    for (uint32_t c = 0; c < info.numComps; ++c)
+    {
+        const uint32_t swizzledIndex = info.swizzle[c];
+        const uint32_t compBits = info.bpc[c];
+
+        // Build the mask in unsigned arithmetic: `1 << compBits` on a signed
+        // int is undefined for a 32-bit component and shifts into the sign
+        // bit for components that end at bit 31.
+        const uint32_t lowMask = (compBits >= 32) ? 0xFFFFFFFFu : ((1u << compBits) - 1u);
+        const uint32_t bitmask = lowMask << bitOffset;
+
+        Value* comp = AND(vInput, bitmask);
+        comp = LSHR(comp, bitOffset);
+
+        result[swizzledIndex] = comp;
+        bitOffset += compBits;
+    }
+}
+
+// Gather for formats with odd component sizes.
+// One whole pixel is loaded per SIMD lane and zero-extended to 32 bits; the
+// packed vector is then split by UnpackComponents so each component ends up
+// in its own vector. result[] is seeded with the format defaults first and
+// every entry is bitcast to the SIMD fp32 type before returning.
+void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4])
+{
+    const SWR_FORMAT_INFO& fmtInfo = GetFormatInfo(format);
+
+    // the whole pixel has to fit in 32 bits for this path
+    SWR_ASSERT(fmtInfo.bpp <= 32);
+
+    Value* vPacked = VUNDEF_I();
+
+    // seed every channel with the format's default value
+    for (uint32_t chan = 0; chan < 4; ++chan)
+    {
+        result[chan] = VIMMED1((int)fmtInfo.defaults[chan]);
+    }
+
+    // pick the load width from the pixel size; 24bpp uses the 32-bit load
+    // NOTE(review): pLoadTy stays null if the assert below is compiled out
+    PointerType* pLoadTy = nullptr;
+    const auto pixelBits = fmtInfo.bpp;
+    if (pixelBits == 8)
+    {
+        pLoadTy = Type::getInt8PtrTy(JM()->mContext);
+    }
+    else if (pixelBits == 16)
+    {
+        pLoadTy = Type::getInt16PtrTy(JM()->mContext);
+    }
+    else if (pixelBits == 24 || pixelBits == 32)
+    {
+        pLoadTy = Type::getInt32PtrTy(JM()->mContext);
+    }
+    else
+    {
+        SWR_ASSERT(0);
+    }
+
+    // scratch slot that masked-off lanes load from instead of pBase
+    Value* pTmp = ALLOCA(pLoadTy->getElementType());
+
+    // gather one pixel per SIMD lane
+    for (uint32_t lane = 0; lane < JM()->mVWidth; ++lane)
+    {
+        Value* laneOffset = VEXTRACT(offsets, C(lane));
+        Value* laneAddr = GEP(pBase, laneOffset);
+        Value* laneEnabled = VEXTRACT(pMask, C(lane));
+
+        Value* typedAddr = POINTER_CAST(laneAddr, pLoadTy);
+
+        // disabled lanes read the scratch slot rather than the real address
+        Value* safeAddr = SELECT(laneEnabled, typedAddr, pTmp);
+
+        // load the pixel and zero extend it to a 32bit integer
+        Value* pixel = INT_CAST(LOAD(safeAddr), mInt32Ty, false);
+
+        // place it in this lane of the packed vector
+        vPacked = VINSERT(vPacked, pixel, C(lane));
+    }
+
+    UnpackComponents(format, vPacked, result);
+
+    // cast all four channels to fp32
+    for (uint32_t chan = 0; chan < 4; ++chan)
+    {
+        result[chan] = BITCAST(result[chan], mSimdFP32Ty);
+    }
+}
+
+// Converts normalized integer texel components to fp32 in place.
+// For each component of the format, written through the format's swizzle:
+//   - UNUSED and non-normalized components are left untouched
+//   - SNORM: result = c * (1.0f / (2^(n-1) - 1))
+//   - UNORM: result = c * (1.0f / (2^n - 1)), except 24-bit which divides
+// where n is the component's bit width. Each converted texel is first bitcast
+// to the SIMD int32 type.
+void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
+{
+    const SWR_FORMAT_INFO &info = GetFormatInfo(format);
+
+    for (uint32_t c = 0; c < info.numComps; ++c)
+    {
+        // skip any conversion on UNUSED components and on components that
+        // are not normalized
+        if (info.type[c] == SWR_TYPE_UNUSED || !info.isNormalized[c])
+        {
+            continue;
+        }
+
+        const uint32_t compIndex = info.swizzle[c];
+        const uint32_t n = info.bpc[c];
+
+        if (info.type[c] == SWR_TYPE_SNORM)
+        {
+            /// @todo The most-negative value maps to -1.0f. e.g. the 5-bit value 10000 maps to -1.0f.
+
+            /// result = c * (1.0f / (2^(n-1) - 1);
+            // 64-bit shift: `1 << (n - 1)` overflows a signed int when n == 32
+            const uint64_t maxValue = (1ull << (n - 1)) - 1;
+            const float scale = 1.0f / static_cast<float>(maxValue);
+            Value *vScale = VIMMED1(scale);
+            texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
+            texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
+            texels[compIndex] = FMUL(texels[compIndex], vScale);
+        }
+        else
+        {
+            SWR_ASSERT(info.type[c] == SWR_TYPE_UNORM);
+
+            /// result = c * (1.0f / (2^n - 1))
+            // 64-bit shift: `1 << n` is undefined for n == 32 and would
+            // produce a bogus (typically infinite) scale
+            const uint64_t maxValue = (1ull << n) - 1;
+
+            // special case 24bit unorm format, which requires a full divide to meet ULP requirement
+            if (n == 24)
+            {
+                const float scale = static_cast<float>(maxValue);
+                Value* vScale = VIMMED1(scale);
+                texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
+                // NOTE(review): signed conversion here matches the unsigned one
+                // only if the upper 8 bits of the texel are zero - confirm
+                texels[compIndex] = SI_TO_FP(texels[compIndex], mSimdFP32Ty);
+                texels[compIndex] = FDIV(texels[compIndex], vScale);
+            }
+            else
+            {
+                const float scale = 1.0f / static_cast<float>(maxValue);
+                Value *vScale = VIMMED1(scale);
+                texels[compIndex] = BITCAST(texels[compIndex], mSimdInt32Ty);
+                texels[compIndex] = UI_TO_FP(texels[compIndex], mSimdFP32Ty);
+                texels[compIndex] = FMUL(texels[compIndex], vScale);
+            }
+        }
+    }
+}
+