- // cast types
- Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
- Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
-
- if(bPackedOutput)
- {
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
- // shuffle mask
- Value* vConstMask = C<char>({0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
- 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15});
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb: group components together in each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
-
- Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty);
- // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
-
- // do the same for zw components
- Value* vi128ZW = nullptr;
- if(info.numComps > 2)
- {
- vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty);
- }
-
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
- for(uint32_t i = 0; i < 4; i++)
- {
- uint32_t swizzleIndex = info.swizzle[i];
- // todo: fix for packed
- Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
- if(i >= info.numComps)
- {
- // set the default component val
- vGatherOutput[swizzleIndex] = vGatherMaskedVal;
- continue;
- }
-
- // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- // sign extend
- vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
- }
- }
- // else zero extend
- else{
- // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
- // apply defaults
- for (uint32_t i = 0; i < 4; ++i)
- {
- vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
- }
-
- for(uint32_t i = 0; i < info.numComps; i++){
- uint32_t swizzleIndex = info.swizzle[i];
-
- // pshufb masks for each component
- Value* vConstMask;
- switch(i)
- {
- case 0:
- // x shuffle mask
- vConstMask = C<char>({0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1});
- break;
- case 1:
- // y shuffle mask
- vConstMask = C<char>({1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1});
- break;
- case 2:
- // z shuffle mask
- vConstMask = C<char>({2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1});
- break;
- case 3:
- // w shuffle mask
- vConstMask = C<char>({3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1});
- break;
- default:
- vConstMask = nullptr;
- break;
- }
-
- vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb for x channel
- // 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
- }
- }