1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation of the blend jitter
29 ******************************************************************************/
30 #include "jit_pch.hpp"
33 #include "blend_jit.h"
34 #include "gen_state_llvm.h"
35 #include "functionpasses/passes.h"
37 // components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
38 #define QUANTIZE_THRESHOLD 2
41 using namespace SwrJit
;
43 //////////////////////////////////////////////////////////////////////////
44 /// Interface to Jitting a blend shader
45 //////////////////////////////////////////////////////////////////////////
46 struct BlendJit
: public Builder
48 BlendJit(JitManager
* pJitMgr
) : Builder(pJitMgr
){};
50 template <bool Color
, bool Alpha
>
51 void GenerateBlendFactor(SWR_BLEND_FACTOR factor
,
63 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(1.0f
);
65 case BLENDFACTOR_SRC_COLOR
:
71 case BLENDFACTOR_SRC_ALPHA
:
72 out
[0] = out
[1] = out
[2] = out
[3] = src
[3];
74 case BLENDFACTOR_DST_ALPHA
:
75 out
[0] = out
[1] = out
[2] = out
[3] = dst
[3];
77 case BLENDFACTOR_DST_COLOR
:
83 case BLENDFACTOR_SRC_ALPHA_SATURATE
:
84 out
[0] = out
[1] = out
[2] = VMINPS(src
[3], FSUB(VIMMED1(1.0f
), dst
[3]));
85 out
[3] = VIMMED1(1.0f
);
87 case BLENDFACTOR_CONST_COLOR
:
88 out
[0] = constColor
[0];
89 out
[1] = constColor
[1];
90 out
[2] = constColor
[2];
91 out
[3] = constColor
[3];
93 case BLENDFACTOR_CONST_ALPHA
:
94 out
[0] = out
[1] = out
[2] = out
[3] = constColor
[3];
96 case BLENDFACTOR_SRC1_COLOR
:
102 case BLENDFACTOR_SRC1_ALPHA
:
103 out
[0] = out
[1] = out
[2] = out
[3] = src1
[3];
105 case BLENDFACTOR_ZERO
:
106 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
108 case BLENDFACTOR_INV_SRC_COLOR
:
109 out
[0] = FSUB(VIMMED1(1.0f
), src
[0]);
110 out
[1] = FSUB(VIMMED1(1.0f
), src
[1]);
111 out
[2] = FSUB(VIMMED1(1.0f
), src
[2]);
112 out
[3] = FSUB(VIMMED1(1.0f
), src
[3]);
114 case BLENDFACTOR_INV_SRC_ALPHA
:
115 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), src
[3]);
117 case BLENDFACTOR_INV_DST_ALPHA
:
118 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), dst
[3]);
120 case BLENDFACTOR_INV_DST_COLOR
:
121 out
[0] = FSUB(VIMMED1(1.0f
), dst
[0]);
122 out
[1] = FSUB(VIMMED1(1.0f
), dst
[1]);
123 out
[2] = FSUB(VIMMED1(1.0f
), dst
[2]);
124 out
[3] = FSUB(VIMMED1(1.0f
), dst
[3]);
126 case BLENDFACTOR_INV_CONST_COLOR
:
127 out
[0] = FSUB(VIMMED1(1.0f
), constColor
[0]);
128 out
[1] = FSUB(VIMMED1(1.0f
), constColor
[1]);
129 out
[2] = FSUB(VIMMED1(1.0f
), constColor
[2]);
130 out
[3] = FSUB(VIMMED1(1.0f
), constColor
[3]);
132 case BLENDFACTOR_INV_CONST_ALPHA
:
133 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), constColor
[3]);
135 case BLENDFACTOR_INV_SRC1_COLOR
:
136 out
[0] = FSUB(VIMMED1(1.0f
), src1
[0]);
137 out
[1] = FSUB(VIMMED1(1.0f
), src1
[1]);
138 out
[2] = FSUB(VIMMED1(1.0f
), src1
[2]);
139 out
[3] = FSUB(VIMMED1(1.0f
), src1
[3]);
141 case BLENDFACTOR_INV_SRC1_ALPHA
:
142 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), src1
[3]);
145 SWR_INVALID("Unsupported blend factor: %d", factor
);
146 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
163 void Clamp(SWR_FORMAT format
, Value
* src
[4])
165 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
166 SWR_TYPE type
= info
.type
[0];
174 src
[0] = VMINPS(VMAXPS(src
[0], VIMMED1(0.0f
)), VIMMED1(1.0f
));
175 src
[1] = VMINPS(VMAXPS(src
[1], VIMMED1(0.0f
)), VIMMED1(1.0f
));
176 src
[2] = VMINPS(VMAXPS(src
[2], VIMMED1(0.0f
)), VIMMED1(1.0f
));
177 src
[3] = VMINPS(VMAXPS(src
[3], VIMMED1(0.0f
)), VIMMED1(1.0f
));
181 src
[0] = VMINPS(VMAXPS(src
[0], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
182 src
[1] = VMINPS(VMAXPS(src
[1], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
183 src
[2] = VMINPS(VMAXPS(src
[2], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
184 src
[3] = VMINPS(VMAXPS(src
[3], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
187 case SWR_TYPE_UNKNOWN
:
188 SWR_INVALID("Unsupport format type: %d", type
);
192 void ApplyDefaults(SWR_FORMAT format
, Value
* src
[4])
194 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
196 bool valid
[] = {false, false, false, false};
197 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
199 valid
[info
.swizzle
[c
]] = true;
202 for (uint32_t c
= 0; c
< 4; ++c
)
206 src
[c
] = BITCAST(VIMMED1((int)info
.defaults
[c
]), mSimdFP32Ty
);
211 void ApplyUnusedDefaults(SWR_FORMAT format
, Value
* src
[4])
213 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
215 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
217 if (info
.type
[c
] == SWR_TYPE_UNUSED
)
219 src
[info
.swizzle
[c
]] =
220 BITCAST(VIMMED1((int)info
.defaults
[info
.swizzle
[c
]]), mSimdFP32Ty
);
225 void Quantize(SWR_FORMAT format
, Value
* src
[4])
227 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
228 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
230 if (info
.bpc
[c
] <= QUANTIZE_THRESHOLD
&& info
.type
[c
] != SWR_TYPE_UNUSED
)
232 uint32_t swizComp
= info
.swizzle
[c
];
233 float factor
= (float)((1 << info
.bpc
[c
]) - 1);
234 switch (info
.type
[c
])
237 src
[swizComp
] = FADD(FMUL(src
[swizComp
], VIMMED1(factor
)), VIMMED1(0.5f
));
238 src
[swizComp
] = VROUND(src
[swizComp
], C(_MM_FROUND_TO_ZERO
));
239 src
[swizComp
] = FMUL(src
[swizComp
], VIMMED1(1.0f
/ factor
));
242 SWR_INVALID("Unsupported format type: %d", info
.type
[c
]);
248 template <bool Color
, bool Alpha
>
249 void BlendFunc(SWR_BLEND_OP blendOp
,
259 for (uint32_t i
= 0; i
< 4; ++i
)
261 srcBlend
[i
] = FMUL(src
[i
], srcFactor
[i
]);
262 dstBlend
[i
] = FMUL(dst
[i
], dstFactor
[i
]);
268 out
[0] = FADD(srcBlend
[0], dstBlend
[0]);
269 out
[1] = FADD(srcBlend
[1], dstBlend
[1]);
270 out
[2] = FADD(srcBlend
[2], dstBlend
[2]);
271 out
[3] = FADD(srcBlend
[3], dstBlend
[3]);
274 case BLENDOP_SUBTRACT
:
275 out
[0] = FSUB(srcBlend
[0], dstBlend
[0]);
276 out
[1] = FSUB(srcBlend
[1], dstBlend
[1]);
277 out
[2] = FSUB(srcBlend
[2], dstBlend
[2]);
278 out
[3] = FSUB(srcBlend
[3], dstBlend
[3]);
281 case BLENDOP_REVSUBTRACT
:
282 out
[0] = FSUB(dstBlend
[0], srcBlend
[0]);
283 out
[1] = FSUB(dstBlend
[1], srcBlend
[1]);
284 out
[2] = FSUB(dstBlend
[2], srcBlend
[2]);
285 out
[3] = FSUB(dstBlend
[3], srcBlend
[3]);
289 out
[0] = VMINPS(src
[0], dst
[0]);
290 out
[1] = VMINPS(src
[1], dst
[1]);
291 out
[2] = VMINPS(src
[2], dst
[2]);
292 out
[3] = VMINPS(src
[3], dst
[3]);
296 out
[0] = VMAXPS(src
[0], dst
[0]);
297 out
[1] = VMAXPS(src
[1], dst
[1]);
298 out
[2] = VMAXPS(src
[2], dst
[2]);
299 out
[3] = VMAXPS(src
[3], dst
[3]);
303 SWR_INVALID("Unsupported blend operation: %d", blendOp
);
304 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
321 void LogicOpFunc(SWR_LOGIC_OP logicOp
, Value
* src
[4], Value
* dst
[4], Value
* result
[4])
323 // Op: (s == PS output, d = RT contents)
327 result
[0] = VIMMED1(0);
328 result
[1] = VIMMED1(0);
329 result
[2] = VIMMED1(0);
330 result
[3] = VIMMED1(0);
335 result
[0] = XOR(OR(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
336 result
[1] = XOR(OR(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
337 result
[2] = XOR(OR(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
338 result
[3] = XOR(OR(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
341 case LOGICOP_AND_INVERTED
:
343 // todo: use avx andnot instr when I can find the intrinsic to call
344 result
[0] = AND(XOR(src
[0], VIMMED1(0xFFFFFFFF)), dst
[0]);
345 result
[1] = AND(XOR(src
[1], VIMMED1(0xFFFFFFFF)), dst
[1]);
346 result
[2] = AND(XOR(src
[2], VIMMED1(0xFFFFFFFF)), dst
[2]);
347 result
[3] = AND(XOR(src
[3], VIMMED1(0xFFFFFFFF)), dst
[3]);
350 case LOGICOP_COPY_INVERTED
:
352 result
[0] = XOR(src
[0], VIMMED1(0xFFFFFFFF));
353 result
[1] = XOR(src
[1], VIMMED1(0xFFFFFFFF));
354 result
[2] = XOR(src
[2], VIMMED1(0xFFFFFFFF));
355 result
[3] = XOR(src
[3], VIMMED1(0xFFFFFFFF));
358 case LOGICOP_AND_REVERSE
:
360 // todo: use avx andnot instr when I can find the intrinsic to call
361 result
[0] = AND(XOR(dst
[0], VIMMED1(0xFFFFFFFF)), src
[0]);
362 result
[1] = AND(XOR(dst
[1], VIMMED1(0xFFFFFFFF)), src
[1]);
363 result
[2] = AND(XOR(dst
[2], VIMMED1(0xFFFFFFFF)), src
[2]);
364 result
[3] = AND(XOR(dst
[3], VIMMED1(0xFFFFFFFF)), src
[3]);
369 result
[0] = XOR(dst
[0], VIMMED1(0xFFFFFFFF));
370 result
[1] = XOR(dst
[1], VIMMED1(0xFFFFFFFF));
371 result
[2] = XOR(dst
[2], VIMMED1(0xFFFFFFFF));
372 result
[3] = XOR(dst
[3], VIMMED1(0xFFFFFFFF));
377 result
[0] = XOR(src
[0], dst
[0]);
378 result
[1] = XOR(src
[1], dst
[1]);
379 result
[2] = XOR(src
[2], dst
[2]);
380 result
[3] = XOR(src
[3], dst
[3]);
385 result
[0] = XOR(AND(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
386 result
[1] = XOR(AND(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
387 result
[2] = XOR(AND(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
388 result
[3] = XOR(AND(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
393 result
[0] = AND(src
[0], dst
[0]);
394 result
[1] = AND(src
[1], dst
[1]);
395 result
[2] = AND(src
[2], dst
[2]);
396 result
[3] = AND(src
[3], dst
[3]);
401 result
[0] = XOR(XOR(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
402 result
[1] = XOR(XOR(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
403 result
[2] = XOR(XOR(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
404 result
[3] = XOR(XOR(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
414 case LOGICOP_OR_INVERTED
:
416 result
[0] = OR(XOR(src
[0], VIMMED1(0xFFFFFFFF)), dst
[0]);
417 result
[1] = OR(XOR(src
[1], VIMMED1(0xFFFFFFFF)), dst
[1]);
418 result
[2] = OR(XOR(src
[2], VIMMED1(0xFFFFFFFF)), dst
[2]);
419 result
[3] = OR(XOR(src
[3], VIMMED1(0xFFFFFFFF)), dst
[3]);
429 case LOGICOP_OR_REVERSE
:
431 result
[0] = OR(XOR(dst
[0], VIMMED1(0xFFFFFFFF)), src
[0]);
432 result
[1] = OR(XOR(dst
[1], VIMMED1(0xFFFFFFFF)), src
[1]);
433 result
[2] = OR(XOR(dst
[2], VIMMED1(0xFFFFFFFF)), src
[2]);
434 result
[3] = OR(XOR(dst
[3], VIMMED1(0xFFFFFFFF)), src
[3]);
439 result
[0] = OR(src
[0], dst
[0]);
440 result
[1] = OR(src
[1], dst
[1]);
441 result
[2] = OR(src
[2], dst
[2]);
442 result
[3] = OR(src
[3], dst
[3]);
446 result
[0] = VIMMED1(0xFFFFFFFF);
447 result
[1] = VIMMED1(0xFFFFFFFF);
448 result
[2] = VIMMED1(0xFFFFFFFF);
449 result
[3] = VIMMED1(0xFFFFFFFF);
453 SWR_INVALID("Unsupported logic operation: %d", logicOp
);
454 result
[0] = result
[1] = result
[2] = result
[3] = VIMMED1(0.0f
);
460 AlphaTest(const BLEND_COMPILE_STATE
& state
, Value
* pBlendState
, Value
* ppAlpha
, Value
* ppMask
)
462 // load uint32_t reference
463 Value
* pRef
= VBROADCAST(LOAD(pBlendState
, {0, SWR_BLEND_STATE_alphaTestReference
}));
466 Value
* pAlpha
= LOAD(ppAlpha
, {0, 0});
468 Value
* pTest
= nullptr;
469 if (state
.alphaTestFormat
== ALPHA_TEST_UNORM8
)
471 // convert float alpha to unorm8
472 Value
* pAlphaU8
= FMUL(pAlpha
, VIMMED1(256.0f
));
473 pAlphaU8
= FP_TO_UI(pAlphaU8
, mSimdInt32Ty
);
476 switch (state
.alphaTestFunction
)
479 pTest
= VIMMED1(true);
482 pTest
= VIMMED1(false);
485 pTest
= ICMP_ULT(pAlphaU8
, pRef
);
488 pTest
= ICMP_EQ(pAlphaU8
, pRef
);
491 pTest
= ICMP_ULE(pAlphaU8
, pRef
);
494 pTest
= ICMP_UGT(pAlphaU8
, pRef
);
497 pTest
= ICMP_NE(pAlphaU8
, pRef
);
500 pTest
= ICMP_UGE(pAlphaU8
, pRef
);
503 SWR_INVALID("Invalid alpha test function");
510 pRef
= BITCAST(pRef
, mSimdFP32Ty
);
513 switch (state
.alphaTestFunction
)
516 pTest
= VIMMED1(true);
519 pTest
= VIMMED1(false);
522 pTest
= FCMP_OLT(pAlpha
, pRef
);
525 pTest
= FCMP_OEQ(pAlpha
, pRef
);
528 pTest
= FCMP_OLE(pAlpha
, pRef
);
531 pTest
= FCMP_OGT(pAlpha
, pRef
);
534 pTest
= FCMP_ONE(pAlpha
, pRef
);
537 pTest
= FCMP_OGE(pAlpha
, pRef
);
540 SWR_INVALID("Invalid alpha test function");
546 Value
* pMask
= LOAD(ppMask
);
548 // convert to int1 mask
551 // and with alpha test result
552 pMask
= AND(pMask
, pTest
);
554 // convert back to vector mask
555 pMask
= VMASK(pMask
);
558 STORE(pMask
, ppMask
);
561 Function
* Create(const BLEND_COMPILE_STATE
& state
)
563 std::stringstream
fnName("BLND_",
564 std::ios_base::in
| std::ios_base::out
| std::ios_base::ate
);
565 fnName
<< ComputeCRC(0, &state
, sizeof(state
));
567 // blend function signature
568 // typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_CONTEXT*);
570 std::vector
<Type
*> args
{
571 PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0) // SWR_BLEND_CONTEXT*
574 // std::vector<Type*> args{
575 // PointerType::get(Gen_SWR_BLEND_CONTEXT(JM()), 0), // SWR_BLEND_CONTEXT*
578 FunctionType
* fTy
= FunctionType::get(IRB()->getVoidTy(), args
, false);
579 Function
* blendFunc
= Function::Create(
580 fTy
, GlobalValue::ExternalLinkage
, fnName
.str(), JM()->mpCurrentModule
);
581 blendFunc
->getParent()->setModuleIdentifier(blendFunc
->getName());
583 BasicBlock
* entry
= BasicBlock::Create(JM()->mContext
, "entry", blendFunc
);
585 IRB()->SetInsertPoint(entry
);
588 auto argitr
= blendFunc
->arg_begin();
589 Value
* pBlendContext
= &*argitr
++;
590 pBlendContext
->setName("pBlendContext");
591 Value
* pBlendState
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_pBlendState
});
592 pBlendState
->setName("pBlendState");
593 Value
* pSrc
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_src
});
594 pSrc
->setName("src");
595 Value
* pSrc1
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_src1
});
596 pSrc1
->setName("src1");
597 Value
* pSrc0Alpha
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_src0alpha
});
598 pSrc0Alpha
->setName("src0alpha");
599 Value
* sampleNum
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_sampleNum
});
600 sampleNum
->setName("sampleNum");
601 Value
* pDst
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_pDst
});
602 pDst
->setName("pDst");
603 Value
* pResult
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_result
});
604 pResult
->setName("result");
605 Value
* ppoMask
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_oMask
});
606 ppoMask
->setName("ppoMask");
607 Value
* ppMask
= LOAD(pBlendContext
, {0, SWR_BLEND_CONTEXT_pMask
});
608 ppMask
->setName("pMask");
610 static_assert(KNOB_COLOR_HOT_TILE_FORMAT
== R32G32B32A32_FLOAT
,
611 "Unsupported hot tile format");
613 Value
* constantColor
[4];
617 for (uint32_t i
= 0; i
< 4; ++i
)
620 dst
[i
] = LOAD(pDst
, {0, i
});
622 // load constant color
623 constantColor
[i
] = VBROADCAST(LOAD(pBlendState
, {0, SWR_BLEND_STATE_constantColor
, i
}));
626 src
[i
] = LOAD(pSrc
, {0, i
});
629 src1
[i
] = LOAD(pSrc1
, {0, i
});
631 Value
* currentSampleMask
= VIMMED1(-1);
632 if (state
.desc
.alphaToCoverageEnable
)
634 Value
* pClampedSrc
= FCLAMP(src
[3], 0.0f
, 1.0f
);
635 uint32_t bits
= (1 << state
.desc
.numSamples
) - 1;
636 currentSampleMask
= FMUL(pClampedSrc
, VBROADCAST(C((float)bits
)));
637 currentSampleMask
= FP_TO_SI(FADD(currentSampleMask
, VIMMED1(0.5f
)), mSimdInt32Ty
);
641 if (state
.desc
.alphaTestEnable
)
643 // Gather for archrast stats
644 STORE(C(1), pBlendContext
, {0, SWR_BLEND_CONTEXT_isAlphaTested
});
645 AlphaTest(state
, pBlendState
, pSrc0Alpha
, ppMask
);
649 // Gather for archrast stats
650 STORE(C(0), pBlendContext
, {0, SWR_BLEND_CONTEXT_isAlphaTested
});
654 if (state
.blendState
.blendEnable
)
656 // Gather for archrast stats
657 STORE(C(1), pBlendContext
, {0, SWR_BLEND_CONTEXT_isAlphaBlended
});
660 Clamp(state
.format
, src
);
661 Clamp(state
.format
, src1
);
662 Clamp(state
.format
, dst
);
663 Clamp(state
.format
, constantColor
);
665 // apply defaults to hottile contents to take into account missing components
666 ApplyDefaults(state
.format
, dst
);
668 // Force defaults for unused 'X' components
669 ApplyUnusedDefaults(state
.format
, dst
);
671 // Quantize low precision components
672 Quantize(state
.format
, dst
);
674 // special case clamping for R11G11B10_float which has no sign bit
675 if (state
.format
== R11G11B10_FLOAT
)
677 dst
[0] = VMAXPS(dst
[0], VIMMED1(0.0f
));
678 dst
[1] = VMAXPS(dst
[1], VIMMED1(0.0f
));
679 dst
[2] = VMAXPS(dst
[2], VIMMED1(0.0f
));
680 dst
[3] = VMAXPS(dst
[3], VIMMED1(0.0f
));
685 if (state
.desc
.independentAlphaBlendEnable
)
687 GenerateBlendFactor
<true, false>(
688 state
.blendState
.sourceBlendFactor
, constantColor
, src
, src1
, dst
, srcFactor
);
689 GenerateBlendFactor
<false, true>(state
.blendState
.sourceAlphaBlendFactor
,
696 GenerateBlendFactor
<true, false>(
697 state
.blendState
.destBlendFactor
, constantColor
, src
, src1
, dst
, dstFactor
);
698 GenerateBlendFactor
<false, true>(state
.blendState
.destAlphaBlendFactor
,
705 BlendFunc
<true, false>(
706 state
.blendState
.colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
707 BlendFunc
<false, true>(
708 state
.blendState
.alphaBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
712 GenerateBlendFactor
<true, true>(
713 state
.blendState
.sourceBlendFactor
, constantColor
, src
, src1
, dst
, srcFactor
);
714 GenerateBlendFactor
<true, true>(
715 state
.blendState
.destBlendFactor
, constantColor
, src
, src1
, dst
, dstFactor
);
717 BlendFunc
<true, true>(
718 state
.blendState
.colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
722 for (uint32_t i
= 0; i
< 4; ++i
)
724 STORE(result
[i
], pResult
, {0, i
});
729 // Gather for archrast stats
730 STORE(C(0), pBlendContext
, {0, SWR_BLEND_CONTEXT_isAlphaBlended
});
733 if (state
.blendState
.logicOpEnable
)
735 const SWR_FORMAT_INFO
& info
= GetFormatInfo(state
.format
);
739 if (!state
.blendState
.blendEnable
)
741 Clamp(state
.format
, src
);
742 Clamp(state
.format
, dst
);
745 for (uint32_t i
= 0; i
< 4; i
++)
747 if (info
.type
[i
] == SWR_TYPE_UNUSED
)
752 if (info
.bpc
[i
] >= 32)
754 vMask
[i
] = VIMMED1(0xFFFFFFFF);
755 scale
[i
] = 0xFFFFFFFF;
759 vMask
[i
] = VIMMED1((1 << info
.bpc
[i
]) - 1);
760 if (info
.type
[i
] == SWR_TYPE_SNORM
)
761 scale
[i
] = (1 << (info
.bpc
[i
] - 1)) - 1;
763 scale
[i
] = (1 << info
.bpc
[i
]) - 1;
766 switch (info
.type
[i
])
769 SWR_INVALID("Unsupported type for logic op: %d", info
.type
[i
]);
772 case SWR_TYPE_UNKNOWN
:
773 case SWR_TYPE_UNUSED
:
778 src
[i
] = BITCAST(src
[i
], mSimdInt32Ty
);
779 dst
[i
] = BITCAST(dst
[i
], mSimdInt32Ty
);
782 src
[i
] = FP_TO_SI(FMUL(src
[i
], VIMMED1(scale
[i
])), mSimdInt32Ty
);
783 dst
[i
] = FP_TO_SI(FMUL(dst
[i
], VIMMED1(scale
[i
])), mSimdInt32Ty
);
786 src
[i
] = FP_TO_UI(FMUL(src
[i
], VIMMED1(scale
[i
])), mSimdInt32Ty
);
787 dst
[i
] = FP_TO_UI(FMUL(dst
[i
], VIMMED1(scale
[i
])), mSimdInt32Ty
);
792 LogicOpFunc(state
.blendState
.logicOpFunc
, src
, dst
, result
);
795 for (uint32_t i
= 0; i
< 4; ++i
)
797 if (info
.type
[i
] == SWR_TYPE_UNUSED
)
802 // clear upper bits from PS output not in RT format after doing logic op
803 result
[i
] = AND(result
[i
], vMask
[i
]);
805 switch (info
.type
[i
])
808 SWR_INVALID("Unsupported type for logic op: %d", info
.type
[i
]);
811 case SWR_TYPE_UNKNOWN
:
812 case SWR_TYPE_UNUSED
:
817 result
[i
] = BITCAST(result
[i
], mSimdFP32Ty
);
820 result
[i
] = SHL(result
[i
], C(32 - info
.bpc
[i
]));
821 result
[i
] = ASHR(result
[i
], C(32 - info
.bpc
[i
]));
822 result
[i
] = FMUL(SI_TO_FP(result
[i
], mSimdFP32Ty
), VIMMED1(1.0f
/ scale
[i
]));
825 result
[i
] = FMUL(UI_TO_FP(result
[i
], mSimdFP32Ty
), VIMMED1(1.0f
/ scale
[i
]));
829 STORE(result
[i
], pResult
, {0, i
});
833 if (state
.desc
.oMaskEnable
)
835 assert(!(state
.desc
.alphaToCoverageEnable
));
837 Value
* oMask
= LOAD(ppoMask
);
838 currentSampleMask
= AND(oMask
, currentSampleMask
);
841 if (state
.desc
.sampleMaskEnable
)
843 Value
* sampleMask
= LOAD(pBlendState
, {0, SWR_BLEND_STATE_sampleMask
});
844 currentSampleMask
= AND(VBROADCAST(sampleMask
), currentSampleMask
);
847 if (state
.desc
.sampleMaskEnable
|| state
.desc
.alphaToCoverageEnable
||
848 state
.desc
.oMaskEnable
)
850 // load coverage mask and mask off any lanes with no samples
851 Value
* pMask
= LOAD(ppMask
);
852 Value
* sampleMasked
= SHL(C(1), sampleNum
);
853 currentSampleMask
= AND(currentSampleMask
, VBROADCAST(sampleMasked
));
854 currentSampleMask
= S_EXT(ICMP_UGT(currentSampleMask
, VBROADCAST(C(0))), mSimdInt32Ty
);
855 Value
* outputMask
= AND(pMask
, currentSampleMask
);
857 STORE(outputMask
, GEP(ppMask
, C(0)));
862 JitManager::DumpToFile(blendFunc
, "");
864 ::FunctionPassManager
passes(JM()->mpCurrentModule
);
866 passes
.add(createBreakCriticalEdgesPass());
867 passes
.add(createCFGSimplificationPass());
868 passes
.add(createEarlyCSEPass());
869 passes
.add(createPromoteMemoryToRegisterPass());
870 passes
.add(createCFGSimplificationPass());
871 passes
.add(createEarlyCSEPass());
872 passes
.add(createInstructionCombiningPass());
873 #if LLVM_VERSION_MAJOR <= 11
874 passes
.add(createConstantPropagationPass());
876 passes
.add(createSCCPPass());
877 passes
.add(createAggressiveDCEPass());
879 passes
.add(createLowerX86Pass(this));
881 passes
.run(*blendFunc
);
883 JitManager::DumpToFile(blendFunc
, "optimized");
//////////////////////////////////////////////////////////////////////////
/// @brief JITs from blend shader IR
/// @param hJitMgr - JitManager handle
/// @param hFunc - handle to the LLVM function IR
/// @return PFN_BLEND_JIT_FUNC - pointer to blend code
894 PFN_BLEND_JIT_FUNC
JitBlendFunc(HANDLE hJitMgr
, const HANDLE hFunc
)
896 const llvm::Function
* func
= (const llvm::Function
*)hFunc
;
897 JitManager
* pJitMgr
= reinterpret_cast<JitManager
*>(hJitMgr
);
898 PFN_BLEND_JIT_FUNC pfnBlend
;
899 pfnBlend
= (PFN_BLEND_JIT_FUNC
)(pJitMgr
->mpExec
->getFunctionAddress(func
->getName().str()));
900 // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot
901 // add new IR to the module
902 pJitMgr
->mIsModuleFinalized
= true;
907 //////////////////////////////////////////////////////////////////////////
908 /// @brief JIT compiles blend shader
909 /// @param hJitMgr - JitManager handle
910 /// @param state - blend state to build function from
911 extern "C" PFN_BLEND_JIT_FUNC JITCALL
JitCompileBlend(HANDLE hJitMgr
,
912 const BLEND_COMPILE_STATE
& state
)
914 JitManager
* pJitMgr
= reinterpret_cast<JitManager
*>(hJitMgr
);
916 pJitMgr
->SetupNewModule();
918 BlendJit
theJit(pJitMgr
);
919 HANDLE hFunc
= theJit
.Create(state
);
921 return JitBlendFunc(hJitMgr
, hFunc
);