1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Implementation of the blend jitter
29 ******************************************************************************/
32 #include "blend_jit.h"
33 #include "gen_state_llvm.h"
37 // components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
38 #define QUANTIZE_THRESHOLD 2
41 using namespace SwrJit
;
43 //////////////////////////////////////////////////////////////////////////
44 /// Interface to Jitting a blend shader
45 //////////////////////////////////////////////////////////////////////////
46 struct BlendJit
: public Builder
48 BlendJit(JitManager
* pJitMgr
) : Builder(pJitMgr
){};
// Emits the SIMD blend-factor values selected by `factor` into the 4-entry `out` array.
// constColor / src / src1 / dst are 4-entry arrays of JIT values; the *_ALPHA factors
// below replicate entry [3] of the chosen input into all four entries of `out`.
// NOTE(review): the statements that copy `out` into `result`, and the way the Color /
// Alpha template flags gate that copy, are not visible in this excerpt — confirm
// against the full file before relying on this description.
50 template<bool Color
, bool Alpha
>
51 void GenerateBlendFactor(SWR_BLEND_FACTOR factor
, Value
* constColor
[4], Value
* src
[4], Value
* src1
[4], Value
* dst
[4], Value
* result
[4])
// All four entries = 1.0 (case label not visible here; presumably BLENDFACTOR_ONE).
58 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(1.0f
);
// NOTE(review): the body of this case is not visible in this excerpt.
60 case BLENDFACTOR_SRC_COLOR
:
66 case BLENDFACTOR_SRC_ALPHA
:
67 out
[0] = out
[1] = out
[2] = out
[3] = src
[3];
69 case BLENDFACTOR_DST_ALPHA
:
70 out
[0] = out
[1] = out
[2] = out
[3] = dst
[3];
// NOTE(review): the body of this case is not visible in this excerpt.
72 case BLENDFACTOR_DST_COLOR
:
// Entries 0..2 = min(src[3], 1 - dst[3]); entry 3 is forced to 1.0.
78 case BLENDFACTOR_SRC_ALPHA_SATURATE
:
79 out
[0] = out
[1] = out
[2] = VMINPS(src
[3], FSUB(VIMMED1(1.0f
), dst
[3]));
80 out
[3] = VIMMED1(1.0f
);
82 case BLENDFACTOR_CONST_COLOR
:
83 out
[0] = constColor
[0];
84 out
[1] = constColor
[1];
85 out
[2] = constColor
[2];
86 out
[3] = constColor
[3];
88 case BLENDFACTOR_CONST_ALPHA
:
89 out
[0] = out
[1] = out
[2] = out
[3] = constColor
[3];
// NOTE(review): the body of this case is not visible in this excerpt.
91 case BLENDFACTOR_SRC1_COLOR
:
97 case BLENDFACTOR_SRC1_ALPHA
:
98 out
[0] = out
[1] = out
[2] = out
[3] = src1
[3];
100 case BLENDFACTOR_ZERO
:
101 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
// The INV_* factors below are (1.0 - x) of the corresponding non-inverted input.
103 case BLENDFACTOR_INV_SRC_COLOR
:
104 out
[0] = FSUB(VIMMED1(1.0f
), src
[0]);
105 out
[1] = FSUB(VIMMED1(1.0f
), src
[1]);
106 out
[2] = FSUB(VIMMED1(1.0f
), src
[2]);
107 out
[3] = FSUB(VIMMED1(1.0f
), src
[3]);
109 case BLENDFACTOR_INV_SRC_ALPHA
:
110 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), src
[3]);
112 case BLENDFACTOR_INV_DST_ALPHA
:
113 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), dst
[3]);
115 case BLENDFACTOR_INV_DST_COLOR
:
116 out
[0] = FSUB(VIMMED1(1.0f
), dst
[0]);
117 out
[1] = FSUB(VIMMED1(1.0f
), dst
[1]);
118 out
[2] = FSUB(VIMMED1(1.0f
), dst
[2]);
119 out
[3] = FSUB(VIMMED1(1.0f
), dst
[3]);
121 case BLENDFACTOR_INV_CONST_COLOR
:
122 out
[0] = FSUB(VIMMED1(1.0f
), constColor
[0]);
123 out
[1] = FSUB(VIMMED1(1.0f
), constColor
[1]);
124 out
[2] = FSUB(VIMMED1(1.0f
), constColor
[2]);
125 out
[3] = FSUB(VIMMED1(1.0f
), constColor
[3]);
127 case BLENDFACTOR_INV_CONST_ALPHA
:
128 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), constColor
[3]);
130 case BLENDFACTOR_INV_SRC1_COLOR
:
131 out
[0] = FSUB(VIMMED1(1.0f
), src1
[0]);
132 out
[1] = FSUB(VIMMED1(1.0f
), src1
[1]);
133 out
[2] = FSUB(VIMMED1(1.0f
), src1
[2]);
134 out
[3] = FSUB(VIMMED1(1.0f
), src1
[3]);
136 case BLENDFACTOR_INV_SRC1_ALPHA
:
137 out
[0] = out
[1] = out
[2] = out
[3] = FSUB(VIMMED1(1.0f
), src1
[3]);
// Unrecognised factor: report it through SWR_INVALID and fall back to an all-zero factor.
140 SWR_INVALID("Unsupported blend factor: %d", factor
);
141 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
// Clamps the four entries of `src` in place, based on the type of the format's
// first component (info.type[0]).
// NOTE(review): the switch head and the case labels selecting each clamp range are
// not visible in this excerpt; the two visible ranges are [0, 1] and [-1, 1]
// (presumably the unorm and snorm cases — confirm against the full file).
158 void Clamp(SWR_FORMAT format
, Value
* src
[4])
160 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
161 SWR_TYPE type
= info
.type
[0];
// Clamp each entry to [0.0, 1.0]: max with 0 first, then min with 1.
169 src
[0] = VMINPS(VMAXPS(src
[0], VIMMED1(0.0f
)), VIMMED1(1.0f
));
170 src
[1] = VMINPS(VMAXPS(src
[1], VIMMED1(0.0f
)), VIMMED1(1.0f
));
171 src
[2] = VMINPS(VMAXPS(src
[2], VIMMED1(0.0f
)), VIMMED1(1.0f
));
172 src
[3] = VMINPS(VMAXPS(src
[3], VIMMED1(0.0f
)), VIMMED1(1.0f
));
// Clamp each entry to [-1.0, 1.0].
176 src
[0] = VMINPS(VMAXPS(src
[0], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
177 src
[1] = VMINPS(VMAXPS(src
[1], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
178 src
[2] = VMINPS(VMAXPS(src
[2], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
179 src
[3] = VMINPS(VMAXPS(src
[3], VIMMED1(-1.0f
)), VIMMED1(1.0f
));
// An unknown component type is reported through SWR_INVALID.
182 case SWR_TYPE_UNKNOWN
: SWR_INVALID("Unsupport format type: %d", type
);
// Replaces entries of `src` with the format's default values.
// First pass: flags every destination channel (info.swizzle[c]) that one of the
// format's info.numComps components maps onto.
// Second pass: walks all four channels and writes the default's bit pattern,
// reinterpreted as a float vector.
// NOTE(review): the condition guarding the assignment in the second pass is not
// visible in this excerpt (presumably "channel not flagged valid") — confirm.
186 void ApplyDefaults(SWR_FORMAT format
, Value
* src
[4])
188 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
190 bool valid
[] = { false, false, false, false };
191 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
193 valid
[info
.swizzle
[c
]] = true;
196 for (uint32_t c
= 0; c
< 4; ++c
)
// The default is cast to int, splatted, then bitcast (not converted) to the float SIMD type.
200 src
[c
] = BITCAST(VIMMED1((int)info
.defaults
[c
]), mSimdFP32Ty
);
205 void ApplyUnusedDefaults(SWR_FORMAT format
, Value
* src
[4])
207 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
209 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
211 if (info
.type
[c
] == SWR_TYPE_UNUSED
)
213 src
[info
.swizzle
[c
]] = BITCAST(VIMMED1((int)info
.defaults
[info
.swizzle
[c
]]), mSimdFP32Ty
);
// Quantizes low-precision components of `src` in place.
// A component is processed when its bit count (info.bpc[c]) is at most
// QUANTIZE_THRESHOLD and its type is not SWR_TYPE_UNUSED.
// NOTE(review): the case label in front of the quantizing statements is not visible
// in this excerpt, so which component type(s) take that path cannot be confirmed here.
218 void Quantize(SWR_FORMAT format
, Value
* src
[4])
220 const SWR_FORMAT_INFO
& info
= GetFormatInfo(format
);
221 for (uint32_t c
= 0; c
< info
.numComps
; ++c
)
223 if (info
.bpc
[c
] <= QUANTIZE_THRESHOLD
&& info
.type
[c
] != SWR_TYPE_UNUSED
)
// swizComp is the destination channel; factor is the largest value bpc bits can hold (2^bpc - 1).
225 uint32_t swizComp
= info
.swizzle
[c
];
226 float factor
= (float)((1 << info
.bpc
[c
]) - 1);
227 switch (info
.type
[c
])
// Scale up by factor, add 0.5, round toward zero, then scale back down by 1/factor.
230 src
[swizComp
] = FADD(FMUL(src
[swizComp
], VIMMED1(factor
)), VIMMED1(0.5f
));
231 src
[swizComp
] = VROUND(src
[swizComp
], C(_MM_FROUND_TO_ZERO
));
232 src
[swizComp
] = FMUL(src
[swizComp
], VIMMED1(1.0f
/factor
));
234 default: SWR_INVALID("Unsupported format type: %d", info
.type
[c
]);
// Emits the blend equation selected by `blendOp`.
// srcBlend[i] = src[i] * srcFactor[i] and dstBlend[i] = dst[i] * dstFactor[i] are
// computed first; the add / subtract / reverse-subtract forms combine those products,
// while the min / max forms operate on the unscaled src and dst values.
// NOTE(review): the switch head, some case labels, and the copy of `out` into `result`
// (gated by the Color / Alpha template flags, presumably) are not visible in this excerpt.
240 template<bool Color
, bool Alpha
>
241 void BlendFunc(SWR_BLEND_OP blendOp
, Value
* src
[4], Value
* srcFactor
[4], Value
* dst
[4], Value
* dstFactor
[4], Value
* result
[4])
246 for (uint32_t i
= 0; i
< 4; ++i
)
248 srcBlend
[i
] = FMUL(src
[i
], srcFactor
[i
]);
249 dstBlend
[i
] = FMUL(dst
[i
], dstFactor
[i
]);
// srcBlend + dstBlend (case label not visible here; presumably the add op).
255 out
[0] = FADD(srcBlend
[0], dstBlend
[0]);
256 out
[1] = FADD(srcBlend
[1], dstBlend
[1]);
257 out
[2] = FADD(srcBlend
[2], dstBlend
[2]);
258 out
[3] = FADD(srcBlend
[3], dstBlend
[3]);
// srcBlend - dstBlend
261 case BLENDOP_SUBTRACT
:
262 out
[0] = FSUB(srcBlend
[0], dstBlend
[0]);
263 out
[1] = FSUB(srcBlend
[1], dstBlend
[1]);
264 out
[2] = FSUB(srcBlend
[2], dstBlend
[2]);
265 out
[3] = FSUB(srcBlend
[3], dstBlend
[3]);
// dstBlend - srcBlend
268 case BLENDOP_REVSUBTRACT
:
269 out
[0] = FSUB(dstBlend
[0], srcBlend
[0]);
270 out
[1] = FSUB(dstBlend
[1], srcBlend
[1]);
271 out
[2] = FSUB(dstBlend
[2], srcBlend
[2]);
272 out
[3] = FSUB(dstBlend
[3], srcBlend
[3]);
// min(src, dst) on the unscaled inputs (case label not visible here).
276 out
[0] = VMINPS(src
[0], dst
[0]);
277 out
[1] = VMINPS(src
[1], dst
[1]);
278 out
[2] = VMINPS(src
[2], dst
[2]);
279 out
[3] = VMINPS(src
[3], dst
[3]);
// max(src, dst) on the unscaled inputs (case label not visible here).
283 out
[0] = VMAXPS(src
[0], dst
[0]);
284 out
[1] = VMAXPS(src
[1], dst
[1]);
285 out
[2] = VMAXPS(src
[2], dst
[2]);
286 out
[3] = VMAXPS(src
[3], dst
[3]);
// Unrecognised operation: report it through SWR_INVALID and produce zeros.
290 SWR_INVALID("Unsupported blend operation: %d", blendOp
);
291 out
[0] = out
[1] = out
[2] = out
[3] = VIMMED1(0.0f
);
// Emits the bitwise logic operation selected by `logicOp`, component by component,
// writing into `result`. Bitwise NOT is expressed as XOR with 0xFFFFFFFF.
// In the shorthand below, s = src[i] and d = dst[i].
// NOTE(review): the switch head and most case labels are not visible in this excerpt;
// the per-group comments describe only the visible arithmetic.
308 void LogicOpFunc(SWR_LOGIC_OP logicOp
, Value
* src
[4], Value
* dst
[4], Value
* result
[4])
310 // Op: (s == PS output, d = RT contents)
// 0
314 result
[0] = VIMMED1(0);
315 result
[1] = VIMMED1(0);
316 result
[2] = VIMMED1(0);
317 result
[3] = VIMMED1(0);
// ~(s | d)
322 result
[0] = XOR(OR(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
323 result
[1] = XOR(OR(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
324 result
[2] = XOR(OR(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
325 result
[3] = XOR(OR(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
// ~s & d
328 case LOGICOP_AND_INVERTED
:
330 // todo: use avx andnot instr when I can find the intrinsic to call
331 result
[0] = AND(XOR(src
[0], VIMMED1(0xFFFFFFFF)), dst
[0]);
332 result
[1] = AND(XOR(src
[1], VIMMED1(0xFFFFFFFF)), dst
[1]);
333 result
[2] = AND(XOR(src
[2], VIMMED1(0xFFFFFFFF)), dst
[2]);
334 result
[3] = AND(XOR(src
[3], VIMMED1(0xFFFFFFFF)), dst
[3]);
// ~s
337 case LOGICOP_COPY_INVERTED
:
339 result
[0] = XOR(src
[0], VIMMED1(0xFFFFFFFF));
340 result
[1] = XOR(src
[1], VIMMED1(0xFFFFFFFF));
341 result
[2] = XOR(src
[2], VIMMED1(0xFFFFFFFF));
342 result
[3] = XOR(src
[3], VIMMED1(0xFFFFFFFF));
// s & ~d
345 case LOGICOP_AND_REVERSE
:
347 // todo: use avx andnot instr when I can find the intrinsic to call
348 result
[0] = AND(XOR(dst
[0], VIMMED1(0xFFFFFFFF)), src
[0]);
349 result
[1] = AND(XOR(dst
[1], VIMMED1(0xFFFFFFFF)), src
[1]);
350 result
[2] = AND(XOR(dst
[2], VIMMED1(0xFFFFFFFF)), src
[2]);
351 result
[3] = AND(XOR(dst
[3], VIMMED1(0xFFFFFFFF)), src
[3]);
// ~d
356 result
[0] = XOR(dst
[0], VIMMED1(0xFFFFFFFF));
357 result
[1] = XOR(dst
[1], VIMMED1(0xFFFFFFFF));
358 result
[2] = XOR(dst
[2], VIMMED1(0xFFFFFFFF));
359 result
[3] = XOR(dst
[3], VIMMED1(0xFFFFFFFF));
// s ^ d
364 result
[0] = XOR(src
[0], dst
[0]);
365 result
[1] = XOR(src
[1], dst
[1]);
366 result
[2] = XOR(src
[2], dst
[2]);
367 result
[3] = XOR(src
[3], dst
[3]);
// ~(s & d)
372 result
[0] = XOR(AND(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
373 result
[1] = XOR(AND(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
374 result
[2] = XOR(AND(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
375 result
[3] = XOR(AND(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
// s & d
380 result
[0] = AND(src
[0], dst
[0]);
381 result
[1] = AND(src
[1], dst
[1]);
382 result
[2] = AND(src
[2], dst
[2]);
383 result
[3] = AND(src
[3], dst
[3]);
// ~(s ^ d)
388 result
[0] = XOR(XOR(src
[0], dst
[0]), VIMMED1(0xFFFFFFFF));
389 result
[1] = XOR(XOR(src
[1], dst
[1]), VIMMED1(0xFFFFFFFF));
390 result
[2] = XOR(XOR(src
[2], dst
[2]), VIMMED1(0xFFFFFFFF));
391 result
[3] = XOR(XOR(src
[3], dst
[3]), VIMMED1(0xFFFFFFFF));
// ~s | d
401 case LOGICOP_OR_INVERTED
:
403 result
[0] = OR(XOR(src
[0], VIMMED1(0xFFFFFFFF)), dst
[0]);
404 result
[1] = OR(XOR(src
[1], VIMMED1(0xFFFFFFFF)), dst
[1]);
405 result
[2] = OR(XOR(src
[2], VIMMED1(0xFFFFFFFF)), dst
[2]);
406 result
[3] = OR(XOR(src
[3], VIMMED1(0xFFFFFFFF)), dst
[3]);
// s | ~d
416 case LOGICOP_OR_REVERSE
:
418 result
[0] = OR(XOR(dst
[0], VIMMED1(0xFFFFFFFF)), src
[0]);
419 result
[1] = OR(XOR(dst
[1], VIMMED1(0xFFFFFFFF)), src
[1]);
420 result
[2] = OR(XOR(dst
[2], VIMMED1(0xFFFFFFFF)), src
[2]);
421 result
[3] = OR(XOR(dst
[3], VIMMED1(0xFFFFFFFF)), src
[3]);
// s | d
426 result
[0] = OR(src
[0], dst
[0]);
427 result
[1] = OR(src
[1], dst
[1]);
428 result
[2] = OR(src
[2], dst
[2]);
429 result
[3] = OR(src
[3], dst
[3]);
// all bits set
433 result
[0] = VIMMED1(0xFFFFFFFF);
434 result
[1] = VIMMED1(0xFFFFFFFF);
435 result
[2] = VIMMED1(0xFFFFFFFF);
436 result
[3] = VIMMED1(0xFFFFFFFF);
// Unrecognised operation: report it through SWR_INVALID and produce zeros.
// NOTE(review): this fallback splats a float literal (0.0f) while every other path
// in this function uses integer immediates — looks inconsistent; confirm whether
// VIMMED1(0) was intended.
440 SWR_INVALID("Unsupported logic operation: %d", logicOp
);
441 result
[0] = result
[1] = result
[2] = result
[3] = VIMMED1(0.0f
);
// Emits the alpha test.
// Loads the reference value from the blend state and the alpha value through ppAlpha,
// compares them using state.alphaTestFunction, ANDs the comparison result into the
// mask loaded through ppMask, and stores the updated mask back through ppMask.
// Two comparison paths exist: an unsigned-integer path taken when
// state.alphaTestFormat == ALPHA_TEST_UNORM8, and a float path otherwise.
// NOTE(review): the else keyword, the default labels, and the statement that converts
// the mask to an int1 mask are not visible in this excerpt.
446 void AlphaTest(const BLEND_COMPILE_STATE
& state
, Value
* pBlendState
, Value
* ppAlpha
, Value
* ppMask
)
448 // load uint32_t reference
449 Value
* pRef
= VBROADCAST(LOAD(pBlendState
, { 0, SWR_BLEND_STATE_alphaTestReference
}));
452 Value
* pAlpha
= LOAD(ppAlpha
);
454 Value
* pTest
= nullptr;
455 if (state
.alphaTestFormat
== ALPHA_TEST_UNORM8
)
457 // convert float alpha to unorm8
// NOTE(review): the scale is 256.0, so an alpha of exactly 1.0 becomes 256 before the
// unsigned compare — confirm this matches how alphaTestReference is encoded.
458 Value
* pAlphaU8
= FMUL(pAlpha
, VIMMED1(256.0f
));
459 pAlphaU8
= FP_TO_UI(pAlphaU8
, mSimdInt32Ty
);
// Unsigned integer comparisons of the converted alpha against the reference.
462 switch (state
.alphaTestFunction
)
464 case ZFUNC_ALWAYS
: pTest
= VIMMED1(true); break;
465 case ZFUNC_NEVER
: pTest
= VIMMED1(false); break;
466 case ZFUNC_LT
: pTest
= ICMP_ULT(pAlphaU8
, pRef
); break;
467 case ZFUNC_EQ
: pTest
= ICMP_EQ(pAlphaU8
, pRef
); break;
468 case ZFUNC_LE
: pTest
= ICMP_ULE(pAlphaU8
, pRef
); break;
469 case ZFUNC_GT
: pTest
= ICMP_UGT(pAlphaU8
, pRef
); break;
470 case ZFUNC_NE
: pTest
= ICMP_NE(pAlphaU8
, pRef
); break;
471 case ZFUNC_GE
: pTest
= ICMP_UGE(pAlphaU8
, pRef
); break;
473 SWR_INVALID("Invalid alpha test function");
// Float path: reinterpret the broadcast reference bits as float (bitcast, no conversion).
480 pRef
= BITCAST(pRef
, mSimdFP32Ty
);
// Ordered float comparisons of the raw alpha against the reference.
483 switch (state
.alphaTestFunction
)
485 case ZFUNC_ALWAYS
: pTest
= VIMMED1(true); break;
486 case ZFUNC_NEVER
: pTest
= VIMMED1(false); break;
487 case ZFUNC_LT
: pTest
= FCMP_OLT(pAlpha
, pRef
); break;
488 case ZFUNC_EQ
: pTest
= FCMP_OEQ(pAlpha
, pRef
); break;
489 case ZFUNC_LE
: pTest
= FCMP_OLE(pAlpha
, pRef
); break;
490 case ZFUNC_GT
: pTest
= FCMP_OGT(pAlpha
, pRef
); break;
491 case ZFUNC_NE
: pTest
= FCMP_ONE(pAlpha
, pRef
); break;
492 case ZFUNC_GE
: pTest
= FCMP_OGE(pAlpha
, pRef
); break;
494 SWR_INVALID("Invalid alpha test function");
// Read the current mask so the test result can be merged into it.
500 Value
* pMask
= LOAD(ppMask
);
502 // convert to int1 mask
505 // and with alpha test result
506 pMask
= AND(pMask
, pTest
);
508 // convert back to vector mask
509 pMask
= VMASK(pMask
);
// Write the merged mask back to the location it was loaded from.
512 STORE(pMask
, ppMask
);
// Builds the LLVM function implementing the blend stage for `state`.
// Visible steps, in order:
//   1. name the function "BlendShader_" + CRC of the compile state and declare it
//      with nine arguments in the current module;
//   2. load dst, constant color, src and src1 (four entries each);
//   3. optionally derive a coverage value from src alpha (alpha-to-coverage) and
//      run the alpha test;
//   4. if blending is enabled: clamp inputs, patch dst defaults, quantize, compute
//      blend factors and the blend equation, and store the result;
//   5. if logic ops are enabled: convert to integers, apply the logic op, convert
//      back and store the result;
//   6. combine oMask / sample mask / alpha-to-coverage into currentMask and AND it
//      into the coverage mask;
//   7. run a fixed list of optimisation passes over the function.
// NOTE(review): many lines (braces, else keywords, local declarations such as src/dst/
// result/vMask/scale, several case labels, and the return statement) are not visible
// in this excerpt; comments below describe only what is visible.
515 Function
* Create(const BLEND_COMPILE_STATE
& state
)
// The stream is opened with ios_base::ate so the CRC is appended after the "BlendShader_" prefix.
517 std::stringstream
fnName("BlendShader_", std::ios_base::in
| std::ios_base::out
| std::ios_base::ate
);
518 fnName
<< ComputeCRC(0, &state
, sizeof(state
));
520 // blend function signature
521 //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
// NOTE(review): the typedef quoted above lists eight parameters, while nine argument
// types are registered below (the src0alpha pointer is extra) — the quoted typedef
// looks stale; confirm against the header.
523 std::vector
<Type
*> args
{
524 PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE*
525 PointerType::get(mSimdFP32Ty
, 0), // simdvector& src
526 PointerType::get(mSimdFP32Ty
, 0), // simdvector& src1
527 PointerType::get(mSimdFP32Ty
, 0), // src0alpha
528 Type::getInt32Ty(JM()->mContext
), // sampleNum
529 PointerType::get(mSimdFP32Ty
, 0), // uint8_t* pDst
530 PointerType::get(mSimdFP32Ty
, 0), // simdvector& result
531 PointerType::get(mSimdInt32Ty
, 0), // simdscalari* oMask
532 PointerType::get(mSimdInt32Ty
, 0), // simdscalari* pMask
// void-returning, non-variadic function with external linkage in the current module;
// the module identifier is then set to the function's name.
535 FunctionType
* fTy
= FunctionType::get(IRB()->getVoidTy(), args
, false);
536 Function
* blendFunc
= Function::Create(fTy
, GlobalValue::ExternalLinkage
, fnName
.str(), JM()->mpCurrentModule
);
537 blendFunc
->getParent()->setModuleIdentifier(blendFunc
->getName());
539 BasicBlock
* entry
= BasicBlock::Create(JM()->mContext
, "entry", blendFunc
);
541 IRB()->SetInsertPoint(entry
);
// Pull the arguments off in declaration order and give each a readable IR name.
544 auto argitr
= blendFunc
->arg_begin();
545 Value
* pBlendState
= &*argitr
++;
546 pBlendState
->setName("pBlendState");
547 Value
* pSrc
= &*argitr
++;
548 pSrc
->setName("src");
549 Value
* pSrc1
= &*argitr
++;
550 pSrc1
->setName("src1");
551 Value
* pSrc0Alpha
= &*argitr
++;
552 pSrc0Alpha
->setName("src0alpha");
553 Value
* sampleNum
= &*argitr
++;
554 sampleNum
->setName("sampleNum");
555 Value
* pDst
= &*argitr
++;
556 pDst
->setName("pDst");
557 Value
* pResult
= &*argitr
++;
558 pResult
->setName("result");
559 Value
* ppoMask
= &*argitr
++;
560 ppoMask
->setName("ppoMask");
561 Value
* ppMask
= &*argitr
++;
562 ppMask
->setName("pMask");
// The generated code is only valid while the hot tile format is R32G32B32A32_FLOAT.
564 static_assert(KNOB_COLOR_HOT_TILE_FORMAT
== R32G32B32A32_FLOAT
, "Unsupported hot tile format");
566 Value
* constantColor
[4];
// Load the i-th entry of dst, constant color, src and src1.
570 for (uint32_t i
= 0; i
< 4; ++i
)
573 dst
[i
] = LOAD(pDst
, { i
});
575 // load constant color
576 constantColor
[i
] = VBROADCAST(LOAD(pBlendState
, { 0, SWR_BLEND_STATE_constantColor
, i
}));
579 src
[i
] = LOAD(pSrc
, { i
});
582 src1
[i
] = LOAD(pSrc1
, { i
});
// currentMask starts as all ones (-1) and is narrowed by the enabled mask sources below.
584 Value
* currentMask
= VIMMED1(-1);
585 if (state
.desc
.alphaToCoverageEnable
)
// Clamp src alpha to [0, 1], scale it by ((1 << numSamples) - 1), add 0.5 and convert to int.
// NOTE(review): assumes state.desc.numSamples is small enough for the shift — confirm its range.
587 Value
* pClampedSrc
= FCLAMP(src
[3], 0.0f
, 1.0f
);
588 uint32_t bits
= (1 << state
.desc
.numSamples
) - 1;
589 currentMask
= FMUL(pClampedSrc
, VBROADCAST(C((float)bits
)));
590 currentMask
= FP_TO_SI(FADD(currentMask
, VIMMED1(0.5f
)), mSimdInt32Ty
);
// The alpha test reads alpha through pSrc0Alpha and updates the mask behind ppMask.
594 if (state
.desc
.alphaTestEnable
)
596 AlphaTest(state
, pBlendState
, pSrc0Alpha
, ppMask
);
600 if (state
.blendState
.blendEnable
)
// Clamp every blend input according to the render target format.
603 Clamp(state
.format
, src
);
604 Clamp(state
.format
, src1
);
605 Clamp(state
.format
, dst
);
606 Clamp(state
.format
, constantColor
);
608 // apply defaults to hottile contents to take into account missing components
609 ApplyDefaults(state
.format
, dst
);
611 // Force defaults for unused 'X' components
612 ApplyUnusedDefaults(state
.format
, dst
);
614 // Quantize low precision components
615 Quantize(state
.format
, dst
);
617 // special case clamping for R11G11B10_float which has no sign bit
618 if (state
.format
== R11G11B10_FLOAT
)
620 dst
[0] = VMAXPS(dst
[0], VIMMED1(0.0f
));
621 dst
[1] = VMAXPS(dst
[1], VIMMED1(0.0f
));
622 dst
[2] = VMAXPS(dst
[2], VIMMED1(0.0f
));
623 dst
[3] = VMAXPS(dst
[3], VIMMED1(0.0f
));
// Independent alpha blend: color and alpha use separate factors and blend functions,
// each call instantiated with only one of the Color / Alpha template flags set.
628 if (state
.desc
.independentAlphaBlendEnable
)
630 GenerateBlendFactor
<true, false>(state
.blendState
.sourceBlendFactor
, constantColor
, src
, src1
, dst
, srcFactor
);
631 GenerateBlendFactor
<false, true>(state
.blendState
.sourceAlphaBlendFactor
, constantColor
, src
, src1
, dst
, srcFactor
);
633 GenerateBlendFactor
<true, false>(state
.blendState
.destBlendFactor
, constantColor
, src
, src1
, dst
, dstFactor
);
634 GenerateBlendFactor
<false, true>(state
.blendState
.destAlphaBlendFactor
, constantColor
, src
, src1
, dst
, dstFactor
);
636 BlendFunc
<true, false>(state
.blendState
.colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
637 BlendFunc
<false, true>(state
.blendState
.alphaBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
// Shared path (else keyword not visible here): the color factors and color blend
// function are used with both template flags set.
641 GenerateBlendFactor
<true, true>(state
.blendState
.sourceBlendFactor
, constantColor
, src
, src1
, dst
, srcFactor
);
642 GenerateBlendFactor
<true, true>(state
.blendState
.destBlendFactor
, constantColor
, src
, src1
, dst
, dstFactor
);
644 BlendFunc
<true, true>(state
.blendState
.colorBlendFunc
, src
, srcFactor
, dst
, dstFactor
, result
);
// Write the blended entries out through pResult.
648 for (uint32_t i
= 0; i
< 4; ++i
)
650 STORE(result
[i
], pResult
, { i
});
654 if(state
.blendState
.logicOpEnable
)
656 const SWR_FORMAT_INFO
& info
= GetFormatInfo(state
.format
);
// Inputs were already clamped above when blending is enabled; clamp here otherwise.
660 if (!state
.blendState
.blendEnable
)
662 Clamp(state
.format
, src
);
663 Clamp(state
.format
, dst
);
// Per component: build the bit mask and float scale for the component width, then
// convert src/dst to integers.
// NOTE(review): the statement executed for SWR_TYPE_UNUSED components and the
// conversion calls wrapping the FMUL lines below are not visible in this excerpt.
666 for(uint32_t i
= 0; i
< 4; i
++)
668 if (info
.type
[i
] == SWR_TYPE_UNUSED
)
// 32-bit (or wider) components use a full mask, which avoids shifting by the full width.
673 if (info
.bpc
[i
] >= 32)
675 vMask
[i
] = VIMMED1(0xFFFFFFFF);
676 scale
[i
] = 0xFFFFFFFF;
// Narrower components: mask is (2^bpc - 1); snorm scale uses one bit less for the sign.
680 vMask
[i
] = VIMMED1((1 << info
.bpc
[i
]) - 1);
681 if (info
.type
[i
] == SWR_TYPE_SNORM
)
682 scale
[i
] = (1 << (info
.bpc
[i
] - 1)) - 1;
684 scale
[i
] = (1 << info
.bpc
[i
]) - 1;
687 switch (info
.type
[i
])
690 SWR_INVALID("Unsupported type for logic op: %d", info
.type
[i
]);
693 case SWR_TYPE_UNKNOWN
:
694 case SWR_TYPE_UNUSED
:
// Reinterpret the float bits as integers without conversion.
699 src
[i
] = BITCAST(src
[i
], mSimdInt32Ty
);
700 dst
[i
] = BITCAST(dst
[i
], mSimdInt32Ty
);
// Normalized values are multiplied by the component scale before integer conversion.
704 FMUL(src
[i
], VIMMED1(scale
[i
])),
707 FMUL(dst
[i
], VIMMED1(scale
[i
])),
712 FMUL(src
[i
], VIMMED1(scale
[i
])),
715 FMUL(dst
[i
], VIMMED1(scale
[i
])),
721 LogicOpFunc(state
.blendState
.logicOpFunc
, src
, dst
, result
);
// Convert each logic-op result back to float and store it.
724 for(uint32_t i
= 0; i
< 4; ++i
)
726 if (info
.type
[i
] == SWR_TYPE_UNUSED
)
731 // clear upper bits from PS output not in RT format after doing logic op
732 result
[i
] = AND(result
[i
], vMask
[i
]);
734 switch (info
.type
[i
])
737 SWR_INVALID("Unsupported type for logic op: %d", info
.type
[i
]);
740 case SWR_TYPE_UNKNOWN
:
741 case SWR_TYPE_UNUSED
:
746 result
[i
] = BITCAST(result
[i
], mSimdFP32Ty
);
// Shift left then arithmetic-shift right by (32 - bpc) to sign-extend the component,
// then convert to float and divide by the scale.
749 result
[i
] = SHL(result
[i
], C(32 - info
.bpc
[i
]));
750 result
[i
] = ASHR(result
[i
], C(32 - info
.bpc
[i
]));
751 result
[i
] = FMUL(SI_TO_FP(result
[i
], mSimdFP32Ty
),
752 VIMMED1(1.0f
/ scale
[i
]));
// Unsigned conversion back to float, divided by the scale.
755 result
[i
] = FMUL(UI_TO_FP(result
[i
], mSimdFP32Ty
),
756 VIMMED1(1.0f
/ scale
[i
]));
760 STORE(result
[i
], pResult
, {i
});
// oMask: keep only the bit for the current sample and fold it into currentMask.
764 if(state
.desc
.oMaskEnable
)
766 assert(!(state
.desc
.alphaToCoverageEnable
));
768 Value
* oMask
= LOAD(ppoMask
);
769 Value
* sampleMasked
= VBROADCAST(SHL(C(1), sampleNum
));
770 oMask
= AND(oMask
, sampleMasked
);
771 currentMask
= AND(oMask
, currentMask
);
// Sample mask: test the current sample's bit in the state's sample mask and sign-extend
// the scalar result across the SIMD mask.
// NOTE(review): the test is a signed "> 0" compare, which would reject bit 31;
// presumably sampleNum never reaches 31 — confirm.
774 if(state
.desc
.sampleMaskEnable
)
776 Value
* sampleMask
= LOAD(pBlendState
, { 0, SWR_BLEND_STATE_sampleMask
});
777 Value
* sampleMasked
= SHL(C(1), sampleNum
);
778 sampleMask
= AND(sampleMask
, sampleMasked
);
779 sampleMask
= VBROADCAST(ICMP_SGT(sampleMask
, C(0)));
780 sampleMask
= S_EXT(sampleMask
, mSimdInt32Ty
);
781 currentMask
= AND(sampleMask
, currentMask
);
// Alpha-to-coverage: keep only the current sample's bit of the coverage value computed earlier.
784 if (state
.desc
.alphaToCoverageEnable
)
786 Value
* sampleMasked
= SHL(C(1), sampleNum
);
787 currentMask
= AND(currentMask
, VBROADCAST(sampleMasked
));
// When any mask source is enabled, turn currentMask into all-ones / all-zeros per lane
// (non-zero test, sign-extended) and AND it into the coverage mask behind ppMask.
790 if(state
.desc
.sampleMaskEnable
|| state
.desc
.alphaToCoverageEnable
||
791 state
.desc
.oMaskEnable
)
793 // load coverage mask
794 Value
* pMask
= LOAD(ppMask
);
795 currentMask
= S_EXT(ICMP_UGT(currentMask
, VBROADCAST(C(0))), mSimdInt32Ty
);
796 Value
* outputMask
= AND(pMask
, currentMask
);
798 STORE(outputMask
, GEP(ppMask
, C(0)));
// Dump the function before optimisation (empty suffix).
803 JitManager::DumpToFile(blendFunc
, "");
// Function-level pass pipeline run on the generated function only.
805 ::FunctionPassManager
passes(JM()->mpCurrentModule
);
807 passes
.add(createBreakCriticalEdgesPass());
808 passes
.add(createCFGSimplificationPass());
809 passes
.add(createEarlyCSEPass());
810 passes
.add(createPromoteMemoryToRegisterPass());
811 passes
.add(createCFGSimplificationPass());
812 passes
.add(createEarlyCSEPass());
813 passes
.add(createInstructionCombiningPass());
814 passes
.add(createInstructionSimplifierPass());
815 passes
.add(createConstantPropagationPass());
816 passes
.add(createSCCPPass());
817 passes
.add(createAggressiveDCEPass());
819 passes
.run(*blendFunc
);
// Dump again after optimisation, tagged "optimized".
821 JitManager::DumpToFile(blendFunc
, "optimized");
827 //////////////////////////////////////////////////////////////////////////
828 /// @brief JITs from fetch shader IR
829 /// @param hJitMgr - JitManager handle
830 /// @param func - LLVM function IR
831 /// @return PFN_FETCH_FUNC - pointer to fetch code
832 PFN_BLEND_JIT_FUNC
JitBlendFunc(HANDLE hJitMgr
, const HANDLE hFunc
)
834 const llvm::Function
*func
= (const llvm::Function
*)hFunc
;
835 JitManager
* pJitMgr
= reinterpret_cast<JitManager
*>(hJitMgr
);
836 PFN_BLEND_JIT_FUNC pfnBlend
;
837 pfnBlend
= (PFN_BLEND_JIT_FUNC
)(pJitMgr
->mpExec
->getFunctionAddress(func
->getName().str()));
838 // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
839 pJitMgr
->mIsModuleFinalized
= true;
844 //////////////////////////////////////////////////////////////////////////
845 /// @brief JIT compiles blend shader
846 /// @param hJitMgr - JitManager handle
847 /// @param state - blend state to build function from
848 extern "C" PFN_BLEND_JIT_FUNC JITCALL
JitCompileBlend(HANDLE hJitMgr
, const BLEND_COMPILE_STATE
& state
)
850 JitManager
* pJitMgr
= reinterpret_cast<JitManager
*>(hJitMgr
);
852 pJitMgr
->SetupNewModule();
854 BlendJit
theJit(pJitMgr
);
855 HANDLE hFunc
= theJit
.Create(state
);
857 return JitBlendFunc(hJitMgr
, hFunc
);