427884004f5f90e6227fe7b4fd9ec3fd4e887788
[mesa.git] / src / gallium / drivers / swr / rasterizer / jitter / blend_jit.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file blend_jit.cpp
24 *
25 * @brief Implementation of the blend jitter
26 *
27 * Notes:
28 *
29 ******************************************************************************/
30 #include "builder.h"
31 #include "jit_api.h"
32 #include "blend_jit.h"
33 #include "gen_state_llvm.h"
34
35 #include <sstream>
36
37 // components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
38 #define QUANTIZE_THRESHOLD 2
39
40 using namespace llvm;
41 using namespace SwrJit;
42
43 //////////////////////////////////////////////////////////////////////////
44 /// Interface to Jitting a blend shader
45 //////////////////////////////////////////////////////////////////////////
46 struct BlendJit : public Builder
47 {
48 BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){};
49
50 template<bool Color, bool Alpha>
51 void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
52 {
53 Value* out[4];
54
55 switch (factor)
56 {
57 case BLENDFACTOR_ONE:
58 out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
59 break;
60 case BLENDFACTOR_SRC_COLOR:
61 out[0] = src[0];
62 out[1] = src[1];
63 out[2] = src[2];
64 out[3] = src[3];
65 break;
66 case BLENDFACTOR_SRC_ALPHA:
67 out[0] = out[1] = out[2] = out[3] = src[3];
68 break;
69 case BLENDFACTOR_DST_ALPHA:
70 out[0] = out[1] = out[2] = out[3] = dst[3];
71 break;
72 case BLENDFACTOR_DST_COLOR:
73 out[0] = dst[0];
74 out[1] = dst[1];
75 out[2] = dst[2];
76 out[3] = dst[3];
77 break;
78 case BLENDFACTOR_SRC_ALPHA_SATURATE:
79 out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
80 out[3] = VIMMED1(1.0f);
81 break;
82 case BLENDFACTOR_CONST_COLOR:
83 out[0] = constColor[0];
84 out[1] = constColor[1];
85 out[2] = constColor[2];
86 out[3] = constColor[3];
87 break;
88 case BLENDFACTOR_CONST_ALPHA:
89 out[0] = out[1] = out[2] = out[3] = constColor[3];
90 break;
91 case BLENDFACTOR_SRC1_COLOR:
92 out[0] = src1[0];
93 out[1] = src1[1];
94 out[2] = src1[2];
95 out[3] = src1[3];
96 break;
97 case BLENDFACTOR_SRC1_ALPHA:
98 out[0] = out[1] = out[2] = out[3] = src1[3];
99 break;
100 case BLENDFACTOR_ZERO:
101 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
102 break;
103 case BLENDFACTOR_INV_SRC_COLOR:
104 out[0] = FSUB(VIMMED1(1.0f), src[0]);
105 out[1] = FSUB(VIMMED1(1.0f), src[1]);
106 out[2] = FSUB(VIMMED1(1.0f), src[2]);
107 out[3] = FSUB(VIMMED1(1.0f), src[3]);
108 break;
109 case BLENDFACTOR_INV_SRC_ALPHA:
110 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
111 break;
112 case BLENDFACTOR_INV_DST_ALPHA:
113 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
114 break;
115 case BLENDFACTOR_INV_DST_COLOR:
116 out[0] = FSUB(VIMMED1(1.0f), dst[0]);
117 out[1] = FSUB(VIMMED1(1.0f), dst[1]);
118 out[2] = FSUB(VIMMED1(1.0f), dst[2]);
119 out[3] = FSUB(VIMMED1(1.0f), dst[3]);
120 break;
121 case BLENDFACTOR_INV_CONST_COLOR:
122 out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
123 out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
124 out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
125 out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
126 break;
127 case BLENDFACTOR_INV_CONST_ALPHA:
128 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
129 break;
130 case BLENDFACTOR_INV_SRC1_COLOR:
131 out[0] = FSUB(VIMMED1(1.0f), src1[0]);
132 out[1] = FSUB(VIMMED1(1.0f), src1[1]);
133 out[2] = FSUB(VIMMED1(1.0f), src1[2]);
134 out[3] = FSUB(VIMMED1(1.0f), src1[3]);
135 break;
136 case BLENDFACTOR_INV_SRC1_ALPHA:
137 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
138 break;
139 default:
140 SWR_INVALID("Unsupported blend factor: %d", factor);
141 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
142 break;
143 }
144
145 if (Color)
146 {
147 result[0] = out[0];
148 result[1] = out[1];
149 result[2] = out[2];
150 }
151
152 if (Alpha)
153 {
154 result[3] = out[3];
155 }
156 }
157
158 void Clamp(SWR_FORMAT format, Value* src[4])
159 {
160 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
161 SWR_TYPE type = info.type[0];
162
163 switch (type)
164 {
165 default:
166 break;
167
168 case SWR_TYPE_UNORM:
169 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f));
170 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f));
171 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f));
172 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f));
173 break;
174
175 case SWR_TYPE_SNORM:
176 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f));
177 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f));
178 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f));
179 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
180 break;
181
182 case SWR_TYPE_UNKNOWN: SWR_INVALID("Unsupport format type: %d", type);
183 }
184 }
185
186 void ApplyDefaults(SWR_FORMAT format, Value* src[4])
187 {
188 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
189
190 bool valid[] = { false, false, false, false };
191 for (uint32_t c = 0; c < info.numComps; ++c)
192 {
193 valid[info.swizzle[c]] = true;
194 }
195
196 for (uint32_t c = 0; c < 4; ++c)
197 {
198 if (!valid[c])
199 {
200 src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty);
201 }
202 }
203 }
204
205 void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4])
206 {
207 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
208
209 for (uint32_t c = 0; c < info.numComps; ++c)
210 {
211 if (info.type[c] == SWR_TYPE_UNUSED)
212 {
213 src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
214 }
215 }
216 }
217
218 void Quantize(SWR_FORMAT format, Value* src[4])
219 {
220 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
221 for (uint32_t c = 0; c < info.numComps; ++c)
222 {
223 if (info.bpc[c] <= QUANTIZE_THRESHOLD && info.type[c] != SWR_TYPE_UNUSED)
224 {
225 uint32_t swizComp = info.swizzle[c];
226 float factor = (float)((1 << info.bpc[c]) - 1);
227 switch (info.type[c])
228 {
229 case SWR_TYPE_UNORM:
230 src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f));
231 src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO));
232 src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor));
233 break;
234 default: SWR_INVALID("Unsupported format type: %d", info.type[c]);
235 }
236 }
237 }
238 }
239
240 template<bool Color, bool Alpha>
241 void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
242 {
243 Value* out[4];
244 Value* srcBlend[4];
245 Value* dstBlend[4];
246 for (uint32_t i = 0; i < 4; ++i)
247 {
248 srcBlend[i] = FMUL(src[i], srcFactor[i]);
249 dstBlend[i] = FMUL(dst[i], dstFactor[i]);
250 }
251
252 switch (blendOp)
253 {
254 case BLENDOP_ADD:
255 out[0] = FADD(srcBlend[0], dstBlend[0]);
256 out[1] = FADD(srcBlend[1], dstBlend[1]);
257 out[2] = FADD(srcBlend[2], dstBlend[2]);
258 out[3] = FADD(srcBlend[3], dstBlend[3]);
259 break;
260
261 case BLENDOP_SUBTRACT:
262 out[0] = FSUB(srcBlend[0], dstBlend[0]);
263 out[1] = FSUB(srcBlend[1], dstBlend[1]);
264 out[2] = FSUB(srcBlend[2], dstBlend[2]);
265 out[3] = FSUB(srcBlend[3], dstBlend[3]);
266 break;
267
268 case BLENDOP_REVSUBTRACT:
269 out[0] = FSUB(dstBlend[0], srcBlend[0]);
270 out[1] = FSUB(dstBlend[1], srcBlend[1]);
271 out[2] = FSUB(dstBlend[2], srcBlend[2]);
272 out[3] = FSUB(dstBlend[3], srcBlend[3]);
273 break;
274
275 case BLENDOP_MIN:
276 out[0] = VMINPS(src[0], dst[0]);
277 out[1] = VMINPS(src[1], dst[1]);
278 out[2] = VMINPS(src[2], dst[2]);
279 out[3] = VMINPS(src[3], dst[3]);
280 break;
281
282 case BLENDOP_MAX:
283 out[0] = VMAXPS(src[0], dst[0]);
284 out[1] = VMAXPS(src[1], dst[1]);
285 out[2] = VMAXPS(src[2], dst[2]);
286 out[3] = VMAXPS(src[3], dst[3]);
287 break;
288
289 default:
290 SWR_INVALID("Unsupported blend operation: %d", blendOp);
291 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
292 break;
293 }
294
295 if (Color)
296 {
297 result[0] = out[0];
298 result[1] = out[1];
299 result[2] = out[2];
300 }
301
302 if (Alpha)
303 {
304 result[3] = out[3];
305 }
306 }
307
308 void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
309 {
310 // Op: (s == PS output, d = RT contents)
311 switch(logicOp)
312 {
313 case LOGICOP_CLEAR:
314 result[0] = VIMMED1(0);
315 result[1] = VIMMED1(0);
316 result[2] = VIMMED1(0);
317 result[3] = VIMMED1(0);
318 break;
319
320 case LOGICOP_NOR:
321 // ~(s | d)
322 result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
323 result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
324 result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
325 result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
326 break;
327
328 case LOGICOP_AND_INVERTED:
329 // ~s & d
330 // todo: use avx andnot instr when I can find the intrinsic to call
331 result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
332 result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
333 result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
334 result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
335 break;
336
337 case LOGICOP_COPY_INVERTED:
338 // ~s
339 result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF));
340 result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF));
341 result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF));
342 result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF));
343 break;
344
345 case LOGICOP_AND_REVERSE:
346 // s & ~d
347 // todo: use avx andnot instr when I can find the intrinsic to call
348 result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
349 result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
350 result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
351 result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
352 break;
353
354 case LOGICOP_INVERT:
355 // ~d
356 result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF));
357 result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF));
358 result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF));
359 result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF));
360 break;
361
362 case LOGICOP_XOR:
363 // s ^ d
364 result[0] = XOR(src[0], dst[0]);
365 result[1] = XOR(src[1], dst[1]);
366 result[2] = XOR(src[2], dst[2]);
367 result[3] = XOR(src[3], dst[3]);
368 break;
369
370 case LOGICOP_NAND:
371 // ~(s & d)
372 result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
373 result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
374 result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
375 result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
376 break;
377
378 case LOGICOP_AND:
379 // s & d
380 result[0] = AND(src[0], dst[0]);
381 result[1] = AND(src[1], dst[1]);
382 result[2] = AND(src[2], dst[2]);
383 result[3] = AND(src[3], dst[3]);
384 break;
385
386 case LOGICOP_EQUIV:
387 // ~(s ^ d)
388 result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
389 result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
390 result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
391 result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
392 break;
393
394 case LOGICOP_NOOP:
395 result[0] = dst[0];
396 result[1] = dst[1];
397 result[2] = dst[2];
398 result[3] = dst[3];
399 break;
400
401 case LOGICOP_OR_INVERTED:
402 // ~s | d
403 result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
404 result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
405 result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
406 result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
407 break;
408
409 case LOGICOP_COPY:
410 result[0] = src[0];
411 result[1] = src[1];
412 result[2] = src[2];
413 result[3] = src[3];
414 break;
415
416 case LOGICOP_OR_REVERSE:
417 // s | ~d
418 result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
419 result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
420 result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
421 result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
422 break;
423
424 case LOGICOP_OR:
425 // s | d
426 result[0] = OR(src[0], dst[0]);
427 result[1] = OR(src[1], dst[1]);
428 result[2] = OR(src[2], dst[2]);
429 result[3] = OR(src[3], dst[3]);
430 break;
431
432 case LOGICOP_SET:
433 result[0] = VIMMED1(0xFFFFFFFF);
434 result[1] = VIMMED1(0xFFFFFFFF);
435 result[2] = VIMMED1(0xFFFFFFFF);
436 result[3] = VIMMED1(0xFFFFFFFF);
437 break;
438
439 default:
440 SWR_INVALID("Unsupported logic operation: %d", logicOp);
441 result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f);
442 break;
443 }
444 }
445
446 void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* ppAlpha, Value* ppMask)
447 {
448 // load uint32_t reference
449 Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
450
451 // load alpha
452 Value* pAlpha = LOAD(ppAlpha);
453
454 Value* pTest = nullptr;
455 if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
456 {
457 // convert float alpha to unorm8
458 Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
459 pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
460
461 // compare
462 switch (state.alphaTestFunction)
463 {
464 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
465 case ZFUNC_NEVER: pTest = VIMMED1(false); break;
466 case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break;
467 case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break;
468 case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break;
469 case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break;
470 case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break;
471 case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break;
472 default:
473 SWR_INVALID("Invalid alpha test function");
474 break;
475 }
476 }
477 else
478 {
479 // cast ref to float
480 pRef = BITCAST(pRef, mSimdFP32Ty);
481
482 // compare
483 switch (state.alphaTestFunction)
484 {
485 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
486 case ZFUNC_NEVER: pTest = VIMMED1(false); break;
487 case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break;
488 case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break;
489 case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break;
490 case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break;
491 case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break;
492 case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break;
493 default:
494 SWR_INVALID("Invalid alpha test function");
495 break;
496 }
497 }
498
499 // load current mask
500 Value* pMask = LOAD(ppMask);
501
502 // convert to int1 mask
503 pMask = MASK(pMask);
504
505 // and with alpha test result
506 pMask = AND(pMask, pTest);
507
508 // convert back to vector mask
509 pMask = VMASK(pMask);
510
511 // store new mask
512 STORE(pMask, ppMask);
513 }
514
515 Function* Create(const BLEND_COMPILE_STATE& state)
516 {
517 std::stringstream fnName("BlendShader_", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
518 fnName << ComputeCRC(0, &state, sizeof(state));
519
520 // blend function signature
521 //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
522
523 std::vector<Type*> args{
524 PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE*
525 PointerType::get(mSimdFP32Ty, 0), // simdvector& src
526 PointerType::get(mSimdFP32Ty, 0), // simdvector& src1
527 PointerType::get(mSimdFP32Ty, 0), // src0alpha
528 Type::getInt32Ty(JM()->mContext), // sampleNum
529 PointerType::get(mSimdFP32Ty, 0), // uint8_t* pDst
530 PointerType::get(mSimdFP32Ty, 0), // simdvector& result
531 PointerType::get(mSimdInt32Ty, 0), // simdscalari* oMask
532 PointerType::get(mSimdInt32Ty, 0), // simdscalari* pMask
533 };
534
535 FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
536 Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
537 blendFunc->getParent()->setModuleIdentifier(blendFunc->getName());
538
539 BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
540
541 IRB()->SetInsertPoint(entry);
542
543 // arguments
544 auto argitr = blendFunc->arg_begin();
545 Value* pBlendState = &*argitr++;
546 pBlendState->setName("pBlendState");
547 Value* pSrc = &*argitr++;
548 pSrc->setName("src");
549 Value* pSrc1 = &*argitr++;
550 pSrc1->setName("src1");
551 Value* pSrc0Alpha = &*argitr++;
552 pSrc0Alpha->setName("src0alpha");
553 Value* sampleNum = &*argitr++;
554 sampleNum->setName("sampleNum");
555 Value* pDst = &*argitr++;
556 pDst->setName("pDst");
557 Value* pResult = &*argitr++;
558 pResult->setName("result");
559 Value* ppoMask = &*argitr++;
560 ppoMask->setName("ppoMask");
561 Value* ppMask = &*argitr++;
562 ppMask->setName("pMask");
563
564 static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
565 Value* dst[4];
566 Value* constantColor[4];
567 Value* src[4];
568 Value* src1[4];
569 Value* result[4];
570 for (uint32_t i = 0; i < 4; ++i)
571 {
572 // load hot tile
573 dst[i] = LOAD(pDst, { i });
574
575 // load constant color
576 constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i }));
577
578 // load src
579 src[i] = LOAD(pSrc, { i });
580
581 // load src1
582 src1[i] = LOAD(pSrc1, { i });
583 }
584 Value* currentMask = VIMMED1(-1);
585 if (state.desc.alphaToCoverageEnable)
586 {
587 Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
588 uint32_t bits = (1 << state.desc.numSamples) - 1;
589 currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
590 currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty);
591 }
592
593 // alpha test
594 if (state.desc.alphaTestEnable)
595 {
596 AlphaTest(state, pBlendState, pSrc0Alpha, ppMask);
597 }
598
599 // color blend
600 if (state.blendState.blendEnable)
601 {
602 // clamp sources
603 Clamp(state.format, src);
604 Clamp(state.format, src1);
605 Clamp(state.format, dst);
606 Clamp(state.format, constantColor);
607
608 // apply defaults to hottile contents to take into account missing components
609 ApplyDefaults(state.format, dst);
610
611 // Force defaults for unused 'X' components
612 ApplyUnusedDefaults(state.format, dst);
613
614 // Quantize low precision components
615 Quantize(state.format, dst);
616
617 // special case clamping for R11G11B10_float which has no sign bit
618 if (state.format == R11G11B10_FLOAT)
619 {
620 dst[0] = VMAXPS(dst[0], VIMMED1(0.0f));
621 dst[1] = VMAXPS(dst[1], VIMMED1(0.0f));
622 dst[2] = VMAXPS(dst[2], VIMMED1(0.0f));
623 dst[3] = VMAXPS(dst[3], VIMMED1(0.0f));
624 }
625
626 Value* srcFactor[4];
627 Value* dstFactor[4];
628 if (state.desc.independentAlphaBlendEnable)
629 {
630 GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
631 GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);
632
633 GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
634 GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);
635
636 BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
637 BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
638 }
639 else
640 {
641 GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
642 GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
643
644 BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
645 }
646
647 // store results out
648 for (uint32_t i = 0; i < 4; ++i)
649 {
650 STORE(result[i], pResult, { i });
651 }
652 }
653
654 if(state.blendState.logicOpEnable)
655 {
656 const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
657 Value* vMask[4];
658 float scale[4];
659
660 if (!state.blendState.blendEnable)
661 {
662 Clamp(state.format, src);
663 Clamp(state.format, dst);
664 }
665
666 for(uint32_t i = 0; i < 4; i++)
667 {
668 if (info.type[i] == SWR_TYPE_UNUSED)
669 {
670 continue;
671 }
672
673 if (info.bpc[i] >= 32)
674 {
675 vMask[i] = VIMMED1(0xFFFFFFFF);
676 scale[i] = 0xFFFFFFFF;
677 }
678 else
679 {
680 vMask[i] = VIMMED1((1 << info.bpc[i]) - 1);
681 if (info.type[i] == SWR_TYPE_SNORM)
682 scale[i] = (1 << (info.bpc[i] - 1)) - 1;
683 else
684 scale[i] = (1 << info.bpc[i]) - 1;
685 }
686
687 switch (info.type[i])
688 {
689 default:
690 SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
691 break;
692
693 case SWR_TYPE_UNKNOWN:
694 case SWR_TYPE_UNUSED:
695 // fallthrough
696
697 case SWR_TYPE_UINT:
698 case SWR_TYPE_SINT:
699 src[i] = BITCAST(src[i], mSimdInt32Ty);
700 dst[i] = BITCAST(dst[i], mSimdInt32Ty);
701 break;
702 case SWR_TYPE_SNORM:
703 src[i] = FP_TO_SI(
704 FMUL(src[i], VIMMED1(scale[i])),
705 mSimdInt32Ty);
706 dst[i] = FP_TO_SI(
707 FMUL(dst[i], VIMMED1(scale[i])),
708 mSimdInt32Ty);
709 break;
710 case SWR_TYPE_UNORM:
711 src[i] = FP_TO_UI(
712 FMUL(src[i], VIMMED1(scale[i])),
713 mSimdInt32Ty);
714 dst[i] = FP_TO_UI(
715 FMUL(dst[i], VIMMED1(scale[i])),
716 mSimdInt32Ty);
717 break;
718 }
719 }
720
721 LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);
722
723 // store results out
724 for(uint32_t i = 0; i < 4; ++i)
725 {
726 if (info.type[i] == SWR_TYPE_UNUSED)
727 {
728 continue;
729 }
730
731 // clear upper bits from PS output not in RT format after doing logic op
732 result[i] = AND(result[i], vMask[i]);
733
734 switch (info.type[i])
735 {
736 default:
737 SWR_INVALID("Unsupported type for logic op: %d", info.type[i]);
738 break;
739
740 case SWR_TYPE_UNKNOWN:
741 case SWR_TYPE_UNUSED:
742 // fallthrough
743
744 case SWR_TYPE_UINT:
745 case SWR_TYPE_SINT:
746 result[i] = BITCAST(result[i], mSimdFP32Ty);
747 break;
748 case SWR_TYPE_SNORM:
749 result[i] = SHL(result[i], C(32 - info.bpc[i]));
750 result[i] = ASHR(result[i], C(32 - info.bpc[i]));
751 result[i] = FMUL(SI_TO_FP(result[i], mSimdFP32Ty),
752 VIMMED1(1.0f / scale[i]));
753 break;
754 case SWR_TYPE_UNORM:
755 result[i] = FMUL(UI_TO_FP(result[i], mSimdFP32Ty),
756 VIMMED1(1.0f / scale[i]));
757 break;
758 }
759
760 STORE(result[i], pResult, {i});
761 }
762 }
763
764 if(state.desc.oMaskEnable)
765 {
766 assert(!(state.desc.alphaToCoverageEnable));
767 // load current mask
768 Value* oMask = LOAD(ppoMask);
769 Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum));
770 oMask = AND(oMask, sampleMasked);
771 currentMask = AND(oMask, currentMask);
772 }
773
774 if(state.desc.sampleMaskEnable)
775 {
776 Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask});
777 Value* sampleMasked = SHL(C(1), sampleNum);
778 sampleMask = AND(sampleMask, sampleMasked);
779 sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0)));
780 sampleMask = S_EXT(sampleMask, mSimdInt32Ty);
781 currentMask = AND(sampleMask, currentMask);
782 }
783
784 if (state.desc.alphaToCoverageEnable)
785 {
786 Value* sampleMasked = SHL(C(1), sampleNum);
787 currentMask = AND(currentMask, VBROADCAST(sampleMasked));
788 }
789
790 if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
791 state.desc.oMaskEnable)
792 {
793 // load coverage mask
794 Value* pMask = LOAD(ppMask);
795 currentMask = S_EXT(ICMP_UGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty);
796 Value* outputMask = AND(pMask, currentMask);
797 // store new mask
798 STORE(outputMask, GEP(ppMask, C(0)));
799 }
800
801 RET_VOID();
802
803 JitManager::DumpToFile(blendFunc, "");
804
805 ::FunctionPassManager passes(JM()->mpCurrentModule);
806
807 passes.add(createBreakCriticalEdgesPass());
808 passes.add(createCFGSimplificationPass());
809 passes.add(createEarlyCSEPass());
810 passes.add(createPromoteMemoryToRegisterPass());
811 passes.add(createCFGSimplificationPass());
812 passes.add(createEarlyCSEPass());
813 passes.add(createInstructionCombiningPass());
814 passes.add(createInstructionSimplifierPass());
815 passes.add(createConstantPropagationPass());
816 passes.add(createSCCPPass());
817 passes.add(createAggressiveDCEPass());
818
819 passes.run(*blendFunc);
820
821 JitManager::DumpToFile(blendFunc, "optimized");
822
823 return blendFunc;
824 }
825 };
826
827 //////////////////////////////////////////////////////////////////////////
828 /// @brief JITs from fetch shader IR
829 /// @param hJitMgr - JitManager handle
830 /// @param func - LLVM function IR
831 /// @return PFN_FETCH_FUNC - pointer to fetch code
832 PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
833 {
834 const llvm::Function *func = (const llvm::Function*)hFunc;
835 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
836 PFN_BLEND_JIT_FUNC pfnBlend;
837 pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
838 // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
839 pJitMgr->mIsModuleFinalized = true;
840
841 return pfnBlend;
842 }
843
844 //////////////////////////////////////////////////////////////////////////
845 /// @brief JIT compiles blend shader
846 /// @param hJitMgr - JitManager handle
847 /// @param state - blend state to build function from
848 extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
849 {
850 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
851
852 pJitMgr->SetupNewModule();
853
854 BlendJit theJit(pJitMgr);
855 HANDLE hFunc = theJit.Create(state);
856
857 return JitBlendFunc(hJitMgr, hFunc);
858 }