swr: [rasterizer] add support for llvm-3.9
[mesa.git] / src / gallium / drivers / swr / rasterizer / jitter / blend_jit.cpp
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file blend_jit.cpp
24 *
25 * @brief Implementation of the blend jitter
26 *
27 * Notes:
28 *
29 ******************************************************************************/
30 #include "jit_api.h"
31 #include "blend_jit.h"
32 #include "builder.h"
33 #include "state_llvm.h"
34
35 #include <sstream>
36
37 // components with bit-widths <= the QUANTIZE_THRESHOLD will be quantized
38 #define QUANTIZE_THRESHOLD 2
39
40 //////////////////////////////////////////////////////////////////////////
41 /// Interface to Jitting a blend shader
42 //////////////////////////////////////////////////////////////////////////
43 struct BlendJit : public Builder
44 {
45 BlendJit(JitManager* pJitMgr) : Builder(pJitMgr){};
46
47 template<bool Color, bool Alpha>
48 void GenerateBlendFactor(SWR_BLEND_FACTOR factor, Value* constColor[4], Value* src[4], Value* src1[4], Value* dst[4], Value* result[4])
49 {
50 Value* out[4];
51
52 switch (factor)
53 {
54 case BLENDFACTOR_ONE:
55 out[0] = out[1] = out[2] = out[3] = VIMMED1(1.0f);
56 break;
57 case BLENDFACTOR_SRC_COLOR:
58 out[0] = src[0];
59 out[1] = src[1];
60 out[2] = src[2];
61 out[3] = src[3];
62 break;
63 case BLENDFACTOR_SRC_ALPHA:
64 out[0] = out[1] = out[2] = out[3] = src[3];
65 break;
66 case BLENDFACTOR_DST_ALPHA:
67 out[0] = out[1] = out[2] = out[3] = dst[3];
68 break;
69 case BLENDFACTOR_DST_COLOR:
70 out[0] = dst[0];
71 out[1] = dst[1];
72 out[2] = dst[2];
73 out[3] = dst[3];
74 break;
75 case BLENDFACTOR_SRC_ALPHA_SATURATE:
76 out[0] = out[1] = out[2] = VMINPS(src[3], FSUB(VIMMED1(1.0f), dst[3]));
77 out[3] = VIMMED1(1.0f);
78 break;
79 case BLENDFACTOR_CONST_COLOR:
80 out[0] = constColor[0];
81 out[1] = constColor[1];
82 out[2] = constColor[2];
83 out[3] = constColor[3];
84 break;
85 case BLENDFACTOR_CONST_ALPHA:
86 out[0] = out[1] = out[2] = out[3] = constColor[3];
87 break;
88 case BLENDFACTOR_SRC1_COLOR:
89 out[0] = src1[0];
90 out[1] = src1[1];
91 out[2] = src1[2];
92 out[3] = src1[3];
93 break;
94 case BLENDFACTOR_SRC1_ALPHA:
95 out[0] = out[1] = out[2] = out[3] = src1[3];
96 break;
97 case BLENDFACTOR_ZERO:
98 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
99 break;
100 case BLENDFACTOR_INV_SRC_COLOR:
101 out[0] = FSUB(VIMMED1(1.0f), src[0]);
102 out[1] = FSUB(VIMMED1(1.0f), src[1]);
103 out[2] = FSUB(VIMMED1(1.0f), src[2]);
104 out[3] = FSUB(VIMMED1(1.0f), src[3]);
105 break;
106 case BLENDFACTOR_INV_SRC_ALPHA:
107 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src[3]);
108 break;
109 case BLENDFACTOR_INV_DST_ALPHA:
110 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), dst[3]);
111 break;
112 case BLENDFACTOR_INV_DST_COLOR:
113 out[0] = FSUB(VIMMED1(1.0f), dst[0]);
114 out[1] = FSUB(VIMMED1(1.0f), dst[1]);
115 out[2] = FSUB(VIMMED1(1.0f), dst[2]);
116 out[3] = FSUB(VIMMED1(1.0f), dst[3]);
117 break;
118 case BLENDFACTOR_INV_CONST_COLOR:
119 out[0] = FSUB(VIMMED1(1.0f), constColor[0]);
120 out[1] = FSUB(VIMMED1(1.0f), constColor[1]);
121 out[2] = FSUB(VIMMED1(1.0f), constColor[2]);
122 out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
123 break;
124 case BLENDFACTOR_INV_CONST_ALPHA:
125 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), constColor[3]);
126 break;
127 case BLENDFACTOR_INV_SRC1_COLOR:
128 out[0] = FSUB(VIMMED1(1.0f), src1[0]);
129 out[1] = FSUB(VIMMED1(1.0f), src1[1]);
130 out[2] = FSUB(VIMMED1(1.0f), src1[2]);
131 out[3] = FSUB(VIMMED1(1.0f), src1[3]);
132 break;
133 case BLENDFACTOR_INV_SRC1_ALPHA:
134 out[0] = out[1] = out[2] = out[3] = FSUB(VIMMED1(1.0f), src1[3]);
135 break;
136 default:
137 SWR_ASSERT(false, "Unsupported blend factor: %d", factor);
138 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
139 break;
140 }
141
142 if (Color)
143 {
144 result[0] = out[0];
145 result[1] = out[1];
146 result[2] = out[2];
147 }
148
149 if (Alpha)
150 {
151 result[3] = out[3];
152 }
153 }
154
155 void Clamp(SWR_FORMAT format, Value* src[4])
156 {
157 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
158 SWR_TYPE type = info.type[0];
159
160 switch (type)
161 {
162 case SWR_TYPE_FLOAT:
163 break;
164
165 case SWR_TYPE_UNORM:
166 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(0.0f)), VIMMED1(1.0f));
167 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(0.0f)), VIMMED1(1.0f));
168 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(0.0f)), VIMMED1(1.0f));
169 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(0.0f)), VIMMED1(1.0f));
170 break;
171
172 case SWR_TYPE_SNORM:
173 src[0] = VMINPS(VMAXPS(src[0], VIMMED1(-1.0f)), VIMMED1(1.0f));
174 src[1] = VMINPS(VMAXPS(src[1], VIMMED1(-1.0f)), VIMMED1(1.0f));
175 src[2] = VMINPS(VMAXPS(src[2], VIMMED1(-1.0f)), VIMMED1(1.0f));
176 src[3] = VMINPS(VMAXPS(src[3], VIMMED1(-1.0f)), VIMMED1(1.0f));
177 break;
178
179 default: SWR_ASSERT(false, "Unsupport format type: %d", type);
180 }
181 }
182
183 void ApplyDefaults(SWR_FORMAT format, Value* src[4])
184 {
185 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
186
187 bool valid[] = { false, false, false, false };
188 for (uint32_t c = 0; c < info.numComps; ++c)
189 {
190 valid[info.swizzle[c]] = true;
191 }
192
193 for (uint32_t c = 0; c < 4; ++c)
194 {
195 if (!valid[c])
196 {
197 src[c] = BITCAST(VIMMED1((int)info.defaults[c]), mSimdFP32Ty);
198 }
199 }
200 }
201
202 void ApplyUnusedDefaults(SWR_FORMAT format, Value* src[4])
203 {
204 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
205
206 for (uint32_t c = 0; c < info.numComps; ++c)
207 {
208 if (info.type[c] == SWR_TYPE_UNUSED)
209 {
210 src[info.swizzle[c]] = BITCAST(VIMMED1((int)info.defaults[info.swizzle[c]]), mSimdFP32Ty);
211 }
212 }
213 }
214
215 void Quantize(SWR_FORMAT format, Value* src[4])
216 {
217 const SWR_FORMAT_INFO& info = GetFormatInfo(format);
218 for (uint32_t c = 0; c < info.numComps; ++c)
219 {
220 if (info.bpc[c] <= QUANTIZE_THRESHOLD)
221 {
222 uint32_t swizComp = info.swizzle[c];
223 float factor = (float)((1 << info.bpc[c]) - 1);
224 switch (info.type[c])
225 {
226 case SWR_TYPE_UNORM:
227 src[swizComp] = FADD(FMUL(src[swizComp], VIMMED1(factor)), VIMMED1(0.5f));
228 src[swizComp] = VROUND(src[swizComp], C(_MM_FROUND_TO_ZERO));
229 src[swizComp] = FMUL(src[swizComp], VIMMED1(1.0f /factor));
230 break;
231 default: SWR_ASSERT(false, "Unsupported format type: %d", info.type[c]);
232 }
233 }
234 }
235 }
236
237 template<bool Color, bool Alpha>
238 void BlendFunc(SWR_BLEND_OP blendOp, Value* src[4], Value* srcFactor[4], Value* dst[4], Value* dstFactor[4], Value* result[4])
239 {
240 Value* out[4];
241 Value* srcBlend[4];
242 Value* dstBlend[4];
243 for (uint32_t i = 0; i < 4; ++i)
244 {
245 srcBlend[i] = FMUL(src[i], srcFactor[i]);
246 dstBlend[i] = FMUL(dst[i], dstFactor[i]);
247 }
248
249 switch (blendOp)
250 {
251 case BLENDOP_ADD:
252 out[0] = FADD(srcBlend[0], dstBlend[0]);
253 out[1] = FADD(srcBlend[1], dstBlend[1]);
254 out[2] = FADD(srcBlend[2], dstBlend[2]);
255 out[3] = FADD(srcBlend[3], dstBlend[3]);
256 break;
257
258 case BLENDOP_SUBTRACT:
259 out[0] = FSUB(srcBlend[0], dstBlend[0]);
260 out[1] = FSUB(srcBlend[1], dstBlend[1]);
261 out[2] = FSUB(srcBlend[2], dstBlend[2]);
262 out[3] = FSUB(srcBlend[3], dstBlend[3]);
263 break;
264
265 case BLENDOP_REVSUBTRACT:
266 out[0] = FSUB(dstBlend[0], srcBlend[0]);
267 out[1] = FSUB(dstBlend[1], srcBlend[1]);
268 out[2] = FSUB(dstBlend[2], srcBlend[2]);
269 out[3] = FSUB(dstBlend[3], srcBlend[3]);
270 break;
271
272 case BLENDOP_MIN:
273 out[0] = VMINPS(src[0], dst[0]);
274 out[1] = VMINPS(src[1], dst[1]);
275 out[2] = VMINPS(src[2], dst[2]);
276 out[3] = VMINPS(src[3], dst[3]);
277 break;
278
279 case BLENDOP_MAX:
280 out[0] = VMAXPS(src[0], dst[0]);
281 out[1] = VMAXPS(src[1], dst[1]);
282 out[2] = VMAXPS(src[2], dst[2]);
283 out[3] = VMAXPS(src[3], dst[3]);
284 break;
285
286 default:
287 SWR_ASSERT(false, "Unsupported blend operation: %d", blendOp);
288 out[0] = out[1] = out[2] = out[3] = VIMMED1(0.0f);
289 break;
290 }
291
292 if (Color)
293 {
294 result[0] = out[0];
295 result[1] = out[1];
296 result[2] = out[2];
297 }
298
299 if (Alpha)
300 {
301 result[3] = out[3];
302 }
303 }
304
305 void LogicOpFunc(SWR_LOGIC_OP logicOp, Value* src[4], Value* dst[4], Value* result[4])
306 {
307 // Op: (s == PS output, d = RT contents)
308 switch(logicOp)
309 {
310 case LOGICOP_CLEAR:
311 result[0] = VIMMED1(0);
312 result[1] = VIMMED1(0);
313 result[2] = VIMMED1(0);
314 result[3] = VIMMED1(0);
315 break;
316
317 case LOGICOP_NOR:
318 // ~(s | d)
319 result[0] = XOR(OR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
320 result[1] = XOR(OR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
321 result[2] = XOR(OR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
322 result[3] = XOR(OR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
323 break;
324
325 case LOGICOP_AND_INVERTED:
326 // ~s & d
327 // todo: use avx andnot instr when I can find the intrinsic to call
328 result[0] = AND(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
329 result[1] = AND(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
330 result[2] = AND(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
331 result[3] = AND(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
332 break;
333
334 case LOGICOP_COPY_INVERTED:
335 // ~s
336 result[0] = XOR(src[0], VIMMED1(0xFFFFFFFF));
337 result[1] = XOR(src[1], VIMMED1(0xFFFFFFFF));
338 result[2] = XOR(src[2], VIMMED1(0xFFFFFFFF));
339 result[3] = XOR(src[3], VIMMED1(0xFFFFFFFF));
340 break;
341
342 case LOGICOP_AND_REVERSE:
343 // s & ~d
344 // todo: use avx andnot instr when I can find the intrinsic to call
345 result[0] = AND(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
346 result[1] = AND(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
347 result[2] = AND(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
348 result[3] = AND(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
349 break;
350
351 case LOGICOP_INVERT:
352 // ~d
353 result[0] = XOR(dst[0], VIMMED1(0xFFFFFFFF));
354 result[1] = XOR(dst[1], VIMMED1(0xFFFFFFFF));
355 result[2] = XOR(dst[2], VIMMED1(0xFFFFFFFF));
356 result[3] = XOR(dst[3], VIMMED1(0xFFFFFFFF));
357 break;
358
359 case LOGICOP_XOR:
360 // s ^ d
361 result[0] = XOR(src[0], dst[0]);
362 result[1] = XOR(src[1], dst[1]);
363 result[2] = XOR(src[2], dst[2]);
364 result[3] = XOR(src[3], dst[3]);
365 break;
366
367 case LOGICOP_NAND:
368 // ~(s & d)
369 result[0] = XOR(AND(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
370 result[1] = XOR(AND(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
371 result[2] = XOR(AND(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
372 result[3] = XOR(AND(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
373 break;
374
375 case LOGICOP_AND:
376 // s & d
377 result[0] = AND(src[0], dst[0]);
378 result[1] = AND(src[1], dst[1]);
379 result[2] = AND(src[2], dst[2]);
380 result[3] = AND(src[3], dst[3]);
381 break;
382
383 case LOGICOP_EQUIV:
384 // ~(s ^ d)
385 result[0] = XOR(XOR(src[0], dst[0]), VIMMED1(0xFFFFFFFF));
386 result[1] = XOR(XOR(src[1], dst[1]), VIMMED1(0xFFFFFFFF));
387 result[2] = XOR(XOR(src[2], dst[2]), VIMMED1(0xFFFFFFFF));
388 result[3] = XOR(XOR(src[3], dst[3]), VIMMED1(0xFFFFFFFF));
389 break;
390
391 case LOGICOP_NOOP:
392 result[0] = dst[0];
393 result[1] = dst[1];
394 result[2] = dst[2];
395 result[3] = dst[3];
396 break;
397
398 case LOGICOP_OR_INVERTED:
399 // ~s | d
400 result[0] = OR(XOR(src[0], VIMMED1(0xFFFFFFFF)), dst[0]);
401 result[1] = OR(XOR(src[1], VIMMED1(0xFFFFFFFF)), dst[1]);
402 result[2] = OR(XOR(src[2], VIMMED1(0xFFFFFFFF)), dst[2]);
403 result[3] = OR(XOR(src[3], VIMMED1(0xFFFFFFFF)), dst[3]);
404 break;
405
406 case LOGICOP_COPY:
407 result[0] = src[0];
408 result[1] = src[1];
409 result[2] = src[2];
410 result[3] = src[3];
411 break;
412
413 case LOGICOP_OR_REVERSE:
414 // s | ~d
415 result[0] = OR(XOR(dst[0], VIMMED1(0xFFFFFFFF)), src[0]);
416 result[1] = OR(XOR(dst[1], VIMMED1(0xFFFFFFFF)), src[1]);
417 result[2] = OR(XOR(dst[2], VIMMED1(0xFFFFFFFF)), src[2]);
418 result[3] = OR(XOR(dst[3], VIMMED1(0xFFFFFFFF)), src[3]);
419 break;
420
421 case LOGICOP_OR:
422 // s | d
423 result[0] = OR(src[0], dst[0]);
424 result[1] = OR(src[1], dst[1]);
425 result[2] = OR(src[2], dst[2]);
426 result[3] = OR(src[3], dst[3]);
427 break;
428
429 case LOGICOP_SET:
430 result[0] = VIMMED1(0xFFFFFFFF);
431 result[1] = VIMMED1(0xFFFFFFFF);
432 result[2] = VIMMED1(0xFFFFFFFF);
433 result[3] = VIMMED1(0xFFFFFFFF);
434 break;
435
436 default:
437 SWR_ASSERT(false, "Unsupported logic operation: %d", logicOp);
438 result[0] = result[1] = result[2] = result[3] = VIMMED1(0.0f);
439 break;
440 }
441 }
442
443 void AlphaTest(const BLEND_COMPILE_STATE& state, Value* pBlendState, Value* pAlpha, Value* ppMask)
444 {
445 // load uint32_t reference
446 Value* pRef = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_alphaTestReference }));
447
448 Value* pTest = nullptr;
449 if (state.alphaTestFormat == ALPHA_TEST_UNORM8)
450 {
451 // convert float alpha to unorm8
452 Value* pAlphaU8 = FMUL(pAlpha, VIMMED1(256.0f));
453 pAlphaU8 = FP_TO_UI(pAlphaU8, mSimdInt32Ty);
454
455 // compare
456 switch (state.alphaTestFunction)
457 {
458 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
459 case ZFUNC_NEVER: pTest = VIMMED1(false); break;
460 case ZFUNC_LT: pTest = ICMP_ULT(pAlphaU8, pRef); break;
461 case ZFUNC_EQ: pTest = ICMP_EQ(pAlphaU8, pRef); break;
462 case ZFUNC_LE: pTest = ICMP_ULE(pAlphaU8, pRef); break;
463 case ZFUNC_GT: pTest = ICMP_UGT(pAlphaU8, pRef); break;
464 case ZFUNC_NE: pTest = ICMP_NE(pAlphaU8, pRef); break;
465 case ZFUNC_GE: pTest = ICMP_UGE(pAlphaU8, pRef); break;
466 default:
467 SWR_ASSERT(false, "Invalid alpha test function");
468 break;
469 }
470 }
471 else
472 {
473 // cast ref to float
474 pRef = BITCAST(pRef, mSimdFP32Ty);
475
476 // compare
477 switch (state.alphaTestFunction)
478 {
479 case ZFUNC_ALWAYS: pTest = VIMMED1(true); break;
480 case ZFUNC_NEVER: pTest = VIMMED1(false); break;
481 case ZFUNC_LT: pTest = FCMP_OLT(pAlpha, pRef); break;
482 case ZFUNC_EQ: pTest = FCMP_OEQ(pAlpha, pRef); break;
483 case ZFUNC_LE: pTest = FCMP_OLE(pAlpha, pRef); break;
484 case ZFUNC_GT: pTest = FCMP_OGT(pAlpha, pRef); break;
485 case ZFUNC_NE: pTest = FCMP_ONE(pAlpha, pRef); break;
486 case ZFUNC_GE: pTest = FCMP_OGE(pAlpha, pRef); break;
487 default:
488 SWR_ASSERT(false, "Invalid alpha test function");
489 break;
490 }
491 }
492
493 // load current mask
494 Value* pMask = LOAD(ppMask);
495
496 // convert to int1 mask
497 pMask = MASK(pMask);
498
499 // and with alpha test result
500 pMask = AND(pMask, pTest);
501
502 // convert back to vector mask
503 pMask = VMASK(pMask);
504
505 // store new mask
506 STORE(pMask, ppMask);
507 }
508
509 Function* Create(const BLEND_COMPILE_STATE& state)
510 {
511 static std::size_t jitNum = 0;
512
513 std::stringstream fnName("BlendShader", std::ios_base::in | std::ios_base::out | std::ios_base::ate);
514 fnName << jitNum++;
515
516 // blend function signature
517 //typedef void(*PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, simdvector&, simdvector&, uint32_t, BYTE*, simdvector&, simdscalari*, simdscalari*);
518
519 std::vector<Type*> args{
520 PointerType::get(Gen_SWR_BLEND_STATE(JM()), 0), // SWR_BLEND_STATE*
521 PointerType::get(mSimdFP32Ty, 0), // simdvector& src
522 PointerType::get(mSimdFP32Ty, 0), // simdvector& src1
523 Type::getInt32Ty(JM()->mContext), // sampleNum
524 PointerType::get(mSimdFP32Ty, 0), // uint8_t* pDst
525 PointerType::get(mSimdFP32Ty, 0), // simdvector& result
526 PointerType::get(mSimdInt32Ty, 0), // simdscalari* oMask
527 PointerType::get(mSimdInt32Ty, 0), // simdscalari* pMask
528 };
529
530 FunctionType* fTy = FunctionType::get(IRB()->getVoidTy(), args, false);
531 Function* blendFunc = Function::Create(fTy, GlobalValue::ExternalLinkage, fnName.str(), JM()->mpCurrentModule);
532
533 BasicBlock* entry = BasicBlock::Create(JM()->mContext, "entry", blendFunc);
534
535 IRB()->SetInsertPoint(entry);
536
537 // arguments
538 auto argitr = blendFunc->getArgumentList().begin();
539 Value* pBlendState = &*argitr++;
540 pBlendState->setName("pBlendState");
541 Value* pSrc = &*argitr++;
542 pSrc->setName("src");
543 Value* pSrc1 = &*argitr++;
544 pSrc1->setName("src1");
545 Value* sampleNum = &*argitr++;
546 sampleNum->setName("sampleNum");
547 Value* pDst = &*argitr++;
548 pDst->setName("pDst");
549 Value* pResult = &*argitr++;
550 pResult->setName("result");
551 Value* ppoMask = &*argitr++;
552 ppoMask->setName("ppoMask");
553 Value* ppMask = &*argitr++;
554 ppMask->setName("pMask");
555
556 static_assert(KNOB_COLOR_HOT_TILE_FORMAT == R32G32B32A32_FLOAT, "Unsupported hot tile format");
557 Value* dst[4];
558 Value* constantColor[4];
559 Value* src[4];
560 Value* src1[4];
561 Value* result[4];
562 for (uint32_t i = 0; i < 4; ++i)
563 {
564 // load hot tile
565 dst[i] = LOAD(pDst, { i });
566
567 // load constant color
568 constantColor[i] = VBROADCAST(LOAD(pBlendState, { 0, SWR_BLEND_STATE_constantColor, i }));
569
570 // load src
571 src[i] = LOAD(pSrc, { i });
572
573 // load src1
574 src1[i] = LOAD(pSrc1, { i });
575 }
576 Value* currentMask = VIMMED1(-1);
577 if (state.desc.alphaToCoverageEnable)
578 {
579 Value* pClampedSrc = FCLAMP(src[3], 0.0f, 1.0f);
580 uint32_t bits = (1 << state.desc.numSamples) - 1;
581 currentMask = FMUL(pClampedSrc, VBROADCAST(C((float)bits)));
582 currentMask = FP_TO_SI(FADD(currentMask, VIMMED1(0.5f)), mSimdInt32Ty);
583 }
584
585 // alpha test
586 if (state.desc.alphaTestEnable)
587 {
588 AlphaTest(state, pBlendState, src[3], ppMask);
589 }
590
591 // color blend
592 if (state.blendState.blendEnable)
593 {
594 // clamp sources
595 Clamp(state.format, src);
596 Clamp(state.format, src1);
597 Clamp(state.format, dst);
598 Clamp(state.format, constantColor);
599
600 // apply defaults to hottile contents to take into account missing components
601 ApplyDefaults(state.format, dst);
602
603 // Force defaults for unused 'X' components
604 ApplyUnusedDefaults(state.format, dst);
605
606 // Quantize low precision components
607 Quantize(state.format, dst);
608
609 // special case clamping for R11G11B10_float which has no sign bit
610 if (state.format == R11G11B10_FLOAT)
611 {
612 dst[0] = VMAXPS(dst[0], VIMMED1(0.0f));
613 dst[1] = VMAXPS(dst[1], VIMMED1(0.0f));
614 dst[2] = VMAXPS(dst[2], VIMMED1(0.0f));
615 dst[3] = VMAXPS(dst[3], VIMMED1(0.0f));
616 }
617
618 Value* srcFactor[4];
619 Value* dstFactor[4];
620 if (state.desc.independentAlphaBlendEnable)
621 {
622 GenerateBlendFactor<true, false>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
623 GenerateBlendFactor<false, true>(state.blendState.sourceAlphaBlendFactor, constantColor, src, src1, dst, srcFactor);
624
625 GenerateBlendFactor<true, false>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
626 GenerateBlendFactor<false, true>(state.blendState.destAlphaBlendFactor, constantColor, src, src1, dst, dstFactor);
627
628 BlendFunc<true, false>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
629 BlendFunc<false, true>(state.blendState.alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
630 }
631 else
632 {
633 GenerateBlendFactor<true, true>(state.blendState.sourceBlendFactor, constantColor, src, src1, dst, srcFactor);
634 GenerateBlendFactor<true, true>(state.blendState.destBlendFactor, constantColor, src, src1, dst, dstFactor);
635
636 BlendFunc<true, true>(state.blendState.colorBlendFunc, src, srcFactor, dst, dstFactor, result);
637 }
638
639 // store results out
640 for (uint32_t i = 0; i < 4; ++i)
641 {
642 STORE(result[i], pResult, { i });
643 }
644 }
645
646 if(state.blendState.logicOpEnable)
647 {
648 const SWR_FORMAT_INFO& info = GetFormatInfo(state.format);
649 SWR_ASSERT(info.type[0] == SWR_TYPE_UINT);
650 Value* vMask[4];
651 for(uint32_t i = 0; i < 4; i++)
652 {
653 switch(info.bpc[i])
654 {
655 case 0: vMask[i] = VIMMED1(0x00000000); break;
656 case 2: vMask[i] = VIMMED1(0x00000003); break;
657 case 5: vMask[i] = VIMMED1(0x0000001F); break;
658 case 6: vMask[i] = VIMMED1(0x0000003F); break;
659 case 8: vMask[i] = VIMMED1(0x000000FF); break;
660 case 10: vMask[i] = VIMMED1(0x000003FF); break;
661 case 11: vMask[i] = VIMMED1(0x000007FF); break;
662 case 16: vMask[i] = VIMMED1(0x0000FFFF); break;
663 case 24: vMask[i] = VIMMED1(0x00FFFFFF); break;
664 case 32: vMask[i] = VIMMED1(0xFFFFFFFF); break;
665 default:
666 vMask[i] = VIMMED1(0x0);
667 SWR_ASSERT(0, "Unsupported bpc for logic op\n");
668 break;
669 }
670 src[i] = BITCAST(src[i], mSimdInt32Ty);//, vMask[i]);
671 dst[i] = BITCAST(dst[i], mSimdInt32Ty);
672 }
673
674 LogicOpFunc(state.blendState.logicOpFunc, src, dst, result);
675
676 // store results out
677 for(uint32_t i = 0; i < 4; ++i)
678 {
679 // clear upper bits from PS output not in RT format after doing logic op
680 result[i] = AND(result[i], vMask[i]);
681
682 STORE(BITCAST(result[i], mSimdFP32Ty), pResult, {i});
683 }
684 }
685
686 if(state.desc.oMaskEnable)
687 {
688 assert(!(state.desc.alphaToCoverageEnable));
689 // load current mask
690 Value* oMask = LOAD(ppoMask);
691 Value* sampleMasked = VBROADCAST(SHL(C(1), sampleNum));
692 oMask = AND(oMask, sampleMasked);
693 currentMask = AND(oMask, currentMask);
694 }
695
696 if(state.desc.sampleMaskEnable)
697 {
698 Value* sampleMask = LOAD(pBlendState, { 0, SWR_BLEND_STATE_sampleMask});
699 Value* sampleMasked = SHL(C(1), sampleNum);
700 sampleMask = AND(sampleMask, sampleMasked);
701 sampleMask = VBROADCAST(ICMP_SGT(sampleMask, C(0)));
702 sampleMask = S_EXT(sampleMask, mSimdInt32Ty);
703 currentMask = AND(sampleMask, currentMask);
704 }
705
706 if (state.desc.alphaToCoverageEnable)
707 {
708 Value* sampleMasked = SHL(C(1), sampleNum);
709 currentMask = AND(currentMask, VBROADCAST(sampleMasked));
710 }
711
712 if(state.desc.sampleMaskEnable || state.desc.alphaToCoverageEnable ||
713 state.desc.oMaskEnable)
714 {
715 // load current mask
716 Value* pMask = LOAD(ppMask);
717 currentMask = S_EXT(ICMP_SGT(currentMask, VBROADCAST(C(0))), mSimdInt32Ty);
718 Value* outputMask = AND(pMask, currentMask);
719 // store new mask
720 STORE(outputMask, GEP(ppMask, C(0)));
721 }
722
723 RET_VOID();
724
725 JitManager::DumpToFile(blendFunc, "");
726
727 ::FunctionPassManager passes(JM()->mpCurrentModule);
728
729 passes.add(createBreakCriticalEdgesPass());
730 passes.add(createCFGSimplificationPass());
731 passes.add(createEarlyCSEPass());
732 passes.add(createPromoteMemoryToRegisterPass());
733 passes.add(createCFGSimplificationPass());
734 passes.add(createEarlyCSEPass());
735 passes.add(createInstructionCombiningPass());
736 passes.add(createInstructionSimplifierPass());
737 passes.add(createConstantPropagationPass());
738 passes.add(createSCCPPass());
739 passes.add(createAggressiveDCEPass());
740
741 passes.run(*blendFunc);
742
743 JitManager::DumpToFile(blendFunc, "optimized");
744
745 return blendFunc;
746 }
747 };
748
749 //////////////////////////////////////////////////////////////////////////
750 /// @brief JITs from fetch shader IR
751 /// @param hJitMgr - JitManager handle
752 /// @param func - LLVM function IR
753 /// @return PFN_FETCH_FUNC - pointer to fetch code
754 PFN_BLEND_JIT_FUNC JitBlendFunc(HANDLE hJitMgr, const HANDLE hFunc)
755 {
756 const llvm::Function *func = (const llvm::Function*)hFunc;
757 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
758 PFN_BLEND_JIT_FUNC pfnBlend;
759 pfnBlend = (PFN_BLEND_JIT_FUNC)(pJitMgr->mpExec->getFunctionAddress(func->getName().str()));
760 // MCJIT finalizes modules the first time you JIT code from them. After finalized, you cannot add new IR to the module
761 pJitMgr->mIsModuleFinalized = true;
762
763 return pfnBlend;
764 }
765
766 //////////////////////////////////////////////////////////////////////////
767 /// @brief JIT compiles blend shader
768 /// @param hJitMgr - JitManager handle
769 /// @param state - blend state to build function from
770 extern "C" PFN_BLEND_JIT_FUNC JITCALL JitCompileBlend(HANDLE hJitMgr, const BLEND_COMPILE_STATE& state)
771 {
772 JitManager* pJitMgr = reinterpret_cast<JitManager*>(hJitMgr);
773
774 pJitMgr->SetupNewModule();
775
776 BlendJit theJit(pJitMgr);
777 HANDLE hFunc = theJit.Create(state);
778
779 return JitBlendFunc(hJitMgr, hFunc);
780 }