1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
27 #include "JitManager.h"
28 #include "llvm-c/Core.h"
29 #include "llvm/Support/CBindingWrapping.h"
30 #pragma pop_macro("DEBUG")
33 #include "gen_state_llvm.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "util/u_format.h"
38 #include "util/u_prim.h"
39 #include "gallivm/lp_bld_init.h"
40 #include "gallivm/lp_bld_flow.h"
41 #include "gallivm/lp_bld_struct.h"
42 #include "gallivm/lp_bld_tgsi.h"
44 #include "swr_context.h"
45 #include "gen_swr_context_llvm.h"
46 #include "swr_resource.h"
47 #include "swr_state.h"
48 #include "swr_screen.h"
// File-scope compatibility shim and declarations.
// NOTE(review): the embedded listing numbers skip values here, so lines are
// missing from this view (for example the directive that closes the #if).
// When HAVE_LLVM is below 0x0500, AttributeSet is aliased under the
// AttributeList name that the compile functions in this file use.
50 #if HAVE_LLVM < 0x0500
52 typedef AttributeSet AttributeList
;
56 using namespace SwrJit
;
// Forward declaration of locate_linkage; it is defined further down this file.
// NOTE(review): the return-type line is not visible in this listing.
60 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
// Equality for fragment-shader JIT keys.
// Compares the two key objects byte for byte with memcmp over sizeof(lhs),
// so every byte of the object takes part in the comparison. The key
// generator for this type in this file zeroes the key with memset before
// filling it in.
62 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
64 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Equality for vertex-shader JIT keys.
// Compares the two key objects byte for byte with memcmp over sizeof(lhs),
// so every byte of the object takes part in the comparison. The key
// generator for this type in this file zeroes the key with memset before
// filling it in.
67 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
69 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Equality for fetch-shader JIT keys.
// Compares the two key objects byte for byte with memcmp over sizeof(lhs),
// so every byte of the object takes part in the comparison. The key
// generator for this type in this file zeroes the key with memset before
// filling it in.
72 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
74 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Equality for geometry-shader JIT keys.
// Compares the two key objects byte for byte with memcmp over sizeof(lhs),
// so every byte of the object takes part in the comparison. The key
// generator for this type in this file zeroes the key with memset before
// filling it in.
77 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
79 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Fills the sampler portion of a JIT key from the shader TGSI info and the
// samplers and sampler views bound in ctx for the given shader stage.
//
//   info        - shader info; only info.base.file_max and file_mask are read
//   ctx         - supplies samplers[shader_type][i] and sampler_views[shader_type][i]
//   shader_type - selects which stage bindings are consulted
//   key         - output: nr_samplers, nr_sampler_views and sampler[i] are written
//
// Flow, as far as it is visible in this listing:
//   1. nr_samplers is file_max[TGSI_FILE_SAMPLER] + 1, and the static sampler
//      state is captured for every slot whose bit is set in the SAMPLER mask.
//   2. If file_max[TGSI_FILE_SAMPLER_VIEW] is not -1, nr_sampler_views comes
//      from the SAMPLER_VIEW file and the view loop tests the SAMPLER_VIEW mask.
//   3. A second copy of the view loop sets nr_sampler_views = nr_samplers and
//      tests the SAMPLER mask instead.
//      NOTE(review): the keyword joining it to step 2 is not visible here;
//      presumably it is the else path.
//   In both view loops the static texture state is captured, and when the
//   resource has both depth and stencil but the view format has no depth,
//   the keyed texture format is overridden to PIPE_FORMAT_S8_UINT.
//
// NOTE(review): the listing numbers skip 112 and 130, and the count of
// dropped closing lines would fit a guard on view at those positions. The
// guard itself is not visible, so confirm against the real file that
// view->texture is never reached with a null view.
83 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
84 struct swr_context
*ctx
,
85 enum pipe_shader_type shader_type
,
86 struct swr_jit_sampler_key
&key
)
88 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
// One static sampler state per sampler slot flagged in the SAMPLER file mask.
90 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
91 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
92 lp_sampler_static_sampler_state(
93 &key
.sampler
[i
].sampler_state
,
94 ctx
->samplers
[shader_type
][i
]);
99 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
100 * are dx10-style? Can't really have mixed opcodes, at least not
101 * if we want to skip the holes here (without rescanning tgsi).
103 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
104 key
.nr_sampler_views
=
105 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
106 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
107 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1 << i
)) {
108 const struct pipe_sampler_view
*view
=
109 ctx
->sampler_views
[shader_type
][i
];
110 lp_sampler_static_texture_state(
111 &key
.sampler
[i
].texture_state
, view
);
113 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
114 const struct util_format_description
*desc
=
115 util_format_description(view
->format
);
116 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
117 !util_format_has_depth(desc
))
118 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
123 key
.nr_sampler_views
= key
.nr_samplers
;
124 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
125 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
126 const struct pipe_sampler_view
*view
=
127 ctx
->sampler_views
[shader_type
][i
];
128 lp_sampler_static_texture_state(
129 &key
.sampler
[i
].texture_state
, view
);
131 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
132 const struct util_format_description
*desc
=
133 util_format_description(view
->format
);
134 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
135 !util_format_has_depth(desc
))
136 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
// Builds the fragment-shader variant key.
//
//   key    - output; zeroed with memset first, then filled in
//   ctx    - supplies framebuffer, rasterizer, poly_stipple and the bound gs/vs
//   swr_fs - fragment shader whose info drives the sampler part of the key
//
// The key records the colour-buffer count, two-sided lighting and sprite
// coordinate enables, the output semantic name/index tables of the previous
// shader stage, the sampler state, and whether polygon stipple applies.
144 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
145 struct swr_context
*ctx
,
146 swr_fragment_shader
*swr_fs
)
// Zero the whole key first; key equality in this file is a raw memcmp.
148 memset(&key
, 0, sizeof(key
));
150 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
151 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
152 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
154 struct tgsi_shader_info
*pPrevShader
;
// NOTE(review): two assignments to pPrevShader follow (gs info, then vs info).
// The condition choosing between them is not visible in this listing;
// presumably the gs info is used only when a geometry shader is bound.
156 pPrevShader
= &ctx
->gs
->info
.base
;
158 pPrevShader
= &ctx
->vs
->info
.base
;
// Snapshot the previous stage output semantics into the key.
160 memcpy(&key
.vs_output_semantic_name
,
161 &pPrevShader
->output_semantic_name
,
162 sizeof(key
.vs_output_semantic_name
));
163 memcpy(&key
.vs_output_semantic_idx
,
164 &pPrevShader
->output_semantic_index
,
165 sizeof(key
.vs_output_semantic_idx
));
167 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
// Stipple is keyed on only when the rasterizer enables it and
// ctx->poly_stipple.prim_is_poly is set.
169 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
170 ctx
->poly_stipple
.prim_is_poly
;
// Builds the vertex-shader variant key.
//
//   key    - output; zeroed with memset first, then filled in
//   ctx    - supplies the rasterizer clip_plane_enable mask and sampler bindings
//   swr_vs - vertex shader whose info drives the clip mask and sampler part
174 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
175 struct swr_context
*ctx
,
176 swr_vertex_shader
*swr_vs
)
178 memset(&key
, 0, sizeof(key
));
// If the shader writes clip distances, keep only those that are also enabled
// in the rasterizer; otherwise use the rasterizer enable mask unchanged.
180 key
.clip_plane_mask
=
181 swr_vs
->info
.base
.clipdist_writemask
?
182 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
183 ctx
->rasterizer
->clip_plane_enable
;
185 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
// Builds the fetch-shader variant key: zeroes the key, then copies the
// fsState member from the vertex element state.
//
//   key    - output key
//   velems - vertex element state supplying fsState
189 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
190 struct swr_vertex_element_state
*velems
)
192 memset(&key
, 0, sizeof(key
));
194 key
.fsState
= velems
->fsState
;
// Builds the geometry-shader variant key.
//
//   key    - output; zeroed with memset first, then filled in
//   ctx    - supplies the bound vertex shader and the sampler bindings
//   swr_gs - geometry shader whose info drives the sampler part of the key
//
// The previous stage is always the bound vertex shader here; its output
// semantic name/index tables are copied into the key.
198 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
199 struct swr_context
*ctx
,
200 swr_geometry_shader
*swr_gs
)
202 memset(&key
, 0, sizeof(key
));
204 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
206 memcpy(&key
.vs_output_semantic_name
,
207 &pPrevShader
->output_semantic_name
,
208 sizeof(key
.vs_output_semantic_name
));
209 memcpy(&key
.vs_output_semantic_idx
,
210 &pPrevShader
->output_semantic_index
,
211 sizeof(key
.vs_output_semantic_idx
));
213 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
// Builder subclass used to JIT-compile the shaders in this file.
// It holds a gallivm_state created in the constructor and declares the
// per-stage compile entry points (CompileVS, CompileFS, CompileGS), the
// WriteVS helper, and the four geometry-shader callbacks that the file-scope
// trampolines forward to.
// NOTE(review): several lines of this struct are not visible in this listing
// (base-class initializer, destructor signature, return types, closing brace).
216 struct BuilderSWR
: public Builder
{
// Constructor: starts a new module on the JIT manager, creates the gallivm
// state on the JIT manager LLVM context, and makes the gallivm module the
// JIT manager current module.
217 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
220 pJitMgr
->SetupNewModule();
221 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
));
222 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
// NOTE(review): presumably the destructor body; its signature line is not
// visible here.
226 gallivm_free_ir(gallivm
);
// Stores one output channel value into the vertex output at slot/channel.
229 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
230 unsigned slot
, unsigned channel
);
232 struct gallivm_state
*gallivm
;
// Per-stage compile entry points; each takes the context and the variant key.
233 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
234 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
235 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
// Geometry-shader callbacks, defined later as BuilderSWR members.
238 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
239 struct lp_build_tgsi_context
* bld_base
,
240 boolean is_vindex_indirect
,
241 LLVMValueRef vertex_index
,
242 boolean is_aindex_indirect
,
243 LLVMValueRef attrib_index
,
244 LLVMValueRef swizzle_index
);
246 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
247 struct lp_build_tgsi_context
* bld_base
,
248 LLVMValueRef (*outputs
)[4],
249 LLVMValueRef emitted_vertices_vec
);
252 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
253 struct lp_build_tgsi_context
* bld_base
,
254 LLVMValueRef verts_per_prim_vec
,
255 LLVMValueRef emitted_prims_vec
);
258 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
259 struct lp_build_tgsi_context
* bld_base
,
260 LLVMValueRef total_emitted_vertices_vec
,
261 LLVMValueRef emitted_prims_vec
);
// Per-compile state handed to the TGSI translator for geometry shaders.
// base is the first member, and the callbacks in this file cast the
// lp_build_tgsi_gs_iface pointer they receive back to swr_gs_llvm_iface.
// NOTE(review): other code in this file reads iface->pGsCtx, but that member
// is not visible in this listing; the line was presumably dropped.
265 struct swr_gs_llvm_iface
{
// Callback table filled in by CompileGS.
266 struct lp_build_tgsi_gs_iface base
;
// TGSI info of the geometry shader being compiled.
267 struct tgsi_shader_info
*info
;
// Builder whose member callbacks do the actual IR emission.
269 BuilderSWR
*pBuilder
;
272 SWR_GS_STATE
*pGsState
;
273 uint32_t num_outputs
;
274 uint32_t num_verts_per_prim
;
// Table mapping GS input slots to vertex attribute slots (built in CompileGS).
276 Value
*pVtxAttribMap
;
279 // trampoline functions so we can use the builder llvm construction methods
// File-scope trampoline for the GS fetch_input callback.
// Casts gs_iface back to swr_gs_llvm_iface and forwards to the BuilderSWR
// member of the same name, returning its result.
// NOTE(review): the tail of the forwarded argument list is not visible in
// this listing.
281 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
282 struct lp_build_tgsi_context
* bld_base
,
283 boolean is_vindex_indirect
,
284 LLVMValueRef vertex_index
,
285 boolean is_aindex_indirect
,
286 LLVMValueRef attrib_index
,
287 LLVMValueRef swizzle_index
)
289 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
291 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld_base
,
// File-scope trampoline for the GS emit_vertex callback.
// Casts gs_base back to swr_gs_llvm_iface and forwards to the BuilderSWR
// member of the same name.
// NOTE(review): one forwarded argument line is not visible in this listing.
300 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
301 struct lp_build_tgsi_context
* bld_base
,
302 LLVMValueRef (*outputs
)[4],
303 LLVMValueRef emitted_vertices_vec
)
305 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
307 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld_base
,
309 emitted_vertices_vec
);
// File-scope trampoline for the GS end_primitive callback.
// Casts gs_base back to swr_gs_llvm_iface and forwards to the BuilderSWR
// member of the same name.
// NOTE(review): the tail of the forwarded argument list is not visible in
// this listing.
313 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
314 struct lp_build_tgsi_context
* bld_base
,
315 LLVMValueRef verts_per_prim_vec
,
316 LLVMValueRef emitted_prims_vec
)
318 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
320 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld_base
,
// File-scope trampoline for the GS gs_epilogue callback.
// Casts gs_base back to swr_gs_llvm_iface and forwards to the BuilderSWR
// member of the same name.
// NOTE(review): the last forwarded argument line is not visible in this
// listing.
326 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
327 struct lp_build_tgsi_context
* bld_base
,
328 LLVMValueRef total_emitted_vertices_vec
,
329 LLVMValueRef emitted_prims_vec
)
331 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
333 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
, bld_base
,
334 total_emitted_vertices_vec
,
// Emits IR that loads one geometry-shader input value.
//
//   gs_iface      - really a swr_gs_llvm_iface (cast back below)
//   vertex_index  - which input vertex to read
//   attrib_index  - GS input slot, translated through pVtxAttribMap
//   swizzle_index - channel to read
//
// Indirect vertex or attribute indexing is not supported; the assert below
// requires both indirect flags to be false.
// NOTE(review): the variable receiving each LOAD, part of the GEP index list
// and the return statement are not visible in this listing.
339 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
340 struct lp_build_tgsi_context
* bld_base
,
341 boolean is_vindex_indirect
,
342 LLVMValueRef vertex_index
,
343 boolean is_aindex_indirect
,
344 LLVMValueRef attrib_index
,
345 LLVMValueRef swizzle_index
)
347 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
// Continue emitting at the block where the gallivm builder currently is.
349 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
351 assert(is_vindex_indirect
== false && is_aindex_indirect
== false);
// Look up the mapped attribute slot for this GS input.
354 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), unwrap(attrib_index
)}));
// Read from the vert member of the GS context, indexed by vertex and channel.
357 LOAD(GEP(iface
->pGsCtx
,
359 C(SWR_GS_CONTEXT_vert
),
360 unwrap(vertex_index
),
363 unwrap(swizzle_index
)}));
// Emits IR that writes the current GS output registers to the output stream
// for every lane enabled in the GS context mask.
//
//   gs_base              - really a swr_gs_llvm_iface (cast back below)
//   outputs              - output register pointers, indexed [attrib][channel]
//   emitted_vertices_vec - per-lane count of vertices emitted so far
//
// For each output attribute a per-lane byte offset into pStream is built
// from: a per-lane primitive offset (vOffsets), the vertex slot
// (emitted >> 3) times sizeof(simdvertex), the attribute slot times
// sizeof(simdvector), and the lane within the slot (emitted & 7) times
// sizeof(float). Each of the 4 channels is then scattered under the mask.
// NOTE(review): several lines are not visible in this listing (most of the
// vOffsets initializer and the assignment targets of the ADD calls).
369 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
370 struct lp_build_tgsi_context
* bld_base
,
371 LLVMValueRef (*outputs
)[4],
372 LLVMValueRef emitted_vertices_vec
)
374 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
375 SWR_GS_STATE
*pGS
= iface
->pGsState
;
377 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Bytes of output stream per input primitive: one simdvertex per group of
// 8 vertices, rounded up.
379 const uint32_t simdVertexStride
= sizeof(simdvertex
);
380 const uint32_t numSimdBatches
= (pGS
->maxNumVerts
+ 7) / 8;
381 const uint32_t inputPrimStride
= numSimdBatches
* simdVertexStride
;
383 Value
*pStream
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_pStream
});
384 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
// Narrow the lane mask to a vector of 8 one-bit values for the scatter.
385 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
387 Value
*vOffsets
= C({
395 inputPrimStride
* 7 } );
// Split the emitted-vertex count into simdvertex index and lane within it.
397 Value
*vVertexSlot
= ASHR(unwrap(emitted_vertices_vec
), 3);
398 Value
*vSimdSlot
= AND(unwrap(emitted_vertices_vec
), 7);
400 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
// Outputs with PSIZE, PRIMID or LAYER semantics go to fixed vertex slots;
// all other outputs keep their own index.
401 uint32_t attribSlot
= attrib
;
402 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
)
403 attribSlot
= VERTEX_POINT_SIZE_SLOT
;
404 else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PRIMID
)
405 attribSlot
= VERTEX_PRIMID_SLOT
;
406 else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
)
407 attribSlot
= VERTEX_RTAI_SLOT
;
409 Value
*vOffsetsAttrib
=
410 ADD(vOffsets
, MUL(vVertexSlot
, VIMMED1((uint32_t)sizeof(simdvertex
))));
412 ADD(vOffsetsAttrib
, VIMMED1((uint32_t)(attribSlot
*sizeof(simdvector
))));
414 ADD(vOffsetsAttrib
, MUL(vSimdSlot
, VIMMED1((uint32_t)sizeof(float))));
416 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
417 Value
*vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
418 Value
*vPtrs
= GEP(pStream
, vOffsetsAttrib
);
// Reinterpret the addresses as a vector of 8 float pointers.
420 vPtrs
= BITCAST(vPtrs
,
421 VectorType::get(PointerType::get(mFP32Ty
, 0), 8));
423 MASKED_SCATTER(vData
, vPtrs
, 32, vMask1
);
// Step to the next channel, which lies one simdscalar further on.
426 ADD(vOffsetsAttrib
, VIMMED1((uint32_t)sizeof(simdscalar
)));
// Emits IR that marks the end of a primitive by setting a bit in the cut
// buffer (pCutOrStreamIdBuffer) for the most recently emitted vertex.
//
//   gs_base            - really a swr_gs_llvm_iface (cast back below)
//   bld_base           - TGSI build context; its soa context supplies the
//                        total emitted vertex count and the execution mask
//   verts_per_prim_vec - per-lane vertices emitted in the current primitive
//   emitted_prims_vec  - per-lane primitives emitted so far
//
// Only lanes that are active (build mask, plus exec mask when present) and
// have a non-zero verts_per_prim value are written. The bit lives at byte
// (count - 1) / 8, bit (count - 1) % 8, within a per-lane region of
// cutPrimStride bytes.
// NOTE(review): several assignment targets are not visible in this listing
// (for example the declarations of pCutBuffer and vCount).
432 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
433 struct lp_build_tgsi_context
* bld_base
,
434 LLVMValueRef verts_per_prim_vec
,
435 LLVMValueRef emitted_prims_vec
)
437 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
438 SWR_GS_STATE
*pGS
= iface
->pGsState
;
440 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
443 LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer
});
444 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
445 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
447 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
// NOTE(review): this sum is presumably an initial vCount; vCount is
// reassigned from total_emitted_vertices_vec_ptr just below.
450 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
451 unwrap(verts_per_prim_vec
));
453 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
454 vCount
= LOAD(unwrap(bld
->total_emitted_vertices_vec_ptr
));
// Combine the build mask with the execution mask when one is in effect.
456 struct lp_exec_mask
*exec_mask
= &bld
->exec_mask
;
457 Value
*mask
= unwrap(lp_build_mask_value(bld
->mask
));
458 if (exec_mask
->has_mask
)
459 mask
= AND(mask
, unwrap(exec_mask
->exec_mask
));
// Drop lanes that emitted no vertices in this primitive.
461 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
462 mask
= AND(mask
, cmpMask
);
463 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
// Cut-buffer stride per lane: maxNumVerts divided by mVWidth, rounded up.
465 const uint32_t cutPrimStride
=
466 (pGS
->maxNumVerts
+ JM()->mVWidth
- 1) / JM()->mVWidth
;
467 Value
*vOffsets
= C({
468 (uint32_t)(cutPrimStride
* 0),
469 (uint32_t)(cutPrimStride
* 1),
470 (uint32_t)(cutPrimStride
* 2),
471 (uint32_t)(cutPrimStride
* 3),
472 (uint32_t)(cutPrimStride
* 4),
473 (uint32_t)(cutPrimStride
* 5),
474 (uint32_t)(cutPrimStride
* 6),
475 (uint32_t)(cutPrimStride
* 7) } );
// Index of the last emitted vertex, then its byte offset and bit value.
477 vCount
= SUB(vCount
, VIMMED1(1));
478 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), vOffsets
);
479 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
481 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
483 Value
*vPtrs
= GEP(pCutBuffer
, vOffset
);
485 BITCAST(vPtrs
, VectorType::get(PointerType::get(mInt8Ty
, 0), JM()->mVWidth
));
// Read-modify-write: OR the new bit into the existing byte for each lane.
487 Value
*vGather
= MASKED_GATHER(vPtrs
, 32, vMask1
);
488 vValue
= OR(vGather
, vValue
);
489 MASKED_SCATTER(vValue
, vPtrs
, 32, vMask1
);
// Emits the GS epilogue: stores total_emitted_vertices_vec into the
// vertexCount member of the GS context. emitted_prims_vec and bld_base are
// not used by the lines visible here.
493 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
494 struct lp_build_tgsi_context
* bld_base
,
495 LLVMValueRef total_emitted_vertices_vec
,
496 LLVMValueRef emitted_prims_vec
)
498 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
500 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
502 STORE(unwrap(total_emitted_vertices_vec
), iface
->pGsCtx
, {0, SWR_GS_CONTEXT_vertexCount
});
// Builds, verifies and JIT-compiles the geometry shader bound in ctx->gs.
//
//   ctx - context; ctx->gs supplies the TGSI info and receives the GS state,
//         ctx->vs supplies the output semantics used for input linkage
//   key - variant key; only key.sampler is read by the lines visible here
//
// Side effects visible here: fills ctx->gs->gsState, emits a new function
// into the JIT manager current module, and sets mIsModuleFinalized.
// NOTE(review): a number of lines are not visible in this listing, including
// the function name argument, most lp_build_tgsi_soa arguments, the
// declaration of pFunc and the return statement.
506 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
508 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
509 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
// Populate the GS state from the shader TGSI info and properties.
511 pGS
->gsEnable
= true;
513 pGS
->numInputAttribs
= info
->num_inputs
;
514 pGS
->outputTopology
=
515 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
516 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
];
517 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
519 pGS
->emitsRenderTargetArrayIndex
= info
->writes_layer
;
520 pGS
->emitsPrimitiveID
= info
->writes_primid
;
521 pGS
->emitsViewportArrayIndex
= info
->writes_viewport_index
;
523 // XXX: single stream for now...
524 pGS
->isSingleStream
= true;
525 pGS
->singleStreamID
= 0;
527 struct swr_geometry_shader
*gs
= ctx
->gs
;
529 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
530 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
532 memset(outputs
, 0, sizeof(outputs
));
// Function attribute: stack alignment of mVWidth * sizeof(float) bytes.
534 AttrBuilder attrBuilder
;
535 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
536 AttributeList attrSet
= AttributeList::get(
537 JM()->mContext
, AttributeList::FunctionIndex
, attrBuilder
);
// Signature: void f(swr_draw_context *, SWR_GS_CONTEXT *).
539 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
540 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
541 FunctionType
*vsFuncType
=
542 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
544 // create new geometry shader function
545 auto pFunction
= Function::Create(vsFuncType
,
546 GlobalValue::ExternalLinkage
,
548 JM()->mpCurrentModule
);
549 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrSet
);
// Point both the SWR builder and the gallivm builder at the entry block.
551 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
552 IRB()->SetInsertPoint(block
);
553 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the two function arguments: draw context and GS context.
555 auto argitr
= pFunction
->arg_begin();
556 Value
*hPrivateData
= &*argitr
++;
557 hPrivateData
->setName("hPrivateData");
558 Value
*pGsCtx
= &*argitr
++;
559 pGsCtx
->setName("gsCtx");
// Addresses of the GS constant buffers and their sizes in the draw context.
562 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
563 consts_ptr
->setName("gs_constants");
564 Value
*const_sizes_ptr
=
565 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
566 const_sizes_ptr
->setName("num_gs_constants");
568 struct lp_build_sampler_soa
*sampler
=
569 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
// System values: primitive id and instance id are read from the GS context.
571 struct lp_bld_tgsi_system_values system_values
;
572 memset(&system_values
, 0, sizeof(system_values
));
573 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
574 system_values
.instance_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
// Build a table mapping each GS input slot to the vertex-shader output slot
// with the same semantic name and index (locate_linkage result plus one).
576 std::vector
<Constant
*> mapConstants
;
577 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
578 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
579 ubyte semantic_name
= info
->input_semantic_name
[slot
];
580 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
583 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
) + 1;
585 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
586 mapConstants
.push_back(C(vs_slot
));
// Start the translator lane mask from the mask stored in the GS context.
589 struct lp_build_mask_context mask
;
590 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
591 lp_build_mask_begin(&mask
, gallivm
,
592 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
594 // zero out cut buffer so we can load/modify/store bits
595 MEMSET(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer
}),
597 pGS
->instanceCount
* ((pGS
->maxNumVerts
+ 7) / 8) * JM()->mVWidth
,
598 sizeof(float) * KNOB_SIMD_WIDTH
);
// Wire the file-scope trampolines and the per-compile state into the
// interface handed to the TGSI translator.
600 struct swr_gs_llvm_iface gs_iface
;
601 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
602 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
603 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
604 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
605 gs_iface
.pBuilder
= this;
606 gs_iface
.pGsCtx
= pGsCtx
;
607 gs_iface
.pGsState
= pGS
;
608 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
609 gs_iface
.num_verts_per_prim
=
610 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
611 gs_iface
.info
= info
;
612 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
// Translate the TGSI program into IR inside the function created above.
614 lp_build_tgsi_soa(gallivm
,
616 lp_type_float_vec(32, 32 * 8),
619 wrap(const_sizes_ptr
),
623 wrap(hPrivateData
), // (sampler context)
629 lp_build_mask_end(&mask
);
631 sampler
->destroy(sampler
);
// Resume at whichever block the translator left the gallivm builder in.
633 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Verify the function, compile the module and fetch the native entry point.
637 gallivm_verify_function(gallivm
, wrap(pFunction
));
638 gallivm_compile_module(gallivm
);
641 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
643 debug_printf("geom shader %p\n", pFunc
);
644 assert(pFunc
&& "Error: GeomShader = NULL");
646 JM()->mIsModuleFinalized
= true;
// Compiles a geometry-shader variant for key and caches it.
//
//   ctx - context; the JIT manager comes from its screen (hJitMgr), and the
//         new variant is inserted into ctx->gs->map under key
//   key - variant key used both for compilation and as the map key
//
// The stored VariantGS is constructed from the builder gallivm state and
// the compiled function.
// NOTE(review): the builder declaration is only partly visible in this
// listing, and the return statement is not visible.
652 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
655 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
657 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
659 ctx
->gs
->map
.insert(std::make_pair(key
, make_unique
<VariantGS
>(builder
.gallivm
, func
)));
// Emits a store of one output channel value into the vertex output.
//
//   pVal       - value to store
//   pVsContext - VS context; read only when USE_SIMD16_FRONTEND is set
//   pVtxOutput - base of the vertex output
//   slot       - attribute slot to write
//   channel    - channel within the slot
//
// With USE_SIMD16_FRONTEND, slot and channel indices are doubled and the
// AlternateOffset loaded from the VS context is used as an extra index (see
// the interleave comment below). The last two statements address
// [0][0][slot] and store at [0][channel].
// NOTE(review): the preprocessor lines separating the two variants are not
// visible in this listing; presumably the last two statements form the
// non-SIMD16 branch.
664 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
666 #if USE_SIMD16_FRONTEND
667 // interleave the simdvertex components into the dest simd16vertex
668 // slot16offset = slot8offset * 2
669 // comp16offset = comp8offset * 2 + alternateOffset
671 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
672 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
673 STORE(pVal
, pOut
, {channel
* 2});
675 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
676 STORE(pVal
, pOut
, {0, channel
});
// Builds, verifies and JIT-compiles the vertex shader bound in ctx->vs.
//
//   ctx - context; ctx->vs supplies the TGSI info, ctx->rasterizer supplies
//         the user clip plane enable mask
//   key - variant key; only key.sampler is read by the lines visible here
//
// The generated function loads the used input channels, runs the translated
// TGSI program, copies written outputs to the vertex output via WriteVS, and
// then writes clip/cull distances when clip planes are enabled or the shader
// writes cull distances.
// NOTE(review): a number of lines are not visible in this listing, including
// the function name argument, most lp_build_tgsi_soa arguments, several
// branch keywords in the clip-distance section and the return statement.
681 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
683 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
685 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
686 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
// Outputs start out null; a non-null entry later means the channel was written.
688 memset(outputs
, 0, sizeof(outputs
));
// Function attribute: stack alignment of mVWidth * sizeof(float) bytes.
690 AttrBuilder attrBuilder
;
691 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
692 AttributeList attrSet
= AttributeList::get(
693 JM()->mContext
, AttributeList::FunctionIndex
, attrBuilder
);
// Signature: void f(swr_draw_context *, SWR_VS_CONTEXT *).
695 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
696 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
697 FunctionType
*vsFuncType
=
698 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
700 // create new vertex shader function
701 auto pFunction
= Function::Create(vsFuncType
,
702 GlobalValue::ExternalLinkage
,
704 JM()->mpCurrentModule
);
705 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrSet
);
// Point both the SWR builder and the gallivm builder at the entry block.
707 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
708 IRB()->SetInsertPoint(block
);
709 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the two function arguments: draw context and VS context.
711 auto argitr
= pFunction
->arg_begin();
712 Value
*hPrivateData
= &*argitr
++;
713 hPrivateData
->setName("hPrivateData");
714 Value
*pVsCtx
= &*argitr
++;
715 pVsCtx
->setName("vsCtx");
717 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
719 consts_ptr
->setName("vs_constants");
720 Value
*const_sizes_ptr
=
721 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
722 const_sizes_ptr
->setName("num_vs_constants");
724 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
// Load only the input channels flagged in the shader input_usage_mask.
726 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
727 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
728 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
729 if (mask
& (1 << channel
)) {
730 inputs
[attrib
][channel
] =
731 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
736 struct lp_build_sampler_soa
*sampler
=
737 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
// System values: instance id and vertex id are read from the VS context.
739 struct lp_bld_tgsi_system_values system_values
;
740 memset(&system_values
, 0, sizeof(system_values
));
741 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
742 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
// Translate the TGSI program into IR inside the function created above.
744 lp_build_tgsi_soa(gallivm
,
746 lp_type_float_vec(32, 32 * 8),
749 wrap(const_sizes_ptr
),
753 wrap(hPrivateData
), // (sampler context)
757 NULL
); // geometry shader face
759 sampler
->destroy(sampler
);
// Resume at whichever block the translator left the gallivm builder in.
761 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
763 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
// Copy every written output channel to the vertex output; an output with
// PSIZE semantics is redirected to the fixed point-size slot.
// NOTE(review): the statement guarded by the null test below is not visible;
// presumably it skips unwritten channels.
765 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
766 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
767 if (!outputs
[attrib
][channel
])
770 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));
772 uint32_t outSlot
= attrib
;
773 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
)
774 outSlot
= VERTEX_POINT_SIZE_SLOT
;
775 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
// Clip/cull distance section: entered when user clip planes are enabled or
// the shader writes cull distances.
779 if (ctx
->rasterizer
->clip_plane_enable
||
780 swr_vs
->info
.base
.culldist_writemask
) {
781 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
// Choose the output register that user clip planes are tested against:
// the CLIPVERTEX output when the shader writes one, otherwise a search for
// the POSITION output with index 0.
// NOTE(review): the declaration of cv and the body of the search loop are
// not visible in this listing.
784 if (swr_vs
->info
.base
.writes_clipvertex
) {
785 cv
= 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
788 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
789 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
790 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
// Load the four channels of the chosen output register.
796 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
797 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
798 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
799 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
801 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
802 // clip distance overrides user clip planes
803 if ((swr_vs
->info
.base
.clipdist_writemask
& clip_mask
& (1 << val
)) ||
804 ((swr_vs
->info
.base
.culldist_writemask
<< swr_vs
->info
.base
.num_written_clipdistance
) & (1 << val
))) {
// Shader-written distances live in the CLIPDIST outputs: index 0 holds
// distances 0 to 3 and index 1 holds the rest. They are copied to the LO
// slot (channel val) or the HI slot (channel val - 4).
805 unsigned cv
= 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1,
808 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
809 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
811 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
812 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// NOTE(review): the statement guarded by this test is not visible;
// presumably planes that are not enabled are skipped.
817 if (!(clip_mask
& (1 << val
)))
// User clip plane: load the four plane coefficients from the draw context
// and compute the dot product with (cx, cy, cz, cw).
820 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
821 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
822 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
823 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
824 Value
*dist
= FADD(FMUL(unwrap(cx
), VBROADCAST(px
)),
825 FADD(FMUL(unwrap(cy
), VBROADCAST(py
)),
826 FADD(FMUL(unwrap(cz
), VBROADCAST(pz
)),
827 FMUL(unwrap(cw
), VBROADCAST(pw
)))));
830 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
832 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// Verify the function, compile the module and fetch the native entry point.
838 gallivm_verify_function(gallivm
, wrap(pFunction
));
839 gallivm_compile_module(gallivm
);
841 // lp_debug_dump_value(func);
843 PFN_VERTEX_FUNC pFunc
=
844 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
846 debug_printf("vert shader %p\n", pFunc
);
847 assert(pFunc
&& "Error: VertShader = NULL");
849 JM()->mIsModuleFinalized
= true;
// Compiles a vertex-shader variant for key and caches it.
//
//   ctx - context; the JIT manager comes from its screen (hJitMgr), and the
//         new variant is inserted into ctx->vs->map under key
//   key - variant key used both for compilation and as the map key
//
// The stored VariantVS is constructed from the builder gallivm state and
// the compiled function.
// NOTE(review): the statement guarded by the tokens test, part of the
// builder declaration and the return statement are not visible in this
// listing; presumably the function bails out early when the shader has no
// tokens.
855 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
857 if (!ctx
->vs
->pipe
.tokens
)
861 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
863 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
865 ctx
->vs
->map
.insert(std::make_pair(key
, make_unique
<VariantVS
>(builder
.gallivm
, func
)));
// Finds the output of info whose semantic name and semantic index both match
// the arguments, scanning slots 0 to PIPE_MAX_SHADER_OUTPUTS - 1 in order.
//
//   name  - semantic name to look for
//   index - semantic index to look for
//   info  - shader info whose output semantic tables are searched
//
// On a match the result is the slot index minus one (see the comment on the
// return below). Callers in this file add 1 to the result when they need the
// raw output index.
// NOTE(review): the return type and the not-found return are not visible in
// this listing; a later caller in this file compares the result against
// 0xFFFFFFFF, which is presumably the not-found value.
870 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
872 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
873 if ((info
->output_semantic_name
[i
] == name
)
874 && (info
->output_semantic_index
[i
] == index
)) {
875 return i
- 1; // position is not part of the linkage
883 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
885 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
887 struct tgsi_shader_info
*pPrevShader
;
889 pPrevShader
= &ctx
->gs
->info
.base
;
891 pPrevShader
= &ctx
->vs
->info
.base
;
893 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
894 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
896 memset(inputs
, 0, sizeof(inputs
));
897 memset(outputs
, 0, sizeof(outputs
));
899 struct lp_build_sampler_soa
*sampler
= NULL
;
901 AttrBuilder attrBuilder
;
902 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
903 AttributeList attrSet
= AttributeList::get(
904 JM()->mContext
, AttributeList::FunctionIndex
, attrBuilder
);
906 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
907 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
908 FunctionType
*funcType
=
909 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
911 auto pFunction
= Function::Create(funcType
,
912 GlobalValue::ExternalLinkage
,
914 JM()->mpCurrentModule
);
915 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrSet
);
917 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
918 IRB()->SetInsertPoint(block
);
919 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
921 auto args
= pFunction
->arg_begin();
922 Value
*hPrivateData
= &*args
++;
923 hPrivateData
->setName("hPrivateData");
924 Value
*pPS
= &*args
++;
925 pPS
->setName("psCtx");
927 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
928 consts_ptr
->setName("fs_constants");
929 Value
*const_sizes_ptr
=
930 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
931 const_sizes_ptr
->setName("num_fs_constants");
933 // load *pAttribs, *pPerspAttribs
934 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
935 Value
*pPerspAttribs
=
936 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
938 swr_fs
->constantMask
= 0;
939 swr_fs
->flatConstantMask
= 0;
940 swr_fs
->pointSpriteMask
= 0;
942 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
943 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
944 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
945 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
951 Value
*vi
= nullptr, *vj
= nullptr;
953 case TGSI_INTERPOLATE_LOC_CENTER
:
954 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
955 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
957 case TGSI_INTERPOLATE_LOC_CENTROID
:
958 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
959 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
961 case TGSI_INTERPOLATE_LOC_SAMPLE
:
962 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
963 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
968 Value
*vw
= nullptr, *pAttribs
;
969 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
970 interpMode
== TGSI_INTERPOLATE_COLOR
) {
971 pAttribs
= pPerspAttribs
;
973 case TGSI_INTERPOLATE_LOC_CENTER
:
974 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
976 case TGSI_INTERPOLATE_LOC_CENTROID
:
977 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
979 case TGSI_INTERPOLATE_LOC_SAMPLE
:
980 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
984 pAttribs
= pRawAttribs
;
990 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
991 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
993 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
995 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
996 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
997 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
999 inputs
[attrib
][0] = wrap(ff
);
1000 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1001 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1002 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
1004 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
1005 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
1006 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
1007 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
1008 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
1010 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
1011 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
1013 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
1015 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
1017 } else if (semantic_name
== TGSI_SEMANTIC_PRIMID
) {
1018 Value
*primID
= LOAD(pPS
, {0, SWR_PS_CONTEXT_primID
}, "primID");
1019 inputs
[attrib
][0] = wrap(VECTOR_SPLAT(JM()->mVWidth
, primID
));
1020 inputs
[attrib
][1] = wrap(VIMMED1(0));
1021 inputs
[attrib
][2] = wrap(VIMMED1(0));
1022 inputs
[attrib
][3] = wrap(VIMMED1(0));
1026 unsigned linkedAttrib
=
1027 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
);
1029 if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
1030 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
1031 /* we add an extra attrib to the backendState in swr_update_derived. */
1032 linkedAttrib
= pPrevShader
->num_outputs
- 1;
1033 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
1034 } else if (linkedAttrib
== 0xFFFFFFFF) {
1035 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
1036 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1037 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1038 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
1039 /* If we're reading in color and 2-sided lighting is enabled, we have
1042 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
1045 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1046 swr_fs
->constantMask
|= 1 << linkedAttrib
;
1047 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1048 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
1052 unsigned bcolorAttrib
= 0xFFFFFFFF;
1053 Value
*offset
= NULL
;
1054 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
1055 bcolorAttrib
= locate_linkage(
1056 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
);
1057 /* Neither front nor back colors were available. Nothing to load. */
1058 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
1060 /* If there is no front color, just always use the back color. */
1061 if (linkedAttrib
== 0xFFFFFFFF)
1062 linkedAttrib
= bcolorAttrib
;
1064 if (bcolorAttrib
!= 0xFFFFFFFF) {
1065 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1066 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
1067 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1068 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
1071 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
1075 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
1077 offset
= MUL(back
, C(diff
));
1078 offset
->setName("offset");
1083 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1084 if (mask
& (1 << channel
)) {
1085 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
1086 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
1087 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
1090 indexA
= ADD(indexA
, offset
);
1091 indexB
= ADD(indexB
, offset
);
1092 indexC
= ADD(indexC
, offset
);
1095 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
1096 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
1097 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
1099 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1100 inputs
[attrib
][channel
] = wrap(va
);
1102 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
1106 Value
*interp
= FMUL(va
, vi
);
1107 Value
*interp1
= FMUL(vb
, vj
);
1108 interp
= FADD(interp
, interp1
);
1109 interp
= FADD(interp
, vc
);
1110 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
1111 interpMode
== TGSI_INTERPOLATE_COLOR
)
1112 interp
= FMUL(interp
, vw
);
1113 inputs
[attrib
][channel
] = wrap(interp
);
1119 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
1121 struct lp_bld_tgsi_system_values system_values
;
1122 memset(&system_values
, 0, sizeof(system_values
));
1124 struct lp_build_mask_context mask
;
1125 bool uses_mask
= false;
1127 if (swr_fs
->info
.base
.uses_kill
||
1128 key
.poly_stipple_enable
) {
1129 Value
*vActiveMask
= NULL
;
1130 if (swr_fs
->info
.base
.uses_kill
) {
1131 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
1133 if (key
.poly_stipple_enable
) {
1134 // first get fragment xy coords and clip to stipple bounds
1135 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
1136 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
1137 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
1138 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
1140 // stipple pattern is 32x32, which means that one line of stipple
1141 // is stored in one word:
1142 // vXstipple is bit offset inside 32-bit stipple word
1143 // vYstipple is word index in stipple array
1144 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
1145 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
1147 // grab stipple pattern base address
1148 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
1149 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
1151 // perform a gather to grab stipple words for each lane
1152 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
1153 VIMMED1(0xffffffff), C((char)4));
1155 // create a mask with one bit corresponding to the x stipple
1156 // and AND it with the pattern, to see if we have a bit
1157 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
1158 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
1159 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
1160 vStippleMask
= VMASK(vStippleMask
);
1162 if (swr_fs
->info
.base
.uses_kill
) {
1163 vActiveMask
= AND(vActiveMask
, vStippleMask
);
1165 vActiveMask
= vStippleMask
;
1168 lp_build_mask_begin(
1169 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
1173 lp_build_tgsi_soa(gallivm
,
1174 swr_fs
->pipe
.tokens
,
1175 lp_type_float_vec(32, 32 * 8),
1176 uses_mask
? &mask
: NULL
, // mask
1178 wrap(const_sizes_ptr
),
1183 NULL
, // thread data
1186 NULL
); // geometry shader face
1188 sampler
->destroy(sampler
);
1190 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1192 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
1194 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
1195 case TGSI_SEMANTIC_POSITION
: {
1198 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
1199 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
1202 case TGSI_SEMANTIC_COLOR
: {
1203 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1204 if (!outputs
[attrib
][channel
])
1208 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
1209 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
1210 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
1211 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
1214 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
1220 SWR_PS_CONTEXT_shaded
,
1221 swr_fs
->info
.base
.output_semantic_index
[attrib
],
1229 "unknown output from FS %s[%d]\n",
1230 tgsi_semantic_names
[swr_fs
->info
.base
1231 .output_semantic_name
[attrib
]],
1232 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
1238 LLVMValueRef mask_result
= 0;
1240 mask_result
= lp_build_mask_end(&mask
);
1243 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1246 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
1251 gallivm_verify_function(gallivm
, wrap(pFunction
));
1253 gallivm_compile_module(gallivm
);
1255 PFN_PIXEL_KERNEL kernel
=
1256 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1257 debug_printf("frag shader %p\n", kernel
);
1258 assert(kernel
&& "Error: FragShader = NULL");
1260 JM()->mIsModuleFinalized
= true;
1266 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
1268 if (!ctx
->fs
->pipe
.tokens
)
1272 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
1274 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
1276 ctx
->fs
->map
.insert(std::make_pair(key
, make_unique
<VariantFS
>(builder
.gallivm
, func
)));