1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
27 #include "JitManager.h"
28 #include "llvm-c/Core.h"
29 #include "llvm/Support/CBindingWrapping.h"
30 #include "llvm/IR/LegacyPassManager.h"
31 #pragma pop_macro("DEBUG")
34 #include "gen_state_llvm.h"
36 #include "functionpasses/passes.h"
38 #include "tgsi/tgsi_strings.h"
39 #include "util/u_format.h"
40 #include "util/u_prim.h"
41 #include "gallivm/lp_bld_init.h"
42 #include "gallivm/lp_bld_flow.h"
43 #include "gallivm/lp_bld_struct.h"
44 #include "gallivm/lp_bld_tgsi.h"
46 #include "swr_context.h"
47 #include "gen_surf_state_llvm.h"
48 #include "gen_swr_context_llvm.h"
49 #include "swr_resource.h"
50 #include "swr_state.h"
51 #include "swr_screen.h"
53 using namespace SwrJit
;
57 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
59 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
61 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
64 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
66 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
69 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
71 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
74 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
76 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
80 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
81 struct swr_context
*ctx
,
82 enum pipe_shader_type shader_type
,
83 struct swr_jit_sampler_key
&key
)
85 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
87 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
88 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
89 lp_sampler_static_sampler_state(
90 &key
.sampler
[i
].sampler_state
,
91 ctx
->samplers
[shader_type
][i
]);
96 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
97 * are dx10-style? Can't really have mixed opcodes, at least not
98 * if we want to skip the holes here (without rescanning tgsi).
100 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
101 key
.nr_sampler_views
=
102 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
103 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
104 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
105 const struct pipe_sampler_view
*view
=
106 ctx
->sampler_views
[shader_type
][i
];
107 lp_sampler_static_texture_state(
108 &key
.sampler
[i
].texture_state
, view
);
110 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
111 const struct util_format_description
*desc
=
112 util_format_description(view
->format
);
113 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
114 !util_format_has_depth(desc
))
115 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
120 key
.nr_sampler_views
= key
.nr_samplers
;
121 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
122 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
123 const struct pipe_sampler_view
*view
=
124 ctx
->sampler_views
[shader_type
][i
];
125 lp_sampler_static_texture_state(
126 &key
.sampler
[i
].texture_state
, view
);
128 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
129 const struct util_format_description
*desc
=
130 util_format_description(view
->format
);
131 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
132 !util_format_has_depth(desc
))
133 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
141 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
142 struct swr_context
*ctx
,
143 swr_fragment_shader
*swr_fs
)
145 memset(&key
, 0, sizeof(key
));
147 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
148 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
149 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
151 struct tgsi_shader_info
*pPrevShader
;
153 pPrevShader
= &ctx
->gs
->info
.base
;
155 pPrevShader
= &ctx
->vs
->info
.base
;
157 memcpy(&key
.vs_output_semantic_name
,
158 &pPrevShader
->output_semantic_name
,
159 sizeof(key
.vs_output_semantic_name
));
160 memcpy(&key
.vs_output_semantic_idx
,
161 &pPrevShader
->output_semantic_index
,
162 sizeof(key
.vs_output_semantic_idx
));
164 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
166 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
167 ctx
->poly_stipple
.prim_is_poly
;
171 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
172 struct swr_context
*ctx
,
173 swr_vertex_shader
*swr_vs
)
175 memset(&key
, 0, sizeof(key
));
177 key
.clip_plane_mask
=
178 swr_vs
->info
.base
.clipdist_writemask
?
179 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
180 ctx
->rasterizer
->clip_plane_enable
;
182 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
186 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
187 struct swr_vertex_element_state
*velems
)
189 memset(&key
, 0, sizeof(key
));
191 key
.fsState
= velems
->fsState
;
195 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
196 struct swr_context
*ctx
,
197 swr_geometry_shader
*swr_gs
)
199 memset(&key
, 0, sizeof(key
));
201 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
203 memcpy(&key
.vs_output_semantic_name
,
204 &pPrevShader
->output_semantic_name
,
205 sizeof(key
.vs_output_semantic_name
));
206 memcpy(&key
.vs_output_semantic_idx
,
207 &pPrevShader
->output_semantic_index
,
208 sizeof(key
.vs_output_semantic_idx
));
210 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
213 struct BuilderSWR
: public Builder
{
214 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
217 pJitMgr
->SetupNewModule();
218 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
));
219 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
223 gallivm_free_ir(gallivm
);
226 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
227 unsigned slot
, unsigned channel
);
229 struct gallivm_state
*gallivm
;
230 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
231 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
232 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
235 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
236 struct lp_build_tgsi_context
* bld_base
,
237 boolean is_vindex_indirect
,
238 LLVMValueRef vertex_index
,
239 boolean is_aindex_indirect
,
240 LLVMValueRef attrib_index
,
241 LLVMValueRef swizzle_index
);
243 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
244 struct lp_build_tgsi_context
* bld_base
,
245 LLVMValueRef (*outputs
)[4],
246 LLVMValueRef emitted_vertices_vec
);
249 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
250 struct lp_build_tgsi_context
* bld_base
,
251 LLVMValueRef verts_per_prim_vec
,
252 LLVMValueRef emitted_prims_vec
);
255 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
256 struct lp_build_tgsi_context
* bld_base
,
257 LLVMValueRef total_emitted_vertices_vec
,
258 LLVMValueRef emitted_prims_vec
);
262 struct swr_gs_llvm_iface
{
263 struct lp_build_tgsi_gs_iface base
;
264 struct tgsi_shader_info
*info
;
266 BuilderSWR
*pBuilder
;
269 SWR_GS_STATE
*pGsState
;
270 uint32_t num_outputs
;
271 uint32_t num_verts_per_prim
;
273 Value
*pVtxAttribMap
;
276 // trampoline functions so we can use the builder llvm construction methods
278 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
279 struct lp_build_tgsi_context
* bld_base
,
280 boolean is_vindex_indirect
,
281 LLVMValueRef vertex_index
,
282 boolean is_aindex_indirect
,
283 LLVMValueRef attrib_index
,
284 LLVMValueRef swizzle_index
)
286 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
288 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld_base
,
297 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
298 struct lp_build_tgsi_context
* bld_base
,
299 LLVMValueRef (*outputs
)[4],
300 LLVMValueRef emitted_vertices_vec
)
302 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
304 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld_base
,
306 emitted_vertices_vec
);
310 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
311 struct lp_build_tgsi_context
* bld_base
,
312 LLVMValueRef verts_per_prim_vec
,
313 LLVMValueRef emitted_prims_vec
)
315 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
317 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld_base
,
323 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
324 struct lp_build_tgsi_context
* bld_base
,
325 LLVMValueRef total_emitted_vertices_vec
,
326 LLVMValueRef emitted_prims_vec
)
328 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
330 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
, bld_base
,
331 total_emitted_vertices_vec
,
336 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface
*gs_iface
,
337 struct lp_build_tgsi_context
* bld_base
,
338 boolean is_vindex_indirect
,
339 LLVMValueRef vertex_index
,
340 boolean is_aindex_indirect
,
341 LLVMValueRef attrib_index
,
342 LLVMValueRef swizzle_index
)
344 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
345 Value
*vert_index
= unwrap(vertex_index
);
346 Value
*attr_index
= unwrap(attrib_index
);
348 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
350 if (is_vindex_indirect
|| is_aindex_indirect
) {
352 Value
*res
= unwrap(bld_base
->base
.zero
);
353 struct lp_type type
= bld_base
->base
.type
;
355 for (i
= 0; i
< type
.length
; i
++) {
356 Value
*vert_chan_index
= vert_index
;
357 Value
*attr_chan_index
= attr_index
;
359 if (is_vindex_indirect
) {
360 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
362 if (is_aindex_indirect
) {
363 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
367 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
369 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
370 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
372 Value
*pVector
= ADD(MUL(vert_chan_index
, pInputVertStride
), attrib
);
373 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
375 Value
*value
= VEXTRACT(pInput
, C(i
));
376 res
= VINSERT(res
, value
, C(i
));
381 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
383 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
384 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
386 Value
*pVector
= ADD(MUL(vert_index
, pInputVertStride
), attrib
);
388 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
394 // GS output stream layout
395 #define VERTEX_COUNT_SIZE 32
396 #define CONTROL_HEADER_SIZE (8*32)
399 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface
*gs_base
,
400 struct lp_build_tgsi_context
* bld_base
,
401 LLVMValueRef (*outputs
)[4],
402 LLVMValueRef emitted_vertices_vec
)
404 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
406 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
408 const uint32_t headerSize
= VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
;
409 const uint32_t attribSize
= 4 * sizeof(float);
410 const uint32_t vertSize
= attribSize
* SWR_VTX_NUM_SLOTS
;
411 Value
*pVertexOffset
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(vertSize
));
413 Value
*vMask
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_mask
});
414 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, mVWidth
));
416 Value
*pStack
= STACKSAVE();
417 Value
*pTmpPtr
= ALLOCA(mFP32Ty
, C(4)); // used for dummy write for lane masking
419 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
420 uint32_t attribSlot
= attrib
;
421 uint32_t sgvChannel
= 0;
422 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
423 attribSlot
= VERTEX_SGV_SLOT
;
424 sgvChannel
= VERTEX_SGV_POINT_SIZE_COMP
;
425 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
) {
426 attribSlot
= VERTEX_SGV_SLOT
;
427 sgvChannel
= VERTEX_SGV_RTAI_COMP
;
428 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_VIEWPORT_INDEX
) {
429 attribSlot
= VERTEX_SGV_SLOT
;
430 sgvChannel
= VERTEX_SGV_VAI_COMP
;
431 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
432 attribSlot
= VERTEX_POSITION_SLOT
;
434 attribSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
435 if (iface
->info
->writes_position
) {
440 Value
*pOutputOffset
= ADD(pVertexOffset
, VIMMED1(headerSize
+ attribSize
* attribSlot
)); // + sgvChannel ?
442 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
443 Value
*pLaneOffset
= VEXTRACT(pOutputOffset
, C(lane
));
444 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
445 Value
*pStreamOffset
= GEP(pStream
, pLaneOffset
);
446 pStreamOffset
= BITCAST(pStreamOffset
, mFP32PtrTy
);
448 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
449 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
451 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
454 if (attribSlot
== VERTEX_SGV_SLOT
)
455 vData
= LOAD(unwrap(outputs
[attrib
][0]));
457 vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
459 if (attribSlot
!= VERTEX_SGV_SLOT
||
460 sgvChannel
== channel
) {
461 vData
= VEXTRACT(vData
, C(lane
));
462 STORE(vData
, pStreamOffset
);
464 pStreamOffset
= GEP(pStreamOffset
, C(1));
469 STACKRESTORE(pStack
);
473 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface
*gs_base
,
474 struct lp_build_tgsi_context
* bld_base
,
475 LLVMValueRef verts_per_prim_vec
,
476 LLVMValueRef emitted_prims_vec
)
478 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
480 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
482 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
483 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
485 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
488 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
489 unwrap(verts_per_prim_vec
));
491 struct lp_build_tgsi_soa_context
*bld
= lp_soa_context(bld_base
);
492 vCount
= LOAD(unwrap(bld
->total_emitted_vertices_vec_ptr
));
494 struct lp_exec_mask
*exec_mask
= &bld
->exec_mask
;
495 Value
*mask
= unwrap(lp_build_mask_value(bld
->mask
));
496 if (exec_mask
->has_mask
)
497 mask
= AND(mask
, unwrap(exec_mask
->exec_mask
));
499 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
500 mask
= AND(mask
, cmpMask
);
501 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
503 vCount
= SUB(vCount
, VIMMED1(1));
504 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
505 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
507 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
509 Value
*pStack
= STACKSAVE();
510 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
512 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
513 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
514 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
515 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
517 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
518 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
520 Value
*vVal
= LOAD(pStreamOffset
);
521 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
522 STORE(vVal
, pStreamOffset
);
525 STACKRESTORE(pStack
);
529 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface
*gs_base
,
530 struct lp_build_tgsi_context
* bld_base
,
531 LLVMValueRef total_emitted_vertices_vec
,
532 LLVMValueRef emitted_prims_vec
)
534 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
536 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
538 // Store emit count to each output stream in the first DWORD
539 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
541 Value
* pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
542 pStream
= BITCAST(pStream
, mInt32PtrTy
);
543 Value
* pLaneCount
= VEXTRACT(unwrap(total_emitted_vertices_vec
), C(lane
));
544 STORE(pLaneCount
, pStream
);
549 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
551 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
552 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
554 memset(pGS
, 0, sizeof(*pGS
));
556 pGS
->gsEnable
= true;
558 pGS
->numInputAttribs
= info
->num_inputs
;
559 pGS
->outputTopology
=
560 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
561 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
];
562 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
564 // XXX: single stream for now...
565 pGS
->isSingleStream
= true;
566 pGS
->singleStreamID
= 0;
568 pGS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
; // TODO: optimize
569 pGS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
; // TODO: optimize
570 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
571 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
572 pGS
->controlDataSize
= 8; // GS ouputs max of 8 32B units
573 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
574 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
576 pGS
->allocationSize
=
577 VERTEX_COUNT_SIZE
+ // vertex count
578 CONTROL_HEADER_SIZE
+ // control header
579 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
580 pGS
->maxNumVerts
; // num verts
582 struct swr_geometry_shader
*gs
= ctx
->gs
;
584 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
585 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
587 memset(outputs
, 0, sizeof(outputs
));
589 AttrBuilder attrBuilder
;
590 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
592 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
593 PointerType::get(mInt8Ty
, 0),
594 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
595 FunctionType
*vsFuncType
=
596 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
598 // create new vertex shader function
599 auto pFunction
= Function::Create(vsFuncType
,
600 GlobalValue::ExternalLinkage
,
602 JM()->mpCurrentModule
);
603 #if HAVE_LLVM < 0x0500
604 AttributeSet attrSet
= AttributeSet::get(
605 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
606 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
608 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
611 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
612 IRB()->SetInsertPoint(block
);
613 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
615 auto argitr
= pFunction
->arg_begin();
616 Value
*hPrivateData
= &*argitr
++;
617 hPrivateData
->setName("hPrivateData");
618 Value
*pWorkerData
= &*argitr
++;
619 pWorkerData
->setName("pWorkerData");
620 Value
*pGsCtx
= &*argitr
++;
621 pGsCtx
->setName("gsCtx");
624 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
625 consts_ptr
->setName("gs_constants");
626 Value
*const_sizes_ptr
=
627 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
628 const_sizes_ptr
->setName("num_gs_constants");
630 struct lp_build_sampler_soa
*sampler
=
631 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
633 struct lp_bld_tgsi_system_values system_values
;
634 memset(&system_values
, 0, sizeof(system_values
));
635 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
636 system_values
.instance_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
638 std::vector
<Constant
*> mapConstants
;
639 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
640 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
641 ubyte semantic_name
= info
->input_semantic_name
[slot
];
642 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
644 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
646 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
648 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
651 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
652 vs_slot
= VERTEX_POSITION_SLOT
;
654 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
655 mapConstants
.push_back(C(vs_slot
));
658 struct lp_build_mask_context mask
;
659 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
660 lp_build_mask_begin(&mask
, gallivm
,
661 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
663 // zero out cut buffer so we can load/modify/store bits
664 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
666 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
667 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
670 struct swr_gs_llvm_iface gs_iface
;
671 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
672 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
673 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
674 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
675 gs_iface
.pBuilder
= this;
676 gs_iface
.pGsCtx
= pGsCtx
;
677 gs_iface
.pGsState
= pGS
;
678 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
679 gs_iface
.num_verts_per_prim
=
680 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
681 gs_iface
.info
= info
;
682 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
684 struct lp_build_tgsi_params params
;
685 memset(¶ms
, 0, sizeof(params
));
686 params
.type
= lp_type_float_vec(32, 32 * 8);
687 params
.mask
= & mask
;
688 params
.consts_ptr
= wrap(consts_ptr
);
689 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
690 params
.system_values
= &system_values
;
691 params
.inputs
= inputs
;
692 params
.context_ptr
= wrap(hPrivateData
);
693 params
.sampler
= sampler
;
694 params
.info
= &gs
->info
.base
;
695 params
.gs_iface
= &gs_iface
.base
;
697 lp_build_tgsi_soa(gallivm
,
702 lp_build_mask_end(&mask
);
704 sampler
->destroy(sampler
);
706 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
710 gallivm_verify_function(gallivm
, wrap(pFunction
));
711 gallivm_compile_module(gallivm
);
714 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
716 debug_printf("geom shader %p\n", pFunc
);
717 assert(pFunc
&& "Error: GeomShader = NULL");
719 JM()->mIsModuleFinalized
= true;
725 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
728 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
730 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
732 ctx
->gs
->map
.insert(std::make_pair(key
, make_unique
<VariantGS
>(builder
.gallivm
, func
)));
737 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
739 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
740 // interleave the simdvertex components into the dest simd16vertex
741 // slot16offset = slot8offset * 2
742 // comp16offset = comp8offset * 2 + alternateOffset
744 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
745 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
746 STORE(pVal
, pOut
, {channel
* 2});
748 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
749 STORE(pVal
, pOut
, {0, channel
});
754 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
756 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
758 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
759 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
761 memset(outputs
, 0, sizeof(outputs
));
763 AttrBuilder attrBuilder
;
764 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
766 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
767 PointerType::get(mInt8Ty
, 0),
768 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
769 FunctionType
*vsFuncType
=
770 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
772 // create new vertex shader function
773 auto pFunction
= Function::Create(vsFuncType
,
774 GlobalValue::ExternalLinkage
,
776 JM()->mpCurrentModule
);
777 #if HAVE_LLVM < 0x0500
778 AttributeSet attrSet
= AttributeSet::get(
779 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
780 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
782 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
785 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
786 IRB()->SetInsertPoint(block
);
787 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
789 auto argitr
= pFunction
->arg_begin();
790 Value
*hPrivateData
= &*argitr
++;
791 hPrivateData
->setName("hPrivateData");
792 Value
*pWorkerData
= &*argitr
++;
793 pWorkerData
->setName("pWorkerData");
794 Value
*pVsCtx
= &*argitr
++;
795 pVsCtx
->setName("vsCtx");
797 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
799 consts_ptr
->setName("vs_constants");
800 Value
*const_sizes_ptr
=
801 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
802 const_sizes_ptr
->setName("num_vs_constants");
804 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
806 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
809 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
810 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
811 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
812 if (mask
& (1 << channel
)) {
813 inputs
[attrib
][channel
] =
814 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
819 struct lp_build_sampler_soa
*sampler
=
820 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
822 struct lp_bld_tgsi_system_values system_values
;
823 memset(&system_values
, 0, sizeof(system_values
));
824 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
827 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
829 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
833 uint32_t vectorWidth
= mVWidth16
;
835 uint32_t vectorWidth
= mVWidth
;
838 struct lp_build_tgsi_params params
;
839 memset(¶ms
, 0, sizeof(params
));
840 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
841 params
.consts_ptr
= wrap(consts_ptr
);
842 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
843 params
.system_values
= &system_values
;
844 params
.inputs
= inputs
;
845 params
.context_ptr
= wrap(hPrivateData
);
846 params
.sampler
= sampler
;
847 params
.info
= &swr_vs
->info
.base
;
849 lp_build_tgsi_soa(gallivm
,
854 sampler
->destroy(sampler
);
856 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
858 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
860 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
863 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
864 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
865 if (!outputs
[attrib
][channel
])
871 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
872 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
874 val
= LOAD(unwrap(outputs
[attrib
][0]));
875 outSlot
= VERTEX_SGV_SLOT
;
876 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
877 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
878 outSlot
= VERTEX_POSITION_SLOT
;
880 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
881 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
882 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
886 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
890 if (ctx
->rasterizer
->clip_plane_enable
||
891 swr_vs
->info
.base
.culldist_writemask
) {
892 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
895 if (swr_vs
->info
.base
.writes_clipvertex
) {
896 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
899 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
900 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
901 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
907 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
908 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
909 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
910 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
912 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
913 // clip distance overrides user clip planes
914 if ((swr_vs
->info
.base
.clipdist_writemask
& clip_mask
& (1 << val
)) ||
915 ((swr_vs
->info
.base
.culldist_writemask
<< swr_vs
->info
.base
.num_written_clipdistance
) & (1 << val
))) {
916 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1,
919 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
920 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
922 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
923 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
928 if (!(clip_mask
& (1 << val
)))
931 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
932 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
933 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
934 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
936 Value
*bpx
= VBROADCAST_16(px
);
937 Value
*bpy
= VBROADCAST_16(py
);
938 Value
*bpz
= VBROADCAST_16(pz
);
939 Value
*bpw
= VBROADCAST_16(pw
);
941 Value
*bpx
= VBROADCAST(px
);
942 Value
*bpy
= VBROADCAST(py
);
943 Value
*bpz
= VBROADCAST(pz
);
944 Value
*bpw
= VBROADCAST(pw
);
946 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
947 FADD(FMUL(unwrap(cy
), bpy
),
948 FADD(FMUL(unwrap(cz
), bpz
),
949 FMUL(unwrap(cw
), bpw
))));
952 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
954 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
960 gallivm_verify_function(gallivm
, wrap(pFunction
));
961 gallivm_compile_module(gallivm
);
963 // lp_debug_dump_value(func);
965 PFN_VERTEX_FUNC pFunc
=
966 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
968 debug_printf("vert shader %p\n", pFunc
);
969 assert(pFunc
&& "Error: VertShader = NULL");
971 JM()->mIsModuleFinalized
= true;
977 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
979 if (!ctx
->vs
->pipe
.tokens
)
983 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
985 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
987 ctx
->vs
->map
.insert(std::make_pair(key
, make_unique
<VariantVS
>(builder
.gallivm
, func
)));
992 swr_so_adjust_attrib(unsigned in_attrib
,
993 swr_vertex_shader
*swr_vs
)
998 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
1001 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
1002 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
1003 attrib
= VERTEX_POSITION_SLOT
;
1004 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
1005 attrib
= VERTEX_SGV_SLOT
;
1006 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
1007 attrib
= VERTEX_SGV_SLOT
;
1009 if (swr_vs
->info
.base
.writes_position
) {
1019 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
1021 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
1022 if ((info
->output_semantic_name
[i
] == name
)
1023 && (info
->output_semantic_index
[i
] == index
)) {
1032 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
1034 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
1036 struct tgsi_shader_info
*pPrevShader
;
1038 pPrevShader
= &ctx
->gs
->info
.base
;
1040 pPrevShader
= &ctx
->vs
->info
.base
;
1042 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1043 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1045 memset(inputs
, 0, sizeof(inputs
));
1046 memset(outputs
, 0, sizeof(outputs
));
1048 struct lp_build_sampler_soa
*sampler
= NULL
;
1050 AttrBuilder attrBuilder
;
1051 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1053 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1054 PointerType::get(mInt8Ty
, 0),
1055 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
1056 FunctionType
*funcType
=
1057 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
1059 auto pFunction
= Function::Create(funcType
,
1060 GlobalValue::ExternalLinkage
,
1062 JM()->mpCurrentModule
);
1063 #if HAVE_LLVM < 0x0500
1064 AttributeSet attrSet
= AttributeSet::get(
1065 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1066 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1068 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1071 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1072 IRB()->SetInsertPoint(block
);
1073 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1075 auto args
= pFunction
->arg_begin();
1076 Value
*hPrivateData
= &*args
++;
1077 hPrivateData
->setName("hPrivateData");
1078 Value
*pWorkerData
= &*args
++;
1079 pWorkerData
->setName("pWorkerData");
1080 Value
*pPS
= &*args
++;
1081 pPS
->setName("psCtx");
1083 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
1084 consts_ptr
->setName("fs_constants");
1085 Value
*const_sizes_ptr
=
1086 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
1087 const_sizes_ptr
->setName("num_fs_constants");
1089 // load *pAttribs, *pPerspAttribs
1090 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
1091 Value
*pPerspAttribs
=
1092 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
1094 swr_fs
->constantMask
= 0;
1095 swr_fs
->flatConstantMask
= 0;
1096 swr_fs
->pointSpriteMask
= 0;
1098 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
1099 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
1100 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
1101 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
1107 Value
*vi
= nullptr, *vj
= nullptr;
1108 switch (interpLoc
) {
1109 case TGSI_INTERPOLATE_LOC_CENTER
:
1110 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
1111 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
1113 case TGSI_INTERPOLATE_LOC_CENTROID
:
1114 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
1115 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
1117 case TGSI_INTERPOLATE_LOC_SAMPLE
:
1118 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
1119 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
1124 Value
*vw
= nullptr, *pAttribs
;
1125 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
1126 interpMode
== TGSI_INTERPOLATE_COLOR
) {
1127 pAttribs
= pPerspAttribs
;
1128 switch (interpLoc
) {
1129 case TGSI_INTERPOLATE_LOC_CENTER
:
1130 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
1132 case TGSI_INTERPOLATE_LOC_CENTROID
:
1133 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
1135 case TGSI_INTERPOLATE_LOC_SAMPLE
:
1136 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
1140 pAttribs
= pRawAttribs
;
1146 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
1147 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
1149 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
1151 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
1152 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
1153 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
1155 inputs
[attrib
][0] = wrap(ff
);
1156 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1157 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1158 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
1160 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
1161 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
1162 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
1163 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
1164 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
1166 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
1167 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
1169 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
1171 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
1175 unsigned linkedAttrib
=
1176 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
1178 uint32_t extraAttribs
= 0;
1179 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
1180 /* non-gs generated primID - need to grab from swizzleMap override */
1181 linkedAttrib
= pPrevShader
->num_outputs
- 1;
1182 swr_fs
->constantMask
|= 1 << linkedAttrib
;
1184 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
1185 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
1186 /* we add an extra attrib to the backendState in swr_update_derived. */
1187 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
1188 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
1190 } else if (linkedAttrib
== 0xFFFFFFFF) {
1191 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
1192 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1193 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1194 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
1195 /* If we're reading in color and 2-sided lighting is enabled, we have
1198 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
1201 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1202 swr_fs
->constantMask
|= 1 << linkedAttrib
;
1203 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1204 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
1208 unsigned bcolorAttrib
= 0xFFFFFFFF;
1209 Value
*offset
= NULL
;
1210 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
1211 bcolorAttrib
= locate_linkage(
1212 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
) - 1;
1213 /* Neither front nor back colors were available. Nothing to load. */
1214 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
1216 /* If there is no front color, just always use the back color. */
1217 if (linkedAttrib
== 0xFFFFFFFF)
1218 linkedAttrib
= bcolorAttrib
;
1220 if (bcolorAttrib
!= 0xFFFFFFFF) {
1221 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1222 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
1223 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1224 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
1227 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
1231 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
1233 offset
= MUL(back
, C(diff
));
1234 offset
->setName("offset");
1239 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1240 if (mask
& (1 << channel
)) {
1241 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
1242 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
1243 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
1246 indexA
= ADD(indexA
, offset
);
1247 indexB
= ADD(indexB
, offset
);
1248 indexC
= ADD(indexC
, offset
);
1251 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
1252 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
1253 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
1255 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1256 inputs
[attrib
][channel
] = wrap(va
);
1258 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
1262 Value
*interp
= FMUL(va
, vi
);
1263 Value
*interp1
= FMUL(vb
, vj
);
1264 interp
= FADD(interp
, interp1
);
1265 interp
= FADD(interp
, vc
);
1266 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
1267 interpMode
== TGSI_INTERPOLATE_COLOR
)
1268 interp
= FMUL(interp
, vw
);
1269 inputs
[attrib
][channel
] = wrap(interp
);
1275 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
1277 struct lp_bld_tgsi_system_values system_values
;
1278 memset(&system_values
, 0, sizeof(system_values
));
1280 struct lp_build_mask_context mask
;
1281 bool uses_mask
= false;
1283 if (swr_fs
->info
.base
.uses_kill
||
1284 key
.poly_stipple_enable
) {
1285 Value
*vActiveMask
= NULL
;
1286 if (swr_fs
->info
.base
.uses_kill
) {
1287 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
1289 if (key
.poly_stipple_enable
) {
1290 // first get fragment xy coords and clip to stipple bounds
1291 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
1292 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
1293 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
1294 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
1296 // stipple pattern is 32x32, which means that one line of stipple
1297 // is stored in one word:
1298 // vXstipple is bit offset inside 32-bit stipple word
1299 // vYstipple is word index is stipple array
1300 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
1301 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
1303 // grab stipple pattern base address
1304 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
1305 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
1307 // peform a gather to grab stipple words for each lane
1308 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
1309 VIMMED1(0xffffffff), 4);
1311 // create a mask with one bit corresponding to the x stipple
1312 // and AND it with the pattern, to see if we have a bit
1313 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
1314 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
1315 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
1316 vStippleMask
= VMASK(vStippleMask
);
1318 if (swr_fs
->info
.base
.uses_kill
) {
1319 vActiveMask
= AND(vActiveMask
, vStippleMask
);
1321 vActiveMask
= vStippleMask
;
1324 lp_build_mask_begin(
1325 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
1329 struct lp_build_tgsi_params params
;
1330 memset(¶ms
, 0, sizeof(params
));
1331 params
.type
= lp_type_float_vec(32, 32 * 8);
1332 params
.mask
= uses_mask
? &mask
: NULL
;
1333 params
.consts_ptr
= wrap(consts_ptr
);
1334 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1335 params
.system_values
= &system_values
;
1336 params
.inputs
= inputs
;
1337 params
.context_ptr
= wrap(hPrivateData
);
1338 params
.sampler
= sampler
;
1339 params
.info
= &swr_fs
->info
.base
;
1341 lp_build_tgsi_soa(gallivm
,
1342 swr_fs
->pipe
.tokens
,
1346 sampler
->destroy(sampler
);
1348 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1350 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
1352 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
1353 case TGSI_SEMANTIC_POSITION
: {
1356 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
1357 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
1360 case TGSI_SEMANTIC_COLOR
: {
1361 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1362 if (!outputs
[attrib
][channel
])
1366 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
1367 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
1368 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
1369 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
1372 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
1378 SWR_PS_CONTEXT_shaded
,
1379 swr_fs
->info
.base
.output_semantic_index
[attrib
],
1387 "unknown output from FS %s[%d]\n",
1388 tgsi_semantic_names
[swr_fs
->info
.base
1389 .output_semantic_name
[attrib
]],
1390 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
1396 LLVMValueRef mask_result
= 0;
1398 mask_result
= lp_build_mask_end(&mask
);
1401 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1404 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
1409 gallivm_verify_function(gallivm
, wrap(pFunction
));
1411 gallivm_compile_module(gallivm
);
1413 // after the gallivm passes, we have to lower the core's intrinsics
1414 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
1415 lowerPass
.add(createLowerX86Pass(this));
1416 lowerPass
.run(*pFunction
);
1418 PFN_PIXEL_KERNEL kernel
=
1419 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1420 debug_printf("frag shader %p\n", kernel
);
1421 assert(kernel
&& "Error: FragShader = NULL");
1423 JM()->mIsModuleFinalized
= true;
1429 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
1431 if (!ctx
->fs
->pipe
.tokens
)
1435 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
1437 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
1439 ctx
->fs
->map
.insert(std::make_pair(key
, make_unique
<VariantFS
>(builder
.gallivm
, func
)));