1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 #include <llvm/Config/llvm-config.h>
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
42 #include "gen_state_llvm.h"
44 #include "functionpasses/passes.h"
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
54 #include "swr_context.h"
55 #include "gen_surf_state_llvm.h"
56 #include "gen_swr_context_llvm.h"
57 #include "swr_resource.h"
58 #include "swr_state.h"
59 #include "swr_screen.h"
61 using namespace SwrJit
;
65 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
67 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
69 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
72 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
74 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
77 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
79 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
82 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
84 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
88 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
89 struct swr_context
*ctx
,
90 enum pipe_shader_type shader_type
,
91 struct swr_jit_sampler_key
&key
)
93 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
95 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
96 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
97 lp_sampler_static_sampler_state(
98 &key
.sampler
[i
].sampler_state
,
99 ctx
->samplers
[shader_type
][i
]);
104 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
105 * are dx10-style? Can't really have mixed opcodes, at least not
106 * if we want to skip the holes here (without rescanning tgsi).
108 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
109 key
.nr_sampler_views
=
110 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
111 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
112 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
113 const struct pipe_sampler_view
*view
=
114 ctx
->sampler_views
[shader_type
][i
];
115 lp_sampler_static_texture_state(
116 &key
.sampler
[i
].texture_state
, view
);
118 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
119 const struct util_format_description
*desc
=
120 util_format_description(view
->format
);
121 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
122 !util_format_has_depth(desc
))
123 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
128 key
.nr_sampler_views
= key
.nr_samplers
;
129 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
130 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
131 const struct pipe_sampler_view
*view
=
132 ctx
->sampler_views
[shader_type
][i
];
133 lp_sampler_static_texture_state(
134 &key
.sampler
[i
].texture_state
, view
);
136 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
137 const struct util_format_description
*desc
=
138 util_format_description(view
->format
);
139 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
140 !util_format_has_depth(desc
))
141 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
149 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
150 struct swr_context
*ctx
,
151 swr_fragment_shader
*swr_fs
)
153 memset(&key
, 0, sizeof(key
));
155 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
156 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
157 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
159 struct tgsi_shader_info
*pPrevShader
;
161 pPrevShader
= &ctx
->gs
->info
.base
;
163 pPrevShader
= &ctx
->vs
->info
.base
;
165 memcpy(&key
.vs_output_semantic_name
,
166 &pPrevShader
->output_semantic_name
,
167 sizeof(key
.vs_output_semantic_name
));
168 memcpy(&key
.vs_output_semantic_idx
,
169 &pPrevShader
->output_semantic_index
,
170 sizeof(key
.vs_output_semantic_idx
));
172 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
174 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
175 ctx
->poly_stipple
.prim_is_poly
;
179 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
180 struct swr_context
*ctx
,
181 swr_vertex_shader
*swr_vs
)
183 memset(&key
, 0, sizeof(key
));
185 key
.clip_plane_mask
=
186 swr_vs
->info
.base
.clipdist_writemask
?
187 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
188 ctx
->rasterizer
->clip_plane_enable
;
190 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
194 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
195 struct swr_vertex_element_state
*velems
)
197 memset(&key
, 0, sizeof(key
));
199 key
.fsState
= velems
->fsState
;
203 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
204 struct swr_context
*ctx
,
205 swr_geometry_shader
*swr_gs
)
207 memset(&key
, 0, sizeof(key
));
209 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
211 memcpy(&key
.vs_output_semantic_name
,
212 &pPrevShader
->output_semantic_name
,
213 sizeof(key
.vs_output_semantic_name
));
214 memcpy(&key
.vs_output_semantic_idx
,
215 &pPrevShader
->output_semantic_index
,
216 sizeof(key
.vs_output_semantic_idx
));
218 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
// BuilderSWR: Builder subclass that owns a gallivm_state and hosts the
// shader compile entry points (CompileVS / CompileFS / CompileGS), the
// WriteVS output helper, and the member versions of the four geometry
// shader interface callbacks declared at the end of the struct.
221 struct BuilderSWR
: public Builder
{
// Constructor: sets up a new module on the JIT manager, creates the gallivm
// state on the manager's LLVM context, and makes gallivm's module the
// manager's current module.
222 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
225 pJitMgr
->SetupNewModule();
226 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
));
227 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
// Releases the IR held by the gallivm state.
231 gallivm_free_ir(gallivm
);
// Stores one value of a vertex shader output at (slot, channel).
234 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
235 unsigned slot
, unsigned channel
);
// gallivm state used for TGSI translation and JIT compilation.
237 struct gallivm_state
*gallivm
;
// Per-stage compile entry points; each takes the context and a stage key.
238 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
239 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
240 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
// Member implementations of the geometry shader interface callbacks; the
// file-scope functions of the same names forward to these.
243 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
244 struct lp_build_context
* bld
,
245 boolean is_vindex_indirect
,
246 LLVMValueRef vertex_index
,
247 boolean is_aindex_indirect
,
248 LLVMValueRef attrib_index
,
249 LLVMValueRef swizzle_index
);
251 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
252 struct lp_build_context
* bld
,
253 LLVMValueRef (*outputs
)[4],
254 LLVMValueRef emitted_vertices_vec
,
255 LLVMValueRef stream_id
);
258 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
259 struct lp_build_context
* bld
,
260 LLVMValueRef total_emitted_vertices_vec_ptr
,
261 LLVMValueRef verts_per_prim_vec
,
262 LLVMValueRef emitted_prims_vec
,
263 LLVMValueRef mask_vec
);
266 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
267 LLVMValueRef total_emitted_vertices_vec
,
268 LLVMValueRef emitted_prims_vec
);
272 struct swr_gs_llvm_iface
{
273 struct lp_build_gs_iface base
;
274 struct tgsi_shader_info
*info
;
276 BuilderSWR
*pBuilder
;
279 SWR_GS_STATE
*pGsState
;
280 uint32_t num_outputs
;
281 uint32_t num_verts_per_prim
;
283 Value
*pVtxAttribMap
;
286 // trampoline functions so we can use the builder llvm construction methods
288 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
289 struct lp_build_context
* bld
,
290 boolean is_vindex_indirect
,
291 LLVMValueRef vertex_index
,
292 boolean is_aindex_indirect
,
293 LLVMValueRef attrib_index
,
294 LLVMValueRef swizzle_index
)
296 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
298 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld
,
307 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
308 struct lp_build_context
* bld
,
309 LLVMValueRef (*outputs
)[4],
310 LLVMValueRef emitted_vertices_vec
,
311 LLVMValueRef stream_id
)
313 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
315 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld
,
317 emitted_vertices_vec
,
322 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
323 struct lp_build_context
* bld
,
324 LLVMValueRef total_emitted_vertices_vec_ptr
,
325 LLVMValueRef verts_per_prim_vec
,
326 LLVMValueRef emitted_prims_vec
,
327 LLVMValueRef mask_vec
)
329 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
331 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld
,
332 total_emitted_vertices_vec_ptr
,
339 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
340 LLVMValueRef total_emitted_vertices_vec
,
341 LLVMValueRef emitted_prims_vec
)
343 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
345 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
,
346 total_emitted_vertices_vec
,
351 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
352 struct lp_build_context
* bld
,
353 boolean is_vindex_indirect
,
354 LLVMValueRef vertex_index
,
355 boolean is_aindex_indirect
,
356 LLVMValueRef attrib_index
,
357 LLVMValueRef swizzle_index
)
359 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
360 Value
*vert_index
= unwrap(vertex_index
);
361 Value
*attr_index
= unwrap(attrib_index
);
363 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
365 if (is_vindex_indirect
|| is_aindex_indirect
) {
367 Value
*res
= unwrap(bld
->zero
);
368 struct lp_type type
= bld
->type
;
370 for (i
= 0; i
< type
.length
; i
++) {
371 Value
*vert_chan_index
= vert_index
;
372 Value
*attr_chan_index
= attr_index
;
374 if (is_vindex_indirect
) {
375 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
377 if (is_aindex_indirect
) {
378 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
382 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
384 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
385 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
387 Value
*pVector
= ADD(MUL(vert_chan_index
, pInputVertStride
), attrib
);
388 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
390 Value
*value
= VEXTRACT(pInput
, C(i
));
391 res
= VINSERT(res
, value
, C(i
));
396 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
398 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
399 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
401 Value
*pVector
= ADD(MUL(vert_index
, pInputVertStride
), attrib
);
403 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
409 // GS output stream layout
410 #define VERTEX_COUNT_SIZE 32
411 #define CONTROL_HEADER_SIZE (8*32)
// Emits IR that writes the current vertex's outputs into the per-lane GS
// output streams (SWR_GS_CONTEXT_pStreams).
//
// For each shader output the destination slot is chosen from its semantic
// (PSIZE / LAYER / VIEWPORT_INDEX go to VERTEX_SGV_SLOT with a specific
// component, POSITION to VERTEX_POSITION_SLOT, everything else relative to
// VERTEX_ATTRIB_START_SLOT).  The byte offset is
//   emitted_vertices * vertSize + headerSize + attribSize * attribSlot.
// For each lane the store address is replaced by a small stack scratch
// buffer when that lane's bit in the GS context mask is clear, so masked-off
// lanes write to the scratch buffer instead of the stream.
//
// After the attribute loop, when the output topology is TOP_POINT_LIST, the
// low two bits of stream_id are OR-ed into the control header byte selected
// by emitted_vertices (shift = emitted_vertices * 2, byte index =
// emitted_vertices >> 2, offset by VERTEX_COUNT_SIZE).
// NOTE(review): the stream id is taken from element 0 of stream_id for every
// lane — confirm that this is intended.
414 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
415 struct lp_build_context
* bld
,
416 LLVMValueRef (*outputs
)[4],
417 LLVMValueRef emitted_vertices_vec
,
418 LLVMValueRef stream_id
)
420 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
422 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Stream layout constants: header (vertex count + control header), then
// vertices of SWR_VTX_NUM_SLOTS attributes, each 4 floats wide.
423 const uint32_t headerSize
= VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
;
424 const uint32_t attribSize
= 4 * sizeof(float);
425 const uint32_t vertSize
= attribSize
* SWR_VTX_NUM_SLOTS
;
426 Value
*pVertexOffset
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(vertSize
));
428 Value
*vMask
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_mask
});
429 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, mVWidth
));
431 Value
*pStack
= STACKSAVE();
432 Value
*pTmpPtr
= ALLOCA(mFP32Ty
, C(4)); // used for dummy write for lane masking
434 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
435 uint32_t attribSlot
= attrib
;
436 uint32_t sgvChannel
= 0;
437 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
438 attribSlot
= VERTEX_SGV_SLOT
;
439 sgvChannel
= VERTEX_SGV_POINT_SIZE_COMP
;
440 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
) {
441 attribSlot
= VERTEX_SGV_SLOT
;
442 sgvChannel
= VERTEX_SGV_RTAI_COMP
;
443 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_VIEWPORT_INDEX
) {
444 attribSlot
= VERTEX_SGV_SLOT
;
445 sgvChannel
= VERTEX_SGV_VAI_COMP
;
446 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
447 attribSlot
= VERTEX_POSITION_SLOT
;
449 attribSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
450 if (iface
->info
->writes_position
) {
455 Value
*pOutputOffset
= ADD(pVertexOffset
, VIMMED1(headerSize
+ attribSize
* attribSlot
)); // + sgvChannel ?
457 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
458 Value
*pLaneOffset
= VEXTRACT(pOutputOffset
, C(lane
));
459 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
460 Value
*pStreamOffset
= GEP(pStream
, pLaneOffset
);
461 pStreamOffset
= BITCAST(pStreamOffset
, mFP32PtrTy
);
// Inactive lanes are redirected to the scratch buffer.
463 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
464 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
466 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
// SGV outputs always read component 0 of the shader output and are stored
// only when the loop reaches the SGV component selected above.
469 if (attribSlot
== VERTEX_SGV_SLOT
)
470 vData
= LOAD(unwrap(outputs
[attrib
][0]));
472 vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
474 if (attribSlot
!= VERTEX_SGV_SLOT
||
475 sgvChannel
== channel
) {
476 vData
= VEXTRACT(vData
, C(lane
));
477 STORE(vData
, pStreamOffset
);
479 pStreamOffset
= GEP(pStreamOffset
, C(1));
484 /* When the output type is not points, the geometry shader may not
485 * output data to multiple streams. So early exit here.
487 if(iface
->pGsState
->outputTopology
!= TOP_POINT_LIST
) {
488 STACKRESTORE(pStack
);
492 // Info about stream id for each vertex
493 // is coded in 2 bits (4 vert per byte "box"):
494 // ----------------- ----------------- ----
495 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
496 // ----------------- ----------------- ----
498 // Calculate where need to put stream id for current vert
500 Value
*pShiftControl
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(2));
502 // Calculate in which box put stream id for current vert.
503 Value
*pOffsetControl
= LSHR(unwrap(emitted_vertices_vec
), VIMMED1(2));
506 Value
*pStreamIdOffset
= ADD(pOffsetControl
, VIMMED1(VERTEX_COUNT_SIZE
));
508 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
509 Value
*pShift
= TRUNC(VEXTRACT(pShiftControl
, C(lane
)), mInt8Ty
);
510 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
512 Value
*pStreamOffset
= GEP(pStream
, VEXTRACT(pStreamIdOffset
, C(lane
)));
514 // Just make sure that not overflow max - stream id = (0,1,2,3)
515 Value
*vVal
= TRUNC(AND(VEXTRACT(unwrap(stream_id
), C(0)), C(0x3)), mInt8Ty
);
517 // Shift it to correct position in byte "box"
518 vVal
= SHL(vVal
, pShift
);
520 // Info about other vertices can be already stored
521 // so we need to read and add bits from current vert info.
522 Value
*storedValue
= LOAD(pStreamOffset
);
523 vVal
= OR(storedValue
, vVal
);
524 STORE(vVal
, pStreamOffset
);
527 STACKRESTORE(pStack
);
531 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
532 struct lp_build_context
* bld
,
533 LLVMValueRef total_emitted_vertices_vec
,
534 LLVMValueRef verts_per_prim_vec
,
535 LLVMValueRef emitted_prims_vec
,
536 LLVMValueRef mask_vec
)
538 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
540 /* When the output type is points, the geometry shader may output data
541 * to multiple streams, and end_primitive has no effect. Info about
542 * stream id for vertices is stored into the same place in memory where
543 * end primitive info is stored so early exit in this case.
545 if (iface
->pGsState
->outputTopology
== TOP_POINT_LIST
) {
549 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
551 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
552 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
554 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
557 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
558 unwrap(verts_per_prim_vec
));
560 vCount
= unwrap(total_emitted_vertices_vec
);
562 Value
*mask
= unwrap(mask_vec
);
563 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
564 mask
= AND(mask
, cmpMask
);
565 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
567 vCount
= SUB(vCount
, VIMMED1(1));
568 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
569 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
571 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
573 Value
*pStack
= STACKSAVE();
574 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
576 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
577 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
578 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
579 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
581 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
582 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
584 Value
*vVal
= LOAD(pStreamOffset
);
585 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
586 STORE(vVal
, pStreamOffset
);
589 STACKRESTORE(pStack
);
593 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
594 LLVMValueRef total_emitted_vertices_vec
,
595 LLVMValueRef emitted_prims_vec
)
597 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
599 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
601 // Store emit count to each output stream in the first DWORD
602 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
604 Value
* pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
605 pStream
= BITCAST(pStream
, mInt32PtrTy
);
606 Value
* pLaneCount
= VEXTRACT(unwrap(total_emitted_vertices_vec
), C(lane
));
607 STORE(pLaneCount
, pStream
);
// Compiles the geometry shader bound in ctx for the given key.
//
// Steps visible here:
//  1. Reset and fill ctx->gs->gsState (topology, vertex counts, stream
//     layout sizes and offsets).
//  2. Create an externally linked, void LLVM function taking three pointers
//     (draw context, worker data, GS context) and position both the SWR and
//     the gallivm builder in its entry block.
//  3. Build the GS-input -> VS-output slot map, start the execution mask
//     from the GS context, and zero each lane's stream header.
//  4. Fill a swr_gs_llvm_iface with the file-scope callbacks and translate
//     the TGSI through lp_build_tgsi_soa.
//  5. Verify the function, compile the module, fetch the JIT-ed pointer and
//     mark the module as finalized.
612 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
614 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
615 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
617 memset(pGS
, 0, sizeof(*pGS
));
619 pGS
->gsEnable
= true;
// Input attribute count: the slots between the position slot and the first
// generic attribute slot, plus the shader's declared inputs.
621 pGS
->numInputAttribs
= (VERTEX_ATTRIB_START_SLOT
- VERTEX_POSITION_SLOT
) + info
->num_inputs
;
622 pGS
->outputTopology
=
623 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
625 /* It's +1 because emit_vertex in swr is always called exactly one time more
626 * than max_vertices passed in Geometry Shader. We need to allocate more memory
627 * to avoid crash/memory overwritten.
629 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] + 1;
630 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
632 // If point primitive then assume to use multiple streams
633 if(pGS
->outputTopology
== TOP_POINT_LIST
) {
634 pGS
->isSingleStream
= false;
636 pGS
->isSingleStream
= true;
637 pGS
->singleStreamID
= 0;
640 pGS
->vertexAttribOffset
= VERTEX_POSITION_SLOT
;
641 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
642 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
643 pGS
->controlDataSize
= 8; // GS ouputs max of 8 32B units
644 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
645 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
647 pGS
->allocationSize
=
648 VERTEX_COUNT_SIZE
+ // vertex count
649 CONTROL_HEADER_SIZE
+ // control header
650 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
651 pGS
->maxNumVerts
; // num verts
653 struct swr_geometry_shader
*gs
= ctx
->gs
;
655 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
656 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
658 memset(outputs
, 0, sizeof(outputs
));
660 AttrBuilder attrBuilder
;
661 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
663 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
664 PointerType::get(mInt8Ty
, 0),
665 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
666 FunctionType
*vsFuncType
=
667 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
669 // create new vertex shader function
670 auto pFunction
= Function::Create(vsFuncType
,
671 GlobalValue::ExternalLinkage
,
673 JM()->mpCurrentModule
);
674 #if LLVM_VERSION_MAJOR < 5
675 AttributeSet attrSet
= AttributeSet::get(
676 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
677 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
679 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
682 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
683 IRB()->SetInsertPoint(block
);
684 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
686 auto argitr
= pFunction
->arg_begin();
687 Value
*hPrivateData
= &*argitr
++;
688 hPrivateData
->setName("hPrivateData");
689 Value
*pWorkerData
= &*argitr
++;
690 pWorkerData
->setName("pWorkerData");
691 Value
*pGsCtx
= &*argitr
++;
692 pGsCtx
->setName("gsCtx");
695 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
696 consts_ptr
->setName("gs_constants");
697 Value
*const_sizes_ptr
=
698 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
699 const_sizes_ptr
->setName("num_gs_constants");
701 struct lp_build_sampler_soa
*sampler
=
702 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
704 struct lp_bld_tgsi_system_values system_values
;
705 memset(&system_values
, 0, sizeof(system_values
));
706 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
707 system_values
.invocation_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
709 std::vector
<Constant
*> mapConstants
;
710 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
711 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
712 ubyte semantic_name
= info
->input_semantic_name
[slot
];
713 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
715 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
717 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
719 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
722 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
723 vs_slot
= VERTEX_POSITION_SLOT
;
725 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
726 mapConstants
.push_back(C(vs_slot
));
729 struct lp_build_mask_context mask
;
730 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
731 lp_build_mask_begin(&mask
, gallivm
,
732 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
734 // zero out cut buffer so we can load/modify/store bits
735 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
737 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
738 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
741 struct swr_gs_llvm_iface gs_iface
;
742 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
743 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
744 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
745 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
746 gs_iface
.pBuilder
= this;
747 gs_iface
.pGsCtx
= pGsCtx
;
748 gs_iface
.pGsState
= pGS
;
749 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
750 gs_iface
.num_verts_per_prim
=
751 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
752 gs_iface
.info
= info
;
753 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
755 struct lp_build_tgsi_params params
;
// NOTE(review): the "¶ms" token below looks like a mis-encoded "&params"
// (address of the lp_build_tgsi_params declared just above) — confirm and
// repair.
756 memset(¶ms
, 0, sizeof(params
));
757 params
.type
= lp_type_float_vec(32, 32 * 8);
758 params
.mask
= & mask
;
759 params
.consts_ptr
= wrap(consts_ptr
);
760 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
761 params
.system_values
= &system_values
;
762 params
.inputs
= inputs
;
763 params
.context_ptr
= wrap(hPrivateData
);
764 params
.sampler
= sampler
;
765 params
.info
= &gs
->info
.base
;
766 params
.gs_iface
= &gs_iface
.base
;
768 lp_build_tgsi_soa(gallivm
,
773 lp_build_mask_end(&mask
);
775 sampler
->destroy(sampler
);
777 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
781 gallivm_verify_function(gallivm
, wrap(pFunction
));
782 gallivm_compile_module(gallivm
);
785 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
787 debug_printf("geom shader %p\n", pFunc
);
788 assert(pFunc
&& "Error: GeomShader = NULL");
790 JM()->mIsModuleFinalized
= true;
// Compiles a geometry shader variant for `key` and caches it.
//
// A BuilderSWR is constructed on the screen's JIT manager (hJitMgr), its
// CompileGS produces the function pointer, and a VariantGS holding the
// builder's gallivm state and that pointer is inserted into ctx->gs->map
// under `key`.
796 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
799 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
801 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
// The variant map stores the compiled variant behind a unique_ptr.
803 ctx
->gs
->map
.insert(std::make_pair(key
, std::make_unique
<VariantGS
>(builder
.gallivm
, func
)));
808 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
810 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
811 // interleave the simdvertex components into the dest simd16vertex
812 // slot16offset = slot8offset * 2
813 // comp16offset = comp8offset * 2 + alternateOffset
815 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
816 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
817 STORE(pVal
, pOut
, {channel
* 2});
819 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
820 STORE(pVal
, pOut
, {0, channel
});
825 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
827 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
829 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
830 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
832 memset(outputs
, 0, sizeof(outputs
));
834 AttrBuilder attrBuilder
;
835 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
837 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
838 PointerType::get(mInt8Ty
, 0),
839 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
840 FunctionType
*vsFuncType
=
841 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
843 // create new vertex shader function
844 auto pFunction
= Function::Create(vsFuncType
,
845 GlobalValue::ExternalLinkage
,
847 JM()->mpCurrentModule
);
848 #if LLVM_VERSION_MAJOR < 5
849 AttributeSet attrSet
= AttributeSet::get(
850 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
851 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
853 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
856 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
857 IRB()->SetInsertPoint(block
);
858 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
860 auto argitr
= pFunction
->arg_begin();
861 Value
*hPrivateData
= &*argitr
++;
862 hPrivateData
->setName("hPrivateData");
863 Value
*pWorkerData
= &*argitr
++;
864 pWorkerData
->setName("pWorkerData");
865 Value
*pVsCtx
= &*argitr
++;
866 pVsCtx
->setName("vsCtx");
868 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
870 consts_ptr
->setName("vs_constants");
871 Value
*const_sizes_ptr
=
872 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
873 const_sizes_ptr
->setName("num_vs_constants");
875 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
877 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
880 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
881 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
882 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
883 if (mask
& (1 << channel
)) {
884 inputs
[attrib
][channel
] =
885 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
890 struct lp_build_sampler_soa
*sampler
=
891 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
893 struct lp_bld_tgsi_system_values system_values
;
894 memset(&system_values
, 0, sizeof(system_values
));
895 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
898 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
900 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
904 uint32_t vectorWidth
= mVWidth16
;
906 uint32_t vectorWidth
= mVWidth
;
909 struct lp_build_tgsi_params params
;
910 memset(¶ms
, 0, sizeof(params
));
911 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
912 params
.consts_ptr
= wrap(consts_ptr
);
913 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
914 params
.system_values
= &system_values
;
915 params
.inputs
= inputs
;
916 params
.context_ptr
= wrap(hPrivateData
);
917 params
.sampler
= sampler
;
918 params
.info
= &swr_vs
->info
.base
;
920 lp_build_tgsi_soa(gallivm
,
925 sampler
->destroy(sampler
);
927 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
929 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
931 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
934 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
935 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
936 if (!outputs
[attrib
][channel
])
942 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
943 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
945 val
= LOAD(unwrap(outputs
[attrib
][0]));
946 outSlot
= VERTEX_SGV_SLOT
;
947 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
948 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
949 outSlot
= VERTEX_POSITION_SLOT
;
951 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
952 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
953 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
957 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
961 if (ctx
->rasterizer
->clip_plane_enable
||
962 swr_vs
->info
.base
.culldist_writemask
) {
963 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
966 if (swr_vs
->info
.base
.writes_clipvertex
) {
967 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
970 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
971 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
972 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
978 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
979 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
980 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
981 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
983 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
984 // clip distance overrides user clip planes
985 if ((swr_vs
->info
.base
.clipdist_writemask
& clip_mask
& (1 << val
)) ||
986 ((swr_vs
->info
.base
.culldist_writemask
<< swr_vs
->info
.base
.num_written_clipdistance
) & (1 << val
))) {
987 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1,
990 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
991 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
993 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
994 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
999 if (!(clip_mask
& (1 << val
)))
1002 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
1003 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
1004 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
1005 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
1007 Value
*bpx
= VBROADCAST_16(px
);
1008 Value
*bpy
= VBROADCAST_16(py
);
1009 Value
*bpz
= VBROADCAST_16(pz
);
1010 Value
*bpw
= VBROADCAST_16(pw
);
1012 Value
*bpx
= VBROADCAST(px
);
1013 Value
*bpy
= VBROADCAST(py
);
1014 Value
*bpz
= VBROADCAST(pz
);
1015 Value
*bpw
= VBROADCAST(pw
);
1017 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
1018 FADD(FMUL(unwrap(cy
), bpy
),
1019 FADD(FMUL(unwrap(cz
), bpz
),
1020 FMUL(unwrap(cw
), bpw
))));
1023 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
1025 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
1031 gallivm_verify_function(gallivm
, wrap(pFunction
));
1032 gallivm_compile_module(gallivm
);
1034 // lp_debug_dump_value(func);
1036 PFN_VERTEX_FUNC pFunc
=
1037 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1039 debug_printf("vert shader %p\n", pFunc
);
1040 assert(pFunc
&& "Error: VertShader = NULL");
1042 JM()->mIsModuleFinalized
= true;
// swr_compile_vs: compiles the vertex shader bound in ctx->vs for the given
// JIT key by calling builder.CompileVS(ctx, key), then records the result in
// ctx->vs->map under that key.
//   ctx - SWR context; ctx->vs supplies the shader tokens and the variant map.
//   key - variant key, used both for compilation and as the map key.
// NOTE(review): the embedded listing numbers skip (1048, 1050, 1054, 1056,
// 1058), so the return type, the construction of `builder` and the return
// statements are not visible here. Presumably the function returns `func`
// -- TODO confirm against the full file.
1048 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
// Guard on the shader having no tokens; the statement executed in that case
// is not visible in this listing.
1050 if (!ctx
->vs
->pipe
.tokens
)
// The screen's hJitMgr handle is reinterpreted as a JitManager*. The trailing
// comma suggests it is one argument of a larger call, presumably the
// constructor of `builder` -- TODO confirm.
1054 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
// JIT-compile the vertex shader for this key.
1056 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
// Store the variant: key -> VariantVS built from builder.gallivm and func,
// held through std::unique_ptr.
1058 ctx
->vs
->map
.insert(std::make_pair(key
, std::make_unique
<VariantVS
>(builder
.gallivm
, func
)));
// swr_so_adjust_attrib: translates a vertex-shader output index into a vertex
// slot index.
//   in_attrib - index into swr_vs->info.base.output_semantic_name[].
//   swr_vs    - vertex shader whose output semantics are consulted.
// Mapping visible in this listing:
//   default                 -> in_attrib + VERTEX_ATTRIB_START_SLOT
//   TGSI_SEMANTIC_POSITION  -> VERTEX_POSITION_SLOT
//   TGSI_SEMANTIC_PSIZE     -> VERTEX_SGV_SLOT
//   TGSI_SEMANTIC_LAYER     -> VERTEX_SGV_SLOT
// NOTE(review): the listing numbers skip (1066 -> 1069 -> 1072, 1078 -> 1080),
// so the declaration of `attrib`, any guard around the semantic lookup, the
// body of the writes_position branch and the return statement are not
// visible. Presumably `attrib` is returned -- TODO confirm.
1063 swr_so_adjust_attrib(unsigned in_attrib
,
1064 swr_vertex_shader
*swr_vs
)
1066 ubyte semantic_name
;
// Default: generic attributes start at VERTEX_ATTRIB_START_SLOT.
1069 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
// Override the default for outputs that have a dedicated slot.
1072 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
1073 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
1074 attrib
= VERTEX_POSITION_SLOT
;
1075 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
1076 attrib
= VERTEX_SGV_SLOT
;
1077 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
1078 attrib
= VERTEX_SGV_SLOT
;
// Remaining outputs: an adjustment conditional on the shader writing a
// position. The adjustment itself is not visible in this listing.
1080 if (swr_vs
->info
.base
.writes_position
) {
// locate_linkage: searches the outputs described by `info` for the one whose
// semantic name and semantic index both match.
//   name  - TGSI semantic name to look for.
//   index - semantic index to look for.
//   info  - shader info whose output_semantic_name[] / output_semantic_index[]
//           arrays are scanned over slots 0 .. PIPE_MAX_SHADER_OUTPUTS - 1.
// NOTE(review): the statements executed on a match and after the loop are not
// visible in this listing (numbers skip after 1094). Callers in this file
// either use the result directly as an output index or subtract 1 and compare
// against 0xFFFFFFFF, so confirm the exact return values in the full file
// before relying on them.
1090 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
1092 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
// Both the semantic name and the semantic index must match.
1093 if ((info
->output_semantic_name
[i
] == name
)
1094 && (info
->output_semantic_index
[i
] == index
)) {
// BuilderSWR::CompileFS: emits LLVM IR for the fragment shader bound in
// ctx->fs, specialised by `key`, and JIT-compiles it through gallivm.
//   ctx - SWR context; supplies ctx->fs (the shader) and ctx->gs / ctx->vs
//         (the previous stage whose outputs are linked to the FS inputs).
//   key - variant key; sprite_coord_enable, light_twoside, poly_stipple_enable,
//         sampler and nr_cbufs are read below.
// Side effects visible here: swr_fs->constantMask, flatConstantMask and
// pointSpriteMask are reset and recomputed, and JM()->mIsModuleFinalized is
// set to true at the end.
// NOTE(review): this listing carries fused listing line numbers and skips many
// of them, so braces, else/break lines and the return statement are not
// visible. Presumably the function returns `kernel` -- TODO confirm.
1103 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
1105 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
// Previous-stage shader info, taken from either the geometry shader or the
// vertex shader. The condition choosing between the two assignments is not
// visible in this listing.
1107 struct tgsi_shader_info
*pPrevShader
;
1109 pPrevShader
= &ctx
->gs
->info
.base
;
1111 pPrevShader
= &ctx
->vs
->info
.base
;
// Per-attribute, per-channel value tables handed to the TGSI translator;
// zero-initialised so that unused entries are null.
1113 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1114 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1116 memset(inputs
, 0, sizeof(inputs
));
1117 memset(outputs
, 0, sizeof(outputs
));
1119 struct lp_build_sampler_soa
*sampler
= NULL
;
// Stack alignment attribute sized to one SIMD register of floats.
1121 AttrBuilder attrBuilder
;
1122 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function type: void(swr_draw_context*, int8*, SWR_PS_CONTEXT*).
1124 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1125 PointerType::get(mInt8Ty
, 0),
1126 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
1127 FunctionType
*funcType
=
1128 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
// Create the function in the current module. The function-name argument
// (listing line 1132) is not visible here.
1130 auto pFunction
= Function::Create(funcType
,
1131 GlobalValue::ExternalLinkage
,
1133 JM()->mpCurrentModule
);
// Attach the attributes: AttributeSet API for LLVM < 5, AttributeList
// otherwise. The matching #else / #endif lines are not visible here.
1134 #if LLVM_VERSION_MAJOR < 5
1135 AttributeSet attrSet
= AttributeSet::get(
1136 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1137 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1139 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Create the entry block and point both builders at it (the C++ IRBuilder and
// gallivm's C-API builder).
1142 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1143 IRB()->SetInsertPoint(block
);
1144 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the three function arguments in declaration order.
1146 auto args
= pFunction
->arg_begin();
1147 Value
*hPrivateData
= &*args
++;
1148 hPrivateData
->setName("hPrivateData");
1149 Value
*pWorkerData
= &*args
++;
1150 pWorkerData
->setName("pWorkerData");
1151 Value
*pPS
= &*args
++;
1152 pPS
->setName("psCtx");
// Addresses of the FS constant buffers and their sizes inside the draw
// context.
1154 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
1155 consts_ptr
->setName("fs_constants");
1156 Value
*const_sizes_ptr
=
1157 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
1158 const_sizes_ptr
->setName("num_fs_constants");
1160 // load *pAttribs, *pPerspAttribs
1161 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
1162 Value
*pPerspAttribs
=
1163 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
// These masks are rebuilt from scratch by the input loop below.
1165 swr_fs
->constantMask
= 0;
1166 swr_fs
->flatConstantMask
= 0;
1167 swr_fs
->pointSpriteMask
= 0;
// Build the inputs[][] table: one iteration per FS input attribute.
1169 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
1170 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
1171 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
1172 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
// Pick the i/j interpolation coordinates for the requested location
// (center, centroid or sample).
1178 Value
*vi
= nullptr, *vj
= nullptr;
1179 switch (interpLoc
) {
1180 case TGSI_INTERPOLATE_LOC_CENTER
:
1181 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
1182 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
1184 case TGSI_INTERPOLATE_LOC_CENTROID
:
1185 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
1186 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
1188 case TGSI_INTERPOLATE_LOC_SAMPLE
:
1189 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
1190 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
// PERSPECTIVE and COLOR modes read from pPerspAttribs and need vw, the
// reciprocal of the stored vOneOverW at the same location; the other visible
// assignment selects pRawAttribs.
1195 Value
*vw
= nullptr, *pAttribs
;
1196 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
1197 interpMode
== TGSI_INTERPOLATE_COLOR
) {
1198 pAttribs
= pPerspAttribs
;
1199 switch (interpLoc
) {
1200 case TGSI_INTERPOLATE_LOC_CENTER
:
1201 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
1203 case TGSI_INTERPOLATE_LOC_CENTROID
:
1204 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
1206 case TGSI_INTERPOLATE_LOC_SAMPLE
:
1207 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
1211 pAttribs
= pRawAttribs
;
1217 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
1218 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
// FACE: frontFace is converted to float and remapped with ff * 2 - 1, then
// splatted across the vector; the input becomes (ff, 0, 0, 1). The
// declaration receiving the UI_TO_FP result is not visible in this listing.
1220 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
1222 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
1223 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
1224 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
1226 inputs
[attrib
][0] = wrap(ff
);
1227 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1228 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1229 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
// POSITION: x/y come from the pixel-center positions when the shader requests
// half-integer pixel centers, otherwise from the UL positions; z is vZ. The
// target of the vOneOverW load is not visible here (presumably channel 3).
1231 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
1232 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
1233 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
1234 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
1235 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
1237 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
1238 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
1240 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
1242 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
// LAYER: splat renderTargetArrayIndex into channel 0, zero the rest.
1244 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) { // gl_Layer
1245 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_renderTargetArrayIndex
});
1246 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vRenderTargetArrayIndex");
1247 inputs
[attrib
][0] = wrap(ff
);
1248 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1249 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1250 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
// VIEWPORT_INDEX: splat viewportIndex into channel 0, zero the rest.
1252 } else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) { // gl_ViewportIndex
1253 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_viewportIndex
});
1254 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vViewportIndex");
1255 inputs
[attrib
][0] = wrap(ff
);
1256 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1257 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1258 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
// Everything else is linked to an output of the previous stage. The lookup
// result is decremented by one, and 0xFFFFFFFF is used below as the
// "no linked output" marker.
1261 unsigned linkedAttrib
=
1262 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
1264 uint32_t extraAttribs
= 0;
1265 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
1266 /* non-gs generated primID - need to grab from swizzleMap override */
1267 linkedAttrib
= pPrevShader
->num_outputs
- 1;
1268 swr_fs
->constantMask
|= 1 << linkedAttrib
;
1270 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
1271 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
1272 /* we add an extra attrib to the backendState in swr_update_derived. */
1273 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
1274 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
// No linked output: the input defaults to (0, 0, 0, 1).
1276 } else if (linkedAttrib
== 0xFFFFFFFF) {
1277 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
1278 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
1279 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
1280 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
// NOTE(review): the block comment opened on the next line is not closed in
// this listing (listing lines 1282-1283 are not visible), so as written the
// text up to the next closing marker is lexically part of that comment.
1281 /* If we're reading in color and 2-sided lighting is enabled, we have
1284 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
1287 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1288 swr_fs
->constantMask
|= 1 << linkedAttrib
;
1289 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1290 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
1294 unsigned bcolorAttrib
= 0xFFFFFFFF;
1295 Value
*offset
= NULL
;
1296 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
1297 bcolorAttrib
= locate_linkage(
1298 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
) - 1;
1299 /* Neither front nor back colors were available. Nothing to load. */
1300 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
1302 /* If there is no front color, just always use the back color. */
1303 if (linkedAttrib
== 0xFFFFFFFF)
1304 linkedAttrib
= bcolorAttrib
;
// A back color exists: flag it in the same masks as the front color, then
// compute a run-time offset that redirects the loads to the back color on
// back-facing primitives.
1306 if (bcolorAttrib
!= 0xFFFFFFFF) {
1307 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1308 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
1309 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
1310 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
// Element distance between the two attributes; each attribute occupies 12
// entries (see the indexA/indexB/indexC computation below).
1313 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
// backFace = 1 XOR frontFace. The declaration receiving this value is not
// visible here; presumably it is `back`, used just below.
1317 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
1319 offset
= MUL(back
, C(diff
));
1320 offset
->setName("offset");
// Load and interpolate each channel that the shader actually reads.
1325 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1326 if (mask
& (1 << channel
)) {
// Three values per channel, at +0, +4 and +8 within the attribute's 12
// entries.
1327 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
1328 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
1329 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
// Apply the two-sided-lighting offset. The guard around these adds is not
// visible in this listing.
1332 indexA
= ADD(indexA
, offset
);
1333 indexB
= ADD(indexB
, offset
);
1334 indexC
= ADD(indexC
, offset
);
1337 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
1338 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
1339 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
// CONSTANT mode takes va unchanged. Otherwise the visible statements compute
// va * vi + vb * vj + vc, multiplied by vw for PERSPECTIVE / COLOR.
// NOTE(review): vk = 1 - vi - vj is computed, but the statements that use it
// (listing lines 1345-1347) are not visible here.
1341 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
1342 inputs
[attrib
][channel
] = wrap(va
);
1344 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
1348 Value
*interp
= FMUL(va
, vi
);
1349 Value
*interp1
= FMUL(vb
, vj
);
1350 interp
= FADD(interp
, interp1
);
1351 interp
= FADD(interp
, vc
);
1352 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
1353 interpMode
== TGSI_INTERPOLATE_COLOR
)
1354 interp
= FMUL(interp
, vw
);
1355 inputs
[attrib
][channel
] = wrap(interp
);
// Texture sampling code generator for the fragment stage.
1361 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
1363 struct lp_bld_tgsi_system_values system_values
;
1364 memset(&system_values
, 0, sizeof(system_values
));
// An execution mask is set up when the shader uses kill or polygon stipple is
// enabled. NOTE(review): the statement setting uses_mask to true is not
// visible in this listing.
1366 struct lp_build_mask_context mask
;
1367 bool uses_mask
= false;
1369 if (swr_fs
->info
.base
.uses_kill
||
1370 key
.poly_stipple_enable
) {
1371 Value
*vActiveMask
= NULL
;
1372 if (swr_fs
->info
.base
.uses_kill
) {
1373 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
1375 if (key
.poly_stipple_enable
) {
1376 // first get fragment xy coords and clip to stipple bounds
1377 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
1378 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
1379 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
1380 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
1382 // stipple pattern is 32x32, which means that one line of stipple
1383 // is stored in one word:
1384 // vXstipple is bit offset inside 32-bit stipple word
1385 // vYstipple is word index in stipple array
1386 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
1387 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
1389 // grab stipple pattern base address
1390 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
1391 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
1393 // perform a gather to grab stipple words for each lane
1394 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
1395 VIMMED1(0xffffffff), 4);
1397 // create a mask with one bit corresponding to the x stipple
1398 // and AND it with the pattern, to see if we have a bit
1399 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
1400 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
1401 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
1402 vStippleMask
= VMASK(vStippleMask
);
// Combine with the kill mask when both are in use, otherwise the stipple mask
// alone is the active mask.
1404 if (swr_fs
->info
.base
.uses_kill
) {
1405 vActiveMask
= AND(vActiveMask
, vStippleMask
);
1407 vActiveMask
= vStippleMask
;
1410 lp_build_mask_begin(
1411 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
// Parameter block for the TGSI-to-LLVM translator.
// NOTE(review): "¶ms" below looks like a mis-decoded "&params" (HTML entity
// damage in this listing) -- confirm against the original file.
1415 struct lp_build_tgsi_params params
;
1416 memset(¶ms
, 0, sizeof(params
));
1417 params
.type
= lp_type_float_vec(32, 32 * 8);
1418 params
.mask
= uses_mask
? &mask
: NULL
;
1419 params
.consts_ptr
= wrap(consts_ptr
);
1420 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1421 params
.system_values
= &system_values
;
1422 params
.inputs
= inputs
;
1423 params
.context_ptr
= wrap(hPrivateData
);
1424 params
.sampler
= sampler
;
1425 params
.info
= &swr_fs
->info
.base
;
// Translate the shader tokens; the remaining arguments (listing lines
// 1429-1430) are not visible here.
1427 lp_build_tgsi_soa(gallivm
,
1428 swr_fs
->pipe
.tokens
,
1432 sampler
->destroy(sampler
);
// Re-sync the C++ IRBuilder with the block where gallivm's builder now
// stands.
1434 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Write the shader outputs back into the pixel-shader context.
1436 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
1438 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
// POSITION: channel 2 of the output is loaded and stored to vZ.
1439 case TGSI_SEMANTIC_POSITION
: {
1442 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
1443 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
// COLOR: each written channel goes to SWR_PS_CONTEXT_shaded. With
// FS_COLOR0_WRITES_ALL_CBUFS and semantic index 0 it is written to every
// render target below key.nr_cbufs; the other visible path indexes by the
// output's semantic index.
1446 case TGSI_SEMANTIC_COLOR
: {
1447 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1448 if (!outputs
[attrib
][channel
])
1452 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
1453 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
1454 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
1455 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
1458 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
1464 SWR_PS_CONTEXT_shaded
,
1465 swr_fs
->info
.base
.output_semantic_index
[attrib
],
// Any other output semantic is reported by name and index; the call emitting
// this message is not visible in this listing.
1473 "unknown output from FS %s[%d]\n",
1474 tgsi_semantic_names
[swr_fs
->info
.base
1475 .output_semantic_name
[attrib
]],
1476 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
// Close the execution mask and store the result as the new activeMask. The
// guards around these statements are not visible here.
1482 LLVMValueRef mask_result
= 0;
1484 mask_result
= lp_build_mask_end(&mask
);
1487 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1490 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
// Verify the generated function, then run gallivm's module compilation.
1495 gallivm_verify_function(gallivm
, wrap(pFunction
));
1497 gallivm_compile_module(gallivm
);
1499 // after the gallivm passes, we have to lower the core's intrinsics
1500 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
1501 lowerPass
.add(createLowerX86Pass(this));
1502 lowerPass
.run(*pFunction
);
// Obtain the JIT-compiled entry point as a pixel kernel function pointer.
1504 PFN_PIXEL_KERNEL kernel
=
1505 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1506 debug_printf("frag shader %p\n", kernel
);
1507 assert(kernel
&& "Error: FragShader = NULL");
// Flag the JIT manager's current module as finalized.
1509 JM()->mIsModuleFinalized
= true;
// swr_compile_fs: compiles the fragment shader bound in ctx->fs for the given
// JIT key by calling builder.CompileFS(ctx, key), then records the result in
// ctx->fs->map under that key.
//   ctx - SWR context; ctx->fs supplies the shader tokens and the variant map.
//   key - variant key, used both for compilation and as the map key.
// NOTE(review): the embedded listing numbers skip (1515, 1517, 1521, 1523,
// 1525), so the return type, the construction of `builder` and the return
// statements are not visible here. Presumably the function returns `func`
// -- TODO confirm against the full file.
1515 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
// Guard on the shader having no tokens; the statement executed in that case
// is not visible in this listing.
1517 if (!ctx
->fs
->pipe
.tokens
)
// The screen's hJitMgr handle is reinterpreted as a JitManager*. The trailing
// comma suggests it is one argument of a larger call, presumably the
// constructor of `builder` -- TODO confirm.
1521 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
// JIT-compile the fragment shader for this key.
1523 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
// Store the variant: key -> VariantFS built from builder.gallivm and func,
// held through std::unique_ptr.
1525 ctx
->fs
->map
.insert(std::make_pair(key
, std::make_unique
<VariantFS
>(builder
.gallivm
, func
)));