1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 #include <llvm/Config/llvm-config.h>
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
42 #include "gen_state_llvm.h"
44 #include "functionpasses/passes.h"
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
56 #include "swr_context.h"
57 #include "gen_surf_state_llvm.h"
58 #include "gen_swr_context_llvm.h"
59 #include "swr_resource.h"
60 #include "swr_state.h"
61 #include "swr_screen.h"
64 /////////////////////////////////////////////////////////////////////////
69 #include "util/u_debug.h"
70 #include "util/u_memory.h"
71 #include "util/u_string.h"
73 #include "gallivm/lp_bld_type.h"
// Compile-time switch that gates the lp_build_printf / lp_build_print_value
// tracing emitted by the shader builder methods further down in this file.
// NOTE(review): two definitions with opposite values appear back to back; they
// are presumably the two arms of a preprocessor conditional whose directive
// lines are not visible in this section - confirm against the full file.
76 constexpr bool verbose_shader
= true;
78 constexpr bool verbose_shader
= false;
81 using namespace SwrJit
;
// Forward declaration of locate_linkage(); the definition is not part of this
// section. It takes a name/index pair (presumably a TGSI semantic - confirm)
// and the tgsi_shader_info to look it up in.
84 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
86 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
88 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
91 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
93 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
96 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
98 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
101 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
103 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
106 bool operator==(const swr_jit_tcs_key
&lhs
, const swr_jit_tcs_key
&rhs
)
108 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
111 bool operator==(const swr_jit_tes_key
&lhs
, const swr_jit_tes_key
&rhs
)
113 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
118 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
119 struct swr_context
*ctx
,
120 enum pipe_shader_type shader_type
,
121 struct swr_jit_sampler_key
&key
)
123 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
125 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
126 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
127 lp_sampler_static_sampler_state(
128 &key
.sampler
[i
].sampler_state
,
129 ctx
->samplers
[shader_type
][i
]);
134 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
135 * are dx10-style? Can't really have mixed opcodes, at least not
136 * if we want to skip the holes here (without rescanning tgsi).
138 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
139 key
.nr_sampler_views
=
140 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
141 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
142 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
143 const struct pipe_sampler_view
*view
=
144 ctx
->sampler_views
[shader_type
][i
];
145 lp_sampler_static_texture_state(
146 &key
.sampler
[i
].texture_state
, view
);
148 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
149 const struct util_format_description
*desc
=
150 util_format_description(view
->format
);
151 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
152 !util_format_has_depth(desc
))
153 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
158 key
.nr_sampler_views
= key
.nr_samplers
;
159 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
160 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
161 const struct pipe_sampler_view
*view
=
162 ctx
->sampler_views
[shader_type
][i
];
163 lp_sampler_static_texture_state(
164 &key
.sampler
[i
].texture_state
, view
);
166 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
167 const struct util_format_description
*desc
=
168 util_format_description(view
->format
);
169 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
170 !util_format_has_depth(desc
))
171 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
179 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
180 struct swr_context
*ctx
,
181 swr_fragment_shader
*swr_fs
)
183 memset(&key
, 0, sizeof(key
));
185 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
186 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
187 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
189 struct tgsi_shader_info
*pPrevShader
;
191 pPrevShader
= &ctx
->gs
->info
.base
;
193 pPrevShader
= &ctx
->tes
->info
.base
;
195 pPrevShader
= &ctx
->vs
->info
.base
;
197 memcpy(&key
.vs_output_semantic_name
,
198 &pPrevShader
->output_semantic_name
,
199 sizeof(key
.vs_output_semantic_name
));
200 memcpy(&key
.vs_output_semantic_idx
,
201 &pPrevShader
->output_semantic_index
,
202 sizeof(key
.vs_output_semantic_idx
));
204 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
206 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
207 ctx
->poly_stipple
.prim_is_poly
;
211 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
212 struct swr_context
*ctx
,
213 swr_vertex_shader
*swr_vs
)
215 memset(&key
, 0, sizeof(key
));
217 key
.clip_plane_mask
=
218 swr_vs
->info
.base
.clipdist_writemask
?
219 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
220 ctx
->rasterizer
->clip_plane_enable
;
222 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
226 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
227 struct swr_vertex_element_state
*velems
)
229 memset(&key
, 0, sizeof(key
));
231 key
.fsState
= velems
->fsState
;
235 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
236 struct swr_context
*ctx
,
237 swr_geometry_shader
*swr_gs
)
239 memset(&key
, 0, sizeof(key
));
241 struct tgsi_shader_info
*pPrevShader
= nullptr;
244 pPrevShader
= &ctx
->tes
->info
.base
;
246 pPrevShader
= &ctx
->vs
->info
.base
;
249 memcpy(&key
.vs_output_semantic_name
,
250 &pPrevShader
->output_semantic_name
,
251 sizeof(key
.vs_output_semantic_name
));
252 memcpy(&key
.vs_output_semantic_idx
,
253 &pPrevShader
->output_semantic_index
,
254 sizeof(key
.vs_output_semantic_idx
));
256 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
260 swr_generate_tcs_key(struct swr_jit_tcs_key
&key
,
261 struct swr_context
*ctx
,
262 swr_tess_control_shader
*swr_tcs
)
264 memset(&key
, 0, sizeof(key
));
266 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
268 memcpy(&key
.vs_output_semantic_name
,
269 &pPrevShader
->output_semantic_name
,
270 sizeof(key
.vs_output_semantic_name
));
271 memcpy(&key
.vs_output_semantic_idx
,
272 &pPrevShader
->output_semantic_index
,
273 sizeof(key
.vs_output_semantic_idx
));
275 key
.clip_plane_mask
=
276 swr_tcs
->info
.base
.clipdist_writemask
?
277 swr_tcs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
278 ctx
->rasterizer
->clip_plane_enable
;
280 swr_generate_sampler_key(swr_tcs
->info
, ctx
, PIPE_SHADER_TESS_CTRL
, key
);
284 swr_generate_tes_key(struct swr_jit_tes_key
&key
,
285 struct swr_context
*ctx
,
286 swr_tess_evaluation_shader
*swr_tes
)
288 memset(&key
, 0, sizeof(key
));
290 struct tgsi_shader_info
*pPrevShader
= nullptr;
293 pPrevShader
= &ctx
->tcs
->info
.base
;
296 pPrevShader
= &ctx
->vs
->info
.base
;
299 SWR_ASSERT(pPrevShader
!= nullptr, "TES: No TCS or VS defined");
301 memcpy(&key
.prev_output_semantic_name
,
302 &pPrevShader
->output_semantic_name
,
303 sizeof(key
.prev_output_semantic_name
));
304 memcpy(&key
.prev_output_semantic_idx
,
305 &pPrevShader
->output_semantic_index
,
306 sizeof(key
.prev_output_semantic_idx
));
308 key
.clip_plane_mask
=
309 swr_tes
->info
.base
.clipdist_writemask
?
310 swr_tes
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
311 ctx
->rasterizer
->clip_plane_enable
;
313 swr_generate_sampler_key(swr_tes
->info
, ctx
, PIPE_SHADER_TESS_EVAL
, key
);
// Builder subclass used for all SWR shader compilation in this file. It holds
// the gallivm_state used for IR generation, declares one Compile* entry point
// per shader stage, and declares the GS / TCS / TES callback implementations
// that the file-scope trampolines forward to.
316 struct BuilderSWR
: public Builder
{
// Starts a new module on the JitManager, creates the gallivm state on the
// JitManager's LLVM context and makes gallivm's module the current module.
317 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
320 pJitMgr
->SetupNewModule();
321 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
));
322 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
// NOTE(review): presumably the destructor body (its header line is not
// visible here) - releases the IR held by the gallivm state.
326 gallivm_free_ir(gallivm
);
329 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
330 unsigned slot
, unsigned channel
);
// gallivm state created in the constructor and used by every emit function.
332 struct gallivm_state
*gallivm
;
// Per-stage compile entry points; each takes the context and that stage's JIT key.
333 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
334 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
335 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
336 PFN_TCS_FUNC
CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
);
337 PFN_TES_FUNC
CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
);
339 // GS-specific emit functions
341 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
342 struct lp_build_context
* bld
,
343 boolean is_vindex_indirect
,
344 LLVMValueRef vertex_index
,
345 boolean is_aindex_indirect
,
346 LLVMValueRef attrib_index
,
347 LLVMValueRef swizzle_index
);
349 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
350 struct lp_build_context
* bld
,
351 LLVMValueRef (*outputs
)[4],
352 LLVMValueRef emitted_vertices_vec
,
353 LLVMValueRef stream_id
);
356 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
357 struct lp_build_context
* bld
,
358 LLVMValueRef total_emitted_vertices_vec_ptr
,
359 LLVMValueRef verts_per_prim_vec
,
360 LLVMValueRef emitted_prims_vec
,
361 LLVMValueRef mask_vec
);
364 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
365 LLVMValueRef total_emitted_vertices_vec
,
366 LLVMValueRef emitted_prims_vec
);
368 // TCS-specific emit functions
369 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
);
370 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
);
373 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
374 struct lp_build_tgsi_context
* bld_base
,
375 boolean is_vindex_indirect
,
376 LLVMValueRef vertex_index
,
377 boolean is_aindex_indirect
,
378 LLVMValueRef attrib_index
,
379 LLVMValueRef swizzle_index
);
// NOTE(review): the final parameter line(s) of the next two declarations are
// not visible in this section; the definitions use an extra `name` argument.
382 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
383 struct lp_build_tgsi_context
* bld_base
,
384 boolean is_vindex_indirect
,
385 LLVMValueRef vertex_index
,
386 boolean is_aindex_indirect
,
387 LLVMValueRef attrib_index
,
388 LLVMValueRef swizzle_index
,
392 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
393 struct lp_build_tgsi_context
* bld_base
,
395 boolean is_vindex_indirect
,
396 LLVMValueRef vertex_index
,
397 boolean is_aindex_indirect
,
398 LLVMValueRef attrib_index
,
399 LLVMValueRef swizzle_index
,
402 // Barrier implementation (available only in TCS)
404 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
405 struct lp_build_tgsi_context
*bld_base
);
407 // TES-specific emit functions
409 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
410 struct lp_build_tgsi_context
* bld_base
,
411 boolean is_vindex_indirect
,
412 LLVMValueRef vertex_index
,
413 boolean is_aindex_indirect
,
414 LLVMValueRef attrib_index
,
415 LLVMValueRef swizzle_index
);
418 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
419 struct lp_build_tgsi_context
* bld_base
,
420 boolean is_aindex_indirect
,
421 LLVMValueRef attrib_index
,
422 LLVMValueRef swizzle_index
);
// SWR extension of lp_build_gs_iface. The gallivm callbacks receive a pointer
// to `base`; the trampolines in this file cast it back to this struct to reach
// pBuilder and the GS-specific state below.
425 struct swr_gs_llvm_iface
{
426 struct lp_build_gs_iface base
;
427 struct tgsi_shader_info
*info
;
// Builder whose member functions implement the GS callbacks.
429 BuilderSWR
*pBuilder
;
432 SWR_GS_STATE
*pGsState
;
// Number of output attributes iterated when a vertex is emitted.
433 uint32_t num_outputs
;
434 uint32_t num_verts_per_prim
;
// Table indexed by attribute index when fetching GS inputs.
436 Value
*pVtxAttribMap
;
// SWR extension of lp_build_tcs_iface. The trampolines in this file cast the
// `base` pointer handed to the gallivm callbacks back to this struct to reach
// pBuilder and the TCS-specific state below.
439 struct swr_tcs_llvm_iface
{
440 struct lp_build_tcs_iface base
;
441 struct tgsi_shader_info
*info
;
// Builder whose member functions implement the TCS callbacks.
443 BuilderSWR
*pBuilder
;
446 SWR_TS_STATE
*pTsState
;
// Upper bound of the per-vertex loop opened by the TCS prologue.
448 uint32_t output_vertices
;
// Loop state shared between the TCS prologue (begin) and epilogue (end).
450 struct lp_build_for_loop_state loop_state
;
452 Value
*pVtxAttribMap
;
453 Value
*pVtxOutputAttribMap
;
454 Value
*pPatchOutputAttribMap
;
// SWR extension of lp_build_tes_iface. The trampolines in this file cast the
// `base` pointer handed to the gallivm callbacks back to this struct to reach
// pBuilder and the TES-specific state below.
457 struct swr_tes_llvm_iface
{
458 struct lp_build_tes_iface base
;
459 struct tgsi_shader_info
*info
;
// Builder whose member functions implement the TES callbacks.
461 BuilderSWR
*pBuilder
;
464 SWR_TS_STATE
*pTsState
;
466 uint32_t num_outputs
;
468 Value
*pVtxAttribMap
;
469 Value
*pPatchAttribMap
;
472 // trampoline functions so we can use the builder llvm construction methods
474 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
475 struct lp_build_context
* bld
,
476 boolean is_vindex_indirect
,
477 LLVMValueRef vertex_index
,
478 boolean is_aindex_indirect
,
479 LLVMValueRef attrib_index
,
480 LLVMValueRef swizzle_index
)
482 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
484 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld
,
493 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
494 struct lp_build_context
* bld
,
495 LLVMValueRef (*outputs
)[4],
496 LLVMValueRef emitted_vertices_vec
,
497 LLVMValueRef stream_id
)
499 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
501 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld
,
503 emitted_vertices_vec
,
508 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
509 struct lp_build_context
* bld
,
510 LLVMValueRef total_emitted_vertices_vec_ptr
,
511 LLVMValueRef verts_per_prim_vec
,
512 LLVMValueRef emitted_prims_vec
,
513 LLVMValueRef mask_vec
)
515 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
517 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld
,
518 total_emitted_vertices_vec_ptr
,
525 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
526 LLVMValueRef total_emitted_vertices_vec
,
527 LLVMValueRef emitted_prims_vec
)
529 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
531 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
,
532 total_emitted_vertices_vec
,
537 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
538 struct lp_build_context
* bld
,
539 boolean is_vindex_indirect
,
540 LLVMValueRef vertex_index
,
541 boolean is_aindex_indirect
,
542 LLVMValueRef attrib_index
,
543 LLVMValueRef swizzle_index
)
545 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
546 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
548 return iface
->pBuilder
->swr_tcs_llvm_fetch_input(tcs_iface
, bld_base
,
// Trampoline for the TCS output fetch: casts tcs_iface to swr_tcs_llvm_iface
// and bld to lp_build_tgsi_context, then forwards to the BuilderSWR member of
// the same name.
// NOTE(review): the last parameter line and the forwarded argument lines are
// not visible in this section.
557 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
558 struct lp_build_context
* bld
,
559 boolean is_vindex_indirect
,
560 LLVMValueRef vertex_index
,
561 boolean is_aindex_indirect
,
562 LLVMValueRef attrib_index
,
563 LLVMValueRef swizzle_index
,
566 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
567 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
569 return iface
->pBuilder
->swr_tcs_llvm_fetch_output(tcs_iface
, bld_base
,
580 swr_tcs_llvm_emit_prologue(struct lp_build_context
* bld
)
582 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
583 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
584 iface
->pBuilder
->swr_tcs_llvm_emit_prologue(bld_base
);
588 swr_tcs_llvm_emit_epilogue(struct lp_build_context
* bld
)
590 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
591 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
592 iface
->pBuilder
->swr_tcs_llvm_emit_epilogue(bld_base
);
// Trampoline for the TCS output store: casts tcs_iface to swr_tcs_llvm_iface
// and bld to lp_build_tgsi_context, then forwards to the BuilderSWR member of
// the same name.
// NOTE(review): some parameter lines and the forwarded argument lines are not
// visible in this section.
596 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
597 struct lp_build_context
* bld
,
599 boolean is_vindex_indirect
,
600 LLVMValueRef vertex_index
,
601 boolean is_aindex_indirect
,
602 LLVMValueRef attrib_index
,
603 LLVMValueRef swizzle_index
,
606 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
607 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
609 iface
->pBuilder
->swr_tcs_llvm_store_output(tcs_iface
,
622 void swr_tcs_llvm_emit_barrier(struct lp_build_context
*bld
)
624 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
625 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
627 iface
->pBuilder
->swr_tcs_llvm_emit_barrier(bld_base
->tcs_iface
, &bld_base
->bld_base
);
632 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
633 struct lp_build_context
* bld
,
634 boolean is_vindex_indirect
,
635 LLVMValueRef vertex_index
,
636 boolean is_aindex_indirect
,
637 LLVMValueRef attrib_index
,
638 LLVMValueRef swizzle_index
)
640 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
641 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
643 return iface
->pBuilder
->swr_tes_llvm_fetch_vtx_input(tes_iface
, bld_base
,
652 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
653 struct lp_build_context
* bld
,
654 boolean is_aindex_indirect
,
655 LLVMValueRef attrib_index
,
656 LLVMValueRef swizzle_index
)
658 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
659 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
661 return iface
->pBuilder
->swr_tes_llvm_fetch_patch_input(tes_iface
, bld_base
,
// Emits the IR that fetches one channel (swizzle_index) of a GS input
// attribute. When either index is indirect the value is assembled one lane at
// a time; otherwise a single load is emitted.
// In both paths the attribute index is first translated through
// pVtxAttribMap, and the address is formed as
// vertex index * inputVertStride + mapped attribute within pVerts.
668 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
669 struct lp_build_context
* bld
,
670 boolean is_vindex_indirect
,
671 LLVMValueRef vertex_index
,
672 boolean is_aindex_indirect
,
673 LLVMValueRef attrib_index
,
674 LLVMValueRef swizzle_index
)
676 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
677 Value
*vert_index
= unwrap(vertex_index
);
678 Value
*attr_index
= unwrap(attrib_index
);
// Continue emitting at gallivm's current insertion block.
680 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
682 if (is_vindex_indirect
|| is_aindex_indirect
) {
684 Value
*res
= unwrap(bld
->zero
);
685 struct lp_type type
= bld
->type
;
// Indirect path: one iteration per element of the build type.
687 for (i
= 0; i
< type
.length
; i
++) {
688 Value
*vert_chan_index
= vert_index
;
689 Value
*attr_chan_index
= attr_index
;
// Only an indirect index is a vector; pull out this lane's element.
691 if (is_vindex_indirect
) {
692 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
694 if (is_aindex_indirect
) {
695 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
699 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
701 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
702 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
704 Value
*pVector
= ADD(MUL(vert_chan_index
, pInputVertStride
), attrib
);
705 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
// Keep only lane i of the loaded vector and merge it into the result.
707 Value
*value
= VEXTRACT(pInput
, C(i
));
708 res
= VINSERT(res
, value
, C(i
));
// Direct path: both indices are used as-is for a single load.
713 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
715 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
716 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
718 Value
*pVector
= ADD(MUL(vert_index
, pInputVertStride
), attrib
);
720 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
726 // GS output stream layout
727 #define VERTEX_COUNT_SIZE 32
728 #define CONTROL_HEADER_SIZE (8*32)
// Emits the IR that writes the current vertex's output attributes into each
// lane's GS output stream. Each vertex occupies SWR_VTX_NUM_SLOTS slots of
// four floats, placed after a header of VERTEX_COUNT_SIZE +
// CONTROL_HEADER_SIZE bytes. For point-list output the 2-bit stream id of the
// vertex is additionally OR-ed into the control bytes that follow the vertex
// count.
731 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
732 struct lp_build_context
* bld
,
733 LLVMValueRef (*outputs
)[4],
734 LLVMValueRef emitted_vertices_vec
,
735 LLVMValueRef stream_id
)
737 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
// Continue emitting at gallivm's current insertion block.
739 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
740 const uint32_t headerSize
= VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
;
741 const uint32_t attribSize
= 4 * sizeof(float);
742 const uint32_t vertSize
= attribSize
* SWR_VTX_NUM_SLOTS
;
// Per-lane byte offset of the vertex being emitted.
743 Value
*pVertexOffset
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(vertSize
));
// Execution mask from the GS context, narrowed to one bit per lane.
745 Value
*vMask
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_mask
});
746 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, mVWidth
));
748 Value
*pStack
= STACKSAVE();
749 Value
*pTmpPtr
= ALLOCA(mFP32Ty
, C(4)); // used for dummy write for lane masking
751 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
752 uint32_t attribSlot
= attrib
;
753 uint32_t sgvChannel
= 0;
// Point size, layer and viewport index share the SGV slot (one channel
// each); position has its own slot; everything else is a generic attribute.
754 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
755 attribSlot
= VERTEX_SGV_SLOT
;
756 sgvChannel
= VERTEX_SGV_POINT_SIZE_COMP
;
757 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
) {
758 attribSlot
= VERTEX_SGV_SLOT
;
759 sgvChannel
= VERTEX_SGV_RTAI_COMP
;
760 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_VIEWPORT_INDEX
) {
761 attribSlot
= VERTEX_SGV_SLOT
;
762 sgvChannel
= VERTEX_SGV_VAI_COMP
;
763 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
764 attribSlot
= VERTEX_POSITION_SLOT
;
766 attribSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
767 if (iface
->info
->writes_position
) {
772 Value
*pOutputOffset
= ADD(pVertexOffset
, VIMMED1(headerSize
+ attribSize
* attribSlot
)); // + sgvChannel ?
774 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
775 Value
*pLaneOffset
= VEXTRACT(pOutputOffset
, C(lane
));
776 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
777 Value
*pStreamOffset
= GEP(pStream
, pLaneOffset
);
778 pStreamOffset
= BITCAST(pStreamOffset
, mFP32PtrTy
);
// Masked-off lanes are redirected to the scratch alloca so the stores below
// need no branch.
780 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
781 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
783 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
// SGV values are always read from channel 0 of the shader output.
786 if (attribSlot
== VERTEX_SGV_SLOT
)
787 vData
= LOAD(unwrap(outputs
[attrib
][0]));
789 vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
// For the SGV slot only the channel assigned to this semantic is stored.
791 if (attribSlot
!= VERTEX_SGV_SLOT
||
792 sgvChannel
== channel
) {
793 vData
= VEXTRACT(vData
, C(lane
));
794 STORE(vData
, pStreamOffset
);
796 pStreamOffset
= GEP(pStreamOffset
, C(1));
801 /* When the output type is not points, the geometry shader may not
802 * output data to multiple streams. So early exit here.
804 if(iface
->pGsState
->outputTopology
!= TOP_POINT_LIST
) {
805 STACKRESTORE(pStack
);
809 // Info about stream id for each vertex
810 // is coded in 2 bits (4 vert per byte "box"):
811 // ----------------- ----------------- ----
812 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
813 // ----------------- ----------------- ----
815 // Calculate where need to put stream id for current vert
817 Value
*pShiftControl
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(2));
819 // Calculate in which box put stream id for current vert.
820 Value
*pOffsetControl
= LSHR(unwrap(emitted_vertices_vec
), VIMMED1(2));
823 Value
*pStreamIdOffset
= ADD(pOffsetControl
, VIMMED1(VERTEX_COUNT_SIZE
));
825 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
826 Value
*pShift
= TRUNC(VEXTRACT(pShiftControl
, C(lane
)), mInt8Ty
);
827 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
829 Value
*pStreamOffset
= GEP(pStream
, VEXTRACT(pStreamIdOffset
, C(lane
)));
831 // Just make sure that not overflow max - stream id = (0,1,2,3)
// NOTE(review): element 0 of stream_id is read for every lane - confirm that
// the stream id is uniform across lanes.
832 Value
*vVal
= TRUNC(AND(VEXTRACT(unwrap(stream_id
), C(0)), C(0x3)), mInt8Ty
);
834 // Shift it to correct position in byte "box"
835 vVal
= SHL(vVal
, pShift
);
837 // Info about other vertices can be already stored
838 // so we need to read and add bits from current vert info.
839 Value
*storedValue
= LOAD(pStreamOffset
);
840 vVal
= OR(storedValue
, vVal
);
841 STORE(vVal
, pStreamOffset
);
844 STACKRESTORE(pStack
);
848 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
849 struct lp_build_context
* bld
,
850 LLVMValueRef total_emitted_vertices_vec
,
851 LLVMValueRef verts_per_prim_vec
,
852 LLVMValueRef emitted_prims_vec
,
853 LLVMValueRef mask_vec
)
855 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
857 /* When the output type is points, the geometry shader may output data
858 * to multiple streams, and end_primitive has no effect. Info about
859 * stream id for vertices is stored into the same place in memory where
860 * end primitive info is stored so early exit in this case.
862 if (iface
->pGsState
->outputTopology
== TOP_POINT_LIST
) {
866 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
868 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
869 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
871 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
874 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
875 unwrap(verts_per_prim_vec
));
877 vCount
= unwrap(total_emitted_vertices_vec
);
879 Value
*mask
= unwrap(mask_vec
);
880 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
881 mask
= AND(mask
, cmpMask
);
882 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
884 vCount
= SUB(vCount
, VIMMED1(1));
885 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
886 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
888 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
890 Value
*pStack
= STACKSAVE();
891 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
893 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
894 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
895 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
896 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
898 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
899 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
901 Value
*vVal
= LOAD(pStreamOffset
);
902 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
903 STORE(vVal
, pStreamOffset
);
906 STACKRESTORE(pStack
);
910 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
911 LLVMValueRef total_emitted_vertices_vec
,
912 LLVMValueRef emitted_prims_vec
)
914 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
916 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
918 // Store emit count to each output stream in the first DWORD
919 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
921 Value
* pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
922 pStream
= BITCAST(pStream
, mInt32PtrTy
);
923 Value
* pLaneCount
= VEXTRACT(unwrap(total_emitted_vertices_vec
), C(lane
));
924 STORE(pLaneCount
, pStream
);
// TCS prologue: opens a loop over the output-patch vertices, using the loop
// state stored in the TCS interface, and publishes the loop counter,
// broadcast to all lanes, as the invocation id system value. The loop is
// closed by swr_tcs_llvm_emit_epilogue.
929 BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
)
931 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
933 // Iterate for all the vertices in the output patch
// NOTE(review): one argument line of this call is not visible in this section.
934 lp_build_for_loop_begin(&iface
->loop_state
, gallivm
,
935 lp_build_const_int32(gallivm
, 0),
937 lp_build_const_int32(gallivm
, iface
->output_vertices
),
938 lp_build_const_int32(gallivm
, 1));
// Re-sync the SWR builder's insertion point with gallivm's current block.
940 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
941 bld
->system_values
.invocation_id
= wrap(VBROADCAST(unwrap(iface
->loop_state
.counter
)));
943 if (verbose_shader
) {
944 lp_build_printf(gallivm
, "Prologue LOOP: Iteration %d BEGIN\n", iface
->loop_state
.counter
);
945 lp_build_print_value(gallivm
, "LOOP: InvocationId: \n", bld
->system_values
.invocation_id
);
950 BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
)
952 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
954 if (verbose_shader
) {
955 lp_build_printf(gallivm
, "Epilogue LOOP: Iteration %d END\n", iface
->loop_state
.counter
);
957 lp_build_for_loop_end(&iface
->loop_state
);
961 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
962 struct lp_build_tgsi_context
* bld_base
,
963 boolean is_vindex_indirect
,
964 LLVMValueRef vertex_index
,
965 boolean is_aindex_indirect
,
966 LLVMValueRef attrib_index
,
967 LLVMValueRef swizzle_index
)
969 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
970 Value
*vert_index
= unwrap(vertex_index
);
971 Value
*attr_index
= unwrap(attrib_index
);
973 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
975 if (verbose_shader
) {
976 lp_build_print_value(gallivm
, "TCS: Vertex index: ", vertex_index
);
977 lp_build_print_value(gallivm
, "TCS: Attrib index: ", attrib_index
);
978 lp_build_print_value(gallivm
, "TCS: Swizzle index: ", swizzle_index
);
981 if (is_vindex_indirect
) {
982 vert_index
= VEXTRACT(vert_index
, C(0));
983 if (verbose_shader
) {
984 lp_build_print_value(gallivm
, "TCS: Extracted vertex index: ", vertex_index
);
988 if (is_aindex_indirect
) {
989 attr_index
= VEXTRACT(attr_index
, C(0));
990 if (verbose_shader
) {
991 lp_build_print_value(gallivm
, "TCS: Extracted attrib index: ", attrib_index
);
995 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
996 if (verbose_shader
) {
997 lp_build_print_value(gallivm
, "TCS: Attrib index loaded from map: ", wrap(attrib
));
1000 Value
*pBase
= GEP(iface
->pTcsCtx
,
1001 { C(0), C(SWR_HS_CONTEXT_vert
), vert_index
,
1002 C(simdvertex_attrib
), attrib
/*attr_index*/, unwrap(swizzle_index
) });
1004 LLVMValueRef res
= wrap(LOAD(pBase
));
1006 if (verbose_shader
) {
1007 lp_build_print_value(gallivm
, "TCS input fetched: ", res
);
// Emits the IR that reads back one channel (swizzle_index) of a TCS output.
// The result is assembled lane by lane from the per-lane pCPout entries of
// the HS context. The semantic `name` selects the source: outer/inner
// tessellation factors, per-patch data, or a generic control-point attribute
// (whose index is first remapped through pVtxOutputAttribMap).
// NOTE(review): the parameter line declaring `name` is not visible in this section.
1013 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
1014 struct lp_build_tgsi_context
* bld_base
,
1015 boolean is_vindex_indirect
,
1016 LLVMValueRef vertex_index
,
1017 boolean is_aindex_indirect
,
1018 LLVMValueRef attrib_index
,
1019 LLVMValueRef swizzle_index
,
1022 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1024 Value
*vert_index
= unwrap(vertex_index
);
1025 Value
*attr_index
= unwrap(attrib_index
);
// Continue emitting at gallivm's current insertion block.
1027 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1029 if (verbose_shader
) {
1030 lp_build_print_value(gallivm
, "++TCSo: Vertex index: ", vertex_index
);
1031 lp_build_print_value(gallivm
, "++TCSo: Attrib index: ", wrap(attr_index
));
1032 lp_build_print_value(gallivm
, "++TCSo: Swizzle index: ", swizzle_index
);
// Indirect indices arrive as vectors; only element 0 is used.
1035 if (is_vindex_indirect
) {
1036 vert_index
= VEXTRACT(vert_index
, C(0));
// NOTE(review): this trace and the "Extracted attrib index" one below print
// the original operand, not the extracted value - confirm intent.
1039 lp_build_print_value(gallivm
, "TCSo: Extracted vertex index: ", vertex_index
);
1043 if (is_aindex_indirect
) {
1044 attr_index
= VEXTRACT(attr_index
, C(0));
1045 if (verbose_shader
) {
1046 lp_build_print_value(gallivm
, "TCSo: Extracted attrib index: ", attrib_index
);
1050 Value
* res
= unwrap(bld_base
->base
.zero
);
1052 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
// Per-lane entry of the pCPout array.
1053 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1054 Value
* pCpOut
= GEP(p1
, {lane
});
1056 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1058 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1059 Value
* tessFactorArray
= nullptr;
1060 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1061 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1063 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1065 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1066 res
= VINSERT(res
, LOAD(tessFactor
), C(lane
));
1068 } else if (name
== TGSI_SEMANTIC_PATCH
) {
// NOTE(review): this print directly follows the branch line with no visible
// verbose_shader guard and looks like leftover debugging - confirm.
1069 lp_build_print_value(gallivm
, "bbbbb TCS per-patch attr_index: ", wrap(attr_index
));
1070 Value
* attr
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attr_index
, unwrap(swizzle_index
)});
1071 res
= VINSERT(res
, LOAD(attr
), C(lane
));
1072 if (verbose_shader
) {
1073 lp_build_print_value(gallivm
, "++TCSo per-patch lane (patch-id): ", wrap(C(lane
)));
1074 lp_build_print_value(gallivm
, "++TCSo per-patch loaded value: ", wrap(res
));
1077 // Generic attribute
1079 LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_index
}));
1082 lp_build_print_value(gallivm
, "TCSo: Attrib index from map: ", wrap(attrib
));
1084 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
), vert_index
,
1085 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1087 res
= VINSERT(res
, LOAD(attr_chan
), C(lane
));
1091 if (verbose_shader
) {
1092 lp_build_print_value(gallivm
, "TCSo: output fetched: ", wrap(res
));
1098 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
1099 struct lp_build_tgsi_context
*bld_base
,
1101 boolean is_vindex_indirect
,
1102 LLVMValueRef vertex_index
,
1103 boolean is_aindex_indirect
,
1104 LLVMValueRef attrib_index
,
1105 LLVMValueRef swizzle_index
,
1108 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1109 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1111 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1113 if (verbose_shader
) {
1114 lp_build_printf(gallivm
, "[TCS OUT] =============================================\n");
1117 if (verbose_shader
) {
1118 lp_build_print_value(gallivm
, "[TCS OUT] Store mask: ", bld
->exec_mask
.exec_mask
);
1119 lp_build_print_value(gallivm
, "[TCS OUT] Store value: ", value
);
1122 Value
*vert_index
= unwrap(vertex_index
);
1123 Value
*attr_index
= unwrap(attrib_index
);
1125 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1127 if (verbose_shader
) {
1128 lp_build_print_value(gallivm
, "[TCS OUT] Vertex index: ", vertex_index
);
1129 lp_build_print_value(gallivm
, "[TCS OUT] Attrib index: ", wrap(attr_index
));
1130 lp_build_print_value(gallivm
, "[TCS OUT] Swizzle index: ", swizzle_index
);
1133 if (is_vindex_indirect
) {
1134 vert_index
= VEXTRACT(vert_index
, C(0));
1137 lp_build_print_value(gallivm
, "[TCS OUT] Extracted vertex index: ", vertex_index
);
1141 if (is_aindex_indirect
) {
1142 attr_index
= VEXTRACT(attr_index
, C(0));
1143 if (verbose_shader
) {
1144 lp_build_print_value(gallivm
, "[TCS OUT] Extracted attrib index: ", wrap(attr_index
));
1148 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
1149 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1150 Value
* pCpOut
= GEP(p1
, {lane
});
1152 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1153 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1154 Value
* tessFactorArray
= nullptr;
1155 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1156 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1158 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1160 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1161 Value
* valueToStore
= VEXTRACT(unwrap(value
), C(lane
));
1162 struct lp_exec_mask
*mask
= &bld
->exec_mask
;
1163 if (mask
->has_mask
) {
1164 Value
*originalVal
= LOAD(tessFactor
);
1165 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask
->exec_mask
), C(lane
)), mInt1Ty
);
1166 valueToStore
= SELECT(vMask
, valueToStore
, originalVal
);
1168 STORE(valueToStore
, tessFactor
);
1169 if (verbose_shader
) {
1170 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore
));
1172 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1173 Value
* attrib
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_index
}));
1174 if (verbose_shader
) {
1175 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index
));
1176 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index
));
1177 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect
)));
1178 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect
)));
1179 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib
));
1181 Value
* attr
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attrib
});
1182 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1183 if (verbose_shader
) {
1184 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1185 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] value to store: ", value
);
1186 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store
));
1187 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] chan_index: ", swizzle_index
);
1189 struct lp_exec_mask
*mask
= &bld
->exec_mask
;
1190 if (mask
->has_mask
) {
1191 Value
*originalVal
= LOADV(attr
, {C(0), unwrap(swizzle_index
)});
1192 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask
->exec_mask
), C(lane
)), mInt1Ty
);
1193 value_to_store
= SELECT(vMask
, BITCAST(value_to_store
, mFP32Ty
), originalVal
);
1194 if (verbose_shader
) {
1195 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] store mask: ", bld
->exec_mask
.exec_mask
);
1196 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal
));
1197 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vMask: ", wrap(vMask
));
1198 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store
));
1201 STOREV(value_to_store
, attr
, {C(0), unwrap(swizzle_index
)});
1202 if (verbose_shader
) {
1203 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store
));
1206 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1207 Value
* attrib
= LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_index
}));
1209 if (verbose_shader
) {
1210 lp_build_print_value(gallivm
, "[TCS OUT][VTX] invocation_id: ", bld
->system_values
.invocation_id
);
1211 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index
));
1212 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib
));
1213 lp_build_print_value(gallivm
, "[TCS OUT][VTX] chan_index: ", swizzle_index
);
1214 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value: ", value
);
1215 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store
));
1218 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
),
1219 VEXTRACT(unwrap(bld
->system_values
.invocation_id
), C(0)),
1220 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1222 // Mask output values if needed
1223 struct lp_exec_mask
*mask
= &bld
->exec_mask
;
1224 if (mask
->has_mask
) {
1225 Value
*originalVal
= LOAD(attr_chan
);
1226 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask
->exec_mask
), C(lane
)), mInt1Ty
);
1227 // convert input to float before trying to store
1228 value_to_store
= SELECT(vMask
, BITCAST(value_to_store
, mFP32Ty
), originalVal
);
1230 STORE(value_to_store
, attr_chan
);
1231 if (verbose_shader
) {
1232 lp_build_print_value(gallivm
, "[TCS OUT][VTX] stored: ", wrap(value_to_store
));
// TCS barrier callback. Closes the loop currently tracked in
// iface->loop_state and immediately opens a new one that starts at 0, steps
// by 1 and is bounded by iface->output_vertices. The SWR IRBuilder is then
// re-synced with gallivm's insert block and the new loop counter, broadcast
// across the SIMD vector, is published as the invocation_id system value.
// The two printf/print_value groups are debug aids gated on verbose_shader.
// NOTE(review): the comparison predicate passed to lp_build_for_loop_begin
// is not visible in this excerpt - confirm it before changing the loop bounds.
1241 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
1242 struct lp_build_tgsi_context
*bld_base
)
1244 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1245 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1247 if (verbose_shader
) {
1248 lp_build_printf(gallivm
, "Barrier LOOP: Iteration %d END\n", iface
->loop_state
.counter
);
1251 // End previous loop
1252 lp_build_for_loop_end(&iface
->loop_state
);
// Open the next loop over [0, output_vertices) with step 1.
1255 lp_build_for_loop_begin(&iface
->loop_state
, gallivm
,
1256 lp_build_const_int32(gallivm
, 0),
1258 lp_build_const_int32(gallivm
, iface
->output_vertices
),
1259 lp_build_const_int32(gallivm
, 1));
// gallivm's insert block may have changed; point the SWR IRBuilder at it.
1262 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// The new loop counter becomes the invocation id seen by the shader.
1264 bld
->system_values
.invocation_id
= wrap(VBROADCAST(unwrap(iface
->loop_state
.counter
)));
1266 if (verbose_shader
) {
1267 lp_build_printf(gallivm
, "Barrier LOOP: Iteration %d BEGIN\n", iface
->loop_state
.counter
);
1268 lp_build_print_value(gallivm
, "LOOP: InvocationId: \n", bld
->system_values
.invocation_id
);
1274 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
1275 struct lp_build_tgsi_context
* bld_base
,
1276 boolean is_aindex_indirect
,
1277 LLVMValueRef attrib_index
,
1278 LLVMValueRef swizzle_index
)
1280 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1281 Value
*attr_index
= unwrap(attrib_index
);
1282 Value
*res
= unwrap(bld_base
->base
.zero
);
1284 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1286 if (verbose_shader
) {
1287 lp_build_printf(gallivm
, "[TES IN][PATCH] --------------------------------------\n");
1290 if (is_aindex_indirect
) {
1292 struct lp_type type
= bld_base
->base
.type
;
1294 for (i
= 0; i
< type
.length
; i
++) {
1295 Value
*attr_chan_index
= attr_index
;
1297 if (is_aindex_indirect
) {
1298 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1302 LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_chan_index
}));
1304 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1305 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1306 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1307 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1308 if (verbose_shader
) {
1309 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1310 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index
));
1311 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1312 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1313 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1315 res
= VINSERT(res
, Val
, C(i
));
1318 Value
*attrib
= LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_index
}));
1320 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1321 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1322 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1323 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1324 if (verbose_shader
) {
1325 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1326 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index
));
1327 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1328 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1329 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1331 res
= VBROADCAST(Val
);
1333 if (verbose_shader
) {
1334 lp_build_print_value(gallivm
, "[TES IN][PATCH] returning: ", wrap(res
));
1342 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
1343 struct lp_build_tgsi_context
* bld_base
,
1344 boolean is_vindex_indirect
,
1345 LLVMValueRef vertex_index
,
1346 boolean is_aindex_indirect
,
1347 LLVMValueRef attrib_index
,
1348 LLVMValueRef swizzle_index
)
1350 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1351 Value
*vert_index
= unwrap(vertex_index
);
1352 Value
*attr_index
= unwrap(attrib_index
);
1354 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1356 if (verbose_shader
) {
1357 lp_build_printf(gallivm
, "[TES IN][VTX] --------------------------------------\n");
1360 Value
*res
= unwrap(bld_base
->base
.zero
);
1361 if (is_vindex_indirect
|| is_aindex_indirect
) {
1363 struct lp_type type
= bld_base
->base
.type
;
1365 for (i
= 0; i
< type
.length
; i
++) {
1366 Value
*vert_chan_index
= vert_index
;
1367 Value
*attr_chan_index
= attr_index
;
1369 if (is_vindex_indirect
) {
1370 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1372 if (is_aindex_indirect
) {
1373 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1377 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
1379 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1380 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1381 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_chan_index
});
1382 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1383 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1384 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1385 if (verbose_shader
) {
1386 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1387 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1388 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1389 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1390 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1392 res
= VINSERT(res
, Val
, C(i
));
1395 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1397 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1398 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1399 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_index
});
1400 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1401 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1402 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1403 if (verbose_shader
) {
1404 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1405 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1406 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1407 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1408 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1410 res
= VBROADCAST(Val
);
1412 if (verbose_shader
) {
1413 lp_build_print_value(gallivm
, "[TES IN][VTX] returning: ", wrap(res
));
// Builds and JIT-compiles the geometry shader bound to ctx->gs.
// Steps visible below:
//   1. clear and fill ctx->gs->gsState (topology, vertex counts, stream and
//      allocation layout);
//   2. create an LLVM function taking (draw context, worker data, GS context)
//      pointers and position both the SWR and the gallivm builder in its
//      entry block;
//   3. build the GS input attribute map by locating each GS input among the
//      bound vertex shader's outputs;
//   4. translate the TGSI with lp_build_tgsi_soa using the swr_gs_llvm_*
//      callbacks;
//   5. verify, compile and fetch the entry point via gallivm_jit_function.
1422 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
1424 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
1425 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
1427 memset(pGS
, 0, sizeof(*pGS
));
1429 pGS
->gsEnable
= true;
1431 pGS
->numInputAttribs
= (VERTEX_ATTRIB_START_SLOT
- VERTEX_POSITION_SLOT
) + info
->num_inputs
;
1432 pGS
->outputTopology
=
1433 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
], 0);
1435 /* It's +1 because emit_vertex in swr is always called exactly one time more
1436 * than max_vertices passed in Geometry Shader. We need to allocate more memory
1437 * to avoid crash/memory overwritten.
1439 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] + 1;
1440 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
1442 // If point primitive then assume to use multiple streams
1443 if(pGS
->outputTopology
== TOP_POINT_LIST
) {
1444 pGS
->isSingleStream
= false;
1446 pGS
->isSingleStream
= true;
1447 pGS
->singleStreamID
= 0;
1450 pGS
->vertexAttribOffset
= VERTEX_POSITION_SLOT
;
1451 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
1452 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
1453 pGS
->controlDataSize
= 8; // GS outputs max of 8 32B units
1454 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
1455 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
// Allocation = vertex count + control header + maxNumVerts vertices of
// SWR_VTX_NUM_SLOTS * 16 each.
1457 pGS
->allocationSize
=
1458 VERTEX_COUNT_SIZE
+ // vertex count
1459 CONTROL_HEADER_SIZE
+ // control header
1460 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
1461 pGS
->maxNumVerts
; // num verts
1463 struct swr_geometry_shader
*gs
= ctx
->gs
;
1465 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1466 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1468 memset(outputs
, 0, sizeof(outputs
));
1470 AttrBuilder attrBuilder
;
1471 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: (swr_draw_context*, i8*, SWR_GS_CONTEXT*) -> void.
1473 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1474 PointerType::get(mInt8Ty
, 0),
1475 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
// NOTE: despite its name, vsFuncType is the GS function type built from gsArgs.
1476 FunctionType
*vsFuncType
=
1477 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
1479 // create new geometry shader function
1480 auto pFunction
= Function::Create(vsFuncType
,
1481 GlobalValue::ExternalLinkage
,
1483 JM()->mpCurrentModule
);
// The attribute API differs before LLVM 5 (AttributeSet vs AttributeList).
1484 #if LLVM_VERSION_MAJOR < 5
1485 AttributeSet attrSet
= AttributeSet::get(
1486 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1487 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1489 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Point both the SWR IRBuilder and the gallivm builder at the entry block.
1492 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1493 IRB()->SetInsertPoint(block
);
1494 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1496 auto argitr
= pFunction
->arg_begin();
1497 Value
*hPrivateData
= &*argitr
++;
1498 hPrivateData
->setName("hPrivateData");
1499 Value
*pWorkerData
= &*argitr
++;
1500 pWorkerData
->setName("pWorkerData");
1501 Value
*pGsCtx
= &*argitr
++;
1502 pGsCtx
->setName("gsCtx");
// Constant buffers and their sizes live in the draw context.
1505 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
1506 consts_ptr
->setName("gs_constants");
1507 Value
*const_sizes_ptr
=
1508 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
1509 const_sizes_ptr
->setName("num_gs_constants");
1511 struct lp_build_sampler_soa
*sampler
=
1512 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
1513 assert(sampler
!= nullptr);
1515 struct lp_bld_tgsi_system_values system_values
;
1516 memset(&system_values
, 0, sizeof(system_values
));
1517 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
1518 system_values
.invocation_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
// Input attribute map: for each GS input slot, the vertex slot it reads from.
// NOTE(review): mapConstants is only appended to in the code visible here.
1520 std::vector
<Constant
*> mapConstants
;
1521 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1522 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1523 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1524 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1526 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
1527 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1529 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
1531 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
// Position inputs always map to the dedicated position slot.
1534 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
1535 vs_slot
= VERTEX_POSITION_SLOT
;
1537 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
1538 mapConstants
.push_back(C(vs_slot
));
// Execution mask comes from the GS context.
// NOTE(review): the vector type is given as (32, 32 * 8) here while the
// loop below iterates mVWidth lanes - presumably both describe 8 lanes.
1541 struct lp_build_mask_context mask
;
1542 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
1543 lp_build_mask_begin(&mask
, gallivm
,
1544 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1546 // zero out cut buffer so we can load/modify/store bits
1547 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
1549 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
// LLVM 10 takes the alignment as a MaybeAlign instead of a plain integer.
1550 #if LLVM_VERSION_MAJOR >= 10
1551 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH
));
1553 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
// Callback table handed to the TGSI translator for GS-specific operations.
1557 struct swr_gs_llvm_iface gs_iface
;
1558 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
1559 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
1560 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
1561 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
1562 gs_iface
.pBuilder
= this;
1563 gs_iface
.pGsCtx
= pGsCtx
;
1564 gs_iface
.pGsState
= pGS
;
1565 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
1566 gs_iface
.num_verts_per_prim
=
1567 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
1568 gs_iface
.info
= info
;
1569 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
// Translator parameters; zeroed first so unset fields are null.
1571 struct lp_build_tgsi_params params
;
1572 memset(¶ms
, 0, sizeof(params
));
1573 params
.type
= lp_type_float_vec(32, 32 * 8);
1574 params
.mask
= & mask
;
1575 params
.consts_ptr
= wrap(consts_ptr
);
1576 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1577 params
.system_values
= &system_values
;
1578 params
.inputs
= inputs
;
1579 params
.context_ptr
= wrap(hPrivateData
);
1580 params
.sampler
= sampler
;
1581 params
.info
= &gs
->info
.base
;
1582 params
.gs_iface
= &gs_iface
.base
;
1584 lp_build_tgsi_soa(gallivm
,
1589 lp_build_mask_end(&mask
);
1591 sampler
->destroy(sampler
);
// Re-sync the SWR IRBuilder with wherever gallivm finished emitting.
1593 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1597 gallivm_verify_function(gallivm
, wrap(pFunction
));
1598 gallivm_compile_module(gallivm
);
1601 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1603 debug_printf("geom shader %p\n", pFunc
);
1604 assert(pFunc
&& "Error: GeomShader = NULL");
1606 JM()->mIsModuleFinalized
= true;
1612 BuilderSWR::CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
1614 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
1615 struct tgsi_shader_info
*info
= &ctx
->tes
->info
.base
;
1617 // tessellation is enabled if TES is present
1618 // clear tessellation state here then
1619 memset(pTS
, 0, sizeof(*pTS
));
1621 pTS
->tsEnable
= true;
1623 unsigned tes_prim_mode
= info
->properties
[TGSI_PROPERTY_TES_PRIM_MODE
];
1624 unsigned tes_spacing
= info
->properties
[TGSI_PROPERTY_TES_SPACING
];
1625 bool tes_vertex_order_cw
= info
->properties
[TGSI_PROPERTY_TES_VERTEX_ORDER_CW
];
1626 bool tes_point_mode
= info
->properties
[TGSI_PROPERTY_TES_POINT_MODE
];
1627 SWR_TS_DOMAIN type
= SWR_TS_ISOLINE
;
1628 SWR_TS_PARTITIONING partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1629 SWR_TS_OUTPUT_TOPOLOGY topology
= SWR_TS_OUTPUT_POINT
;
1630 PRIMITIVE_TOPOLOGY postDSTopology
= TOP_POINT_LIST
;
1632 // TESS_TODO: move this to helper functions to improve readability
1633 switch (tes_prim_mode
) {
1634 case PIPE_PRIM_LINES
:
1635 type
= SWR_TS_ISOLINE
;
1636 postDSTopology
= TOP_LINE_LIST
;
1638 case PIPE_PRIM_TRIANGLES
:
1640 postDSTopology
= TOP_TRIANGLE_LIST
;
1642 case PIPE_PRIM_QUADS
:
1644 // See OpenGL spec - quads are tessellated into triangles
1645 postDSTopology
= TOP_TRIANGLE_LIST
;
1651 switch (tes_spacing
) {
1652 case PIPE_TESS_SPACING_FRACTIONAL_ODD
:
1653 partitioning
= SWR_TS_ODD_FRACTIONAL
;
1655 case PIPE_TESS_SPACING_FRACTIONAL_EVEN
:
1656 partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1658 case PIPE_TESS_SPACING_EQUAL
:
1659 partitioning
= SWR_TS_INTEGER
;
1665 if (tes_point_mode
) {
1666 topology
= SWR_TS_OUTPUT_POINT
;
1667 postDSTopology
= TOP_POINT_LIST
;
1669 else if (tes_prim_mode
== PIPE_PRIM_LINES
) {
1670 topology
= SWR_TS_OUTPUT_LINE
;
1672 else if (tes_vertex_order_cw
) {
1673 topology
= SWR_TS_OUTPUT_TRI_CW
;
1676 topology
= SWR_TS_OUTPUT_TRI_CCW
;
1680 pTS
->tsOutputTopology
= topology
;
1681 pTS
->partitioning
= partitioning
;
1682 pTS
->numDsOutputAttribs
= info
->num_outputs
;
1683 pTS
->postDSTopology
= postDSTopology
;
1685 pTS
->dsAllocationSize
= SWR_VTX_NUM_SLOTS
* MAX_NUM_VERTS_PER_PRIM
;
1686 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1687 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1688 pTS
->dsOutVtxAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1690 struct swr_tess_evaluation_shader
*tes
= ctx
->tes
;
1692 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1693 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1695 memset(outputs
, 0, sizeof(outputs
));
1697 AttrBuilder attrBuilder
;
1698 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1700 std::vector
<Type
*> tesArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1701 PointerType::get(mInt8Ty
, 0),
1702 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1703 FunctionType
*tesFuncType
=
1704 FunctionType::get(Type::getVoidTy(JM()->mContext
), tesArgs
, false);
1706 // create new vertex shader function
1707 auto pFunction
= Function::Create(tesFuncType
,
1708 GlobalValue::ExternalLinkage
,
1710 JM()->mpCurrentModule
);
1712 #if LLVM_VERSION_MAJOR < 5
1713 AttributeSet attrSet
= AttributeSet::get(
1714 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1715 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1717 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1720 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1721 IRB()->SetInsertPoint(block
);
1722 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1724 auto argitr
= pFunction
->arg_begin();
1725 Value
*hPrivateData
= &*argitr
++;
1726 hPrivateData
->setName("hPrivateData");
1727 Value
*pWorkerData
= &*argitr
++;
1728 pWorkerData
->setName("pWorkerData");
1729 Value
*pTesCtx
= &*argitr
++;
1730 pTesCtx
->setName("tesCtx");
1733 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTES
)});
1734 consts_ptr
->setName("tes_constants");
1735 Value
*const_sizes_ptr
=
1736 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTES
});
1737 const_sizes_ptr
->setName("num_tes_constants");
1739 struct lp_build_sampler_soa
*sampler
=
1740 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_EVAL
);
1741 assert(sampler
!= nullptr);
1743 struct lp_bld_tgsi_system_values system_values
;
1744 memset(&system_values
, 0, sizeof(system_values
));
1746 // Load and calculate system values
1747 // Tessellation coordinates (gl_TessCoord)
1748 Value
*vecOffset
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
}, "vecOffset");
1749 Value
*vecStride
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorStride
}, "vecStride");
1750 Value
*vecIndex
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
});
1752 Value
* tess_coord
= ALLOCA(ArrayType::get(mSimdFP32Ty
, 3));
1754 Value
*tessCoordU
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainU
}), {vecIndex
}, "tessCoordU");
1755 STORE(tessCoordU
, tess_coord
, {0, 0});
1756 Value
*tessCoordV
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainV
}), {vecIndex
}, "tessCoordV");
1757 STORE(tessCoordV
, tess_coord
, {0, 1});
1758 Value
*tessCoordW
= FSUB(FSUB(VIMMED1(1.0f
), tessCoordU
), tessCoordV
, "tessCoordW");
1759 STORE(tessCoordW
, tess_coord
, {0, 2});
1760 system_values
.tess_coord
= wrap(tess_coord
);
1763 system_values
.prim_id
= wrap(VBROADCAST(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_PrimitiveID
}), "PrimitiveID"));
1765 // Tessellation factors
1766 Value
* pPatch
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
});
1767 Value
* pTessFactors
= GEP(pPatch
, {C(0), C(ScalarPatch_tessFactors
)});
1769 assert(SWR_NUM_OUTER_TESS_FACTORS
== 4);
1770 Value
* sys_value_outer_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1771 for (unsigned i
= 0; i
< SWR_NUM_OUTER_TESS_FACTORS
; i
++) {
1772 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors
, i
});
1773 sys_value_outer_factors
= VINSERT(sys_value_outer_factors
, v
, i
, "gl_TessLevelOuter");
1775 system_values
.tess_outer
= wrap(sys_value_outer_factors
);
1777 assert(SWR_NUM_INNER_TESS_FACTORS
== 2);
1778 Value
* sys_value_inner_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1779 for (unsigned i
= 0; i
< SWR_NUM_INNER_TESS_FACTORS
; i
++) {
1780 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors
, i
});
1781 sys_value_inner_factors
= VINSERT(sys_value_inner_factors
, v
, i
, "gl_TessLevelInner");
1783 system_values
.tess_inner
= wrap(sys_value_inner_factors
);
1787 lp_build_print_value(gallivm
, "tess_coord = ", system_values
.tess_coord
);
1790 struct tgsi_shader_info
*pPrevShader
= nullptr;
1793 pPrevShader
= &ctx
->tcs
->info
.base
;
1796 pPrevShader
= &ctx
->vs
->info
.base
;
1799 // Figure out how many per-patch attributes we have
1800 unsigned perPatchAttrs
= 0;
1801 unsigned genericAttrs
= 0;
1802 unsigned tessLevelAttrs
= 0;
1803 unsigned sgvAttrs
= 0;
1804 for (unsigned slot
= 0; slot
< pPrevShader
->num_outputs
; slot
++) {
1805 switch (pPrevShader
->output_semantic_name
[slot
]) {
1806 case TGSI_SEMANTIC_PATCH
:
1809 case TGSI_SEMANTIC_GENERIC
:
1812 case TGSI_SEMANTIC_TESSINNER
:
1813 case TGSI_SEMANTIC_TESSOUTER
:
1816 case TGSI_SEMANTIC_POSITION
:
1817 case TGSI_SEMANTIC_CLIPDIST
:
1818 case TGSI_SEMANTIC_PSIZE
:
1822 assert(!"Unknown semantic input in TES");
1826 std::vector
<Constant
*> mapConstants
;
1827 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1828 Value
*patchAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1829 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1830 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1831 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1833 // Where in TCS output is my attribute?
1834 // TESS_TODO: revisit after implement pass-through TCS
1835 unsigned tcs_slot
= locate_linkage(semantic_name
, semantic_idx
, pPrevShader
);
1836 assert(tcs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1838 // Skip tessellation levels - these go to the tessellator, not TES
1839 switch (semantic_name
) {
1840 case TGSI_SEMANTIC_GENERIC
:
1841 tcs_slot
= tcs_slot
+ VERTEX_ATTRIB_START_SLOT
- sgvAttrs
- tessLevelAttrs
;
1843 case TGSI_SEMANTIC_PATCH
:
1844 tcs_slot
= semantic_idx
;
1846 case TGSI_SEMANTIC_POSITION
:
1847 tcs_slot
= VERTEX_POSITION_SLOT
;
1849 case TGSI_SEMANTIC_CLIPDIST
:
1850 case TGSI_SEMANTIC_PSIZE
:
1853 assert(!"Unexpected semantic found while builiding TES input map");
1855 if (semantic_name
== TGSI_SEMANTIC_PATCH
) {
1856 STORE(C(tcs_slot
), patchAttribMap
, {0, slot
});
1858 STORE(C(tcs_slot
), vtxAttribMap
, {0, slot
});
1860 mapConstants
.push_back(C(tcs_slot
));
1863 // Build execution mask
1864 struct lp_build_mask_context mask
;
1865 Value
*mask_val
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_mask
}, "tesMask");
1868 lp_build_print_value(gallivm
, "TES execution mask: ", wrap(mask_val
));
1870 lp_build_mask_begin(&mask
, gallivm
,
1871 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1873 struct swr_tes_llvm_iface tes_iface
;
1875 tes_iface
.base
.fetch_vertex_input
= ::swr_tes_llvm_fetch_vtx_input
;
1876 tes_iface
.base
.fetch_patch_input
= ::swr_tes_llvm_fetch_patch_input
;
1878 tes_iface
.pBuilder
= this;
1879 tes_iface
.pTesCtx
= pTesCtx
;
1880 tes_iface
.pTsState
= pTS
;
1881 tes_iface
.num_outputs
= tes
->info
.base
.num_outputs
;
1882 tes_iface
.info
= info
;
1883 tes_iface
.pVtxAttribMap
= vtxAttribMap
;
1884 tes_iface
.pPatchAttribMap
= patchAttribMap
;
1886 struct lp_build_tgsi_params params
;
1887 memset(¶ms
, 0, sizeof(params
));
1888 params
.type
= lp_type_float_vec(32, 32 * 8);
1889 params
.mask
= & mask
;
1890 params
.consts_ptr
= wrap(consts_ptr
);
1891 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1892 params
.system_values
= &system_values
;
1893 params
.inputs
= inputs
;
1894 params
.context_ptr
= wrap(hPrivateData
);
1895 params
.sampler
= sampler
;
1896 params
.info
= &tes
->info
.base
;
1897 params
.tes_iface
= &tes_iface
.base
;
1900 lp_build_tgsi_soa(gallivm
,
1905 lp_build_mask_end(&mask
);
1907 sampler
->destroy(sampler
);
1909 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1911 // Write output attributes
1912 Value
*dclOut
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pOutputData
}, "dclOut");
1914 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
1915 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1916 if (!outputs
[attrib
][channel
])
1919 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));;
1920 Value
*attribOffset
=
1921 LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_outVertexAttribOffset
});
1923 // Assume we write position
1924 Value
* outputSlot
= C(VERTEX_POSITION_SLOT
);
1925 if (tes
->info
.base
.output_semantic_name
[attrib
] != TGSI_SEMANTIC_POSITION
) {
1926 // No, it's a generic attribute, not a position - let's calculate output slot
1927 uint32_t outSlot
= attrib
;
1928 if (tes
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
) {
1929 // this shader will write position, so in shader's term
1930 // output starts at attrib 1, but we will handle that separately,
1931 // so let's fix the outSlot
1934 outputSlot
= ADD(attribOffset
, C(outSlot
));
1937 Value
*attribVecIndex
=
1938 ADD(MUL(vecStride
, MUL(outputSlot
, C(4))), vecOffset
);
1940 uint32_t outputComponent
= 0;
1941 uint32_t curComp
= outputComponent
+ channel
;
1942 auto outValIndex
= ADD(attribVecIndex
, MUL(vecStride
, C(curComp
)));
1943 STOREV(val
, dclOut
, {outValIndex
});
1945 if (verbose_shader
) {
1946 lp_build_printf(gallivm
,
1947 "TES output [%d][%d]",
1950 lp_build_print_value(gallivm
, " = ", wrap(val
));
1957 JM()->DumpToFile(pFunction
, "src");
1958 gallivm_verify_function(gallivm
, wrap(pFunction
));
1960 gallivm_compile_module(gallivm
);
1961 JM()->DumpToFile(pFunction
, "optimized");
1963 PFN_TES_FUNC pFunc
=
1964 (PFN_TES_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1966 debug_printf("tess evaluation shader %p\n", pFunc
);
1967 assert(pFunc
&& "Error: TessEvaluationShader = NULL");
1969 JM()->DumpAsm(pFunction
, "asm");
1971 JM()->mIsModuleFinalized
= true;
// BuilderSWR::CompileTCS
//
// Builds the LLVM IR function for the tessellation control shader bound to
// ctx->tcs, runs the gallivm TGSI translator over its tokens and JIT-compiles
// the result.
//
// Steps visible in this function:
//   1. Fill ctx->tsState (HS input/output attribute counts, allocation size,
//      attribute start offsets).
//   2. Create a void function taking (swr_draw_context *, i8 *,
//      SWR_HS_CONTEXT *) and position both IR builders at its entry block.
//   3. Set up constants, sampler and system values (prim_id, invocation_id,
//      vertices_in).
//   4. Build the input slot map (against the VS outputs) and the per-vertex /
//      per-patch output slot maps.
//   5. Translate the shader with lp_build_tgsi_soa, then verify, compile and
//      JIT the module.
//
// ctx - SWR context; ctx->tcs and ctx->vs are dereferenced, ctx->tsState is
//       written.
// key - TCS variant key; only key.sampler is read in the lines shown here.
1977 BuilderSWR::CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
1979 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
1980 struct tgsi_shader_info
*info
= &ctx
->tcs
->info
.base
;
// Publish the shader's attribute counts into the tessellation state.
1982 pTS
->numHsInputAttribs
= info
->num_inputs
;
1983 pTS
->numHsOutputAttribs
= info
->num_outputs
;
1985 pTS
->hsAllocationSize
= sizeof(ScalarPatch
);
1987 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1988 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1990 struct swr_tess_control_shader
*tcs
= ctx
->tcs
;
1992 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1993 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
// Only outputs is cleared here; inputs is handed to the translator as-is.
1995 memset(outputs
, 0, sizeof(outputs
));
1997 AttrBuilder attrBuilder
;
1998 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: (swr_draw_context *, i8 *, SWR_HS_CONTEXT *) -> void.
2000 std::vector
<Type
*> tcsArgs
{
2001 PointerType::get(Gen_swr_draw_context(JM()), 0),
2002 PointerType::get(mInt8Ty
, 0),
2003 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2004 FunctionType
*tcsFuncType
=
2005 FunctionType::get(Type::getVoidTy(JM()->mContext
), tcsArgs
, false);
2007 // create new tessellation control shader function
2008 auto pFunction
= Function::Create(tcsFuncType
,
2009 GlobalValue::ExternalLinkage
,
2011 JM()->mpCurrentModule
);
// The attribute API differs between LLVM < 5 (AttributeSet) and later
// versions (AttributeList).
2013 #if LLVM_VERSION_MAJOR < 5
2014 AttributeSet attrSet
= AttributeSet::get(
2015 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2016 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2018 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Point both the C++ IRBuilder and the gallivm C builder at the entry block.
2021 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2022 IRB()->SetInsertPoint(block
);
2023 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the three function arguments.
2025 auto argitr
= pFunction
->arg_begin();
2026 Value
*hPrivateData
= &*argitr
++;
2027 hPrivateData
->setName("hPrivateData");
2028 Value
*pWorkerData
= &*argitr
++;
2029 pWorkerData
->setName("pWorkerData");
2030 Value
*pTcsCtx
= &*argitr
++;
2031 pTcsCtx
->setName("tcsCtx");
// NOTE(review): the declaration receiving this GEP result (used below as
// consts_ptr) is not visible in this listing.
2034 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTCS
)});
2035 consts_ptr
->setName("tcs_constants");
2036 Value
*const_sizes_ptr
=
2037 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTCS
});
2038 const_sizes_ptr
->setName("num_tcs_constants");
2040 struct lp_build_sampler_soa
*sampler
=
2041 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_CTRL
);
2042 assert(sampler
!= nullptr);
2044 struct lp_bld_tgsi_system_values system_values
;
2045 memset(&system_values
, 0, sizeof(system_values
));
2047 system_values
.prim_id
=
2048 wrap(LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_PrimitiveID
}));
// Per-lane invocation ids: an 8-entry and a 16-entry constant vector appear
// below; the condition selecting between them is not visible here.
2050 Constant
*vInvocationId
;
2052 vInvocationId
= C({0, 1, 2, 3, 4, 5, 6, 7});
2055 C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
2058 system_values
.invocation_id
= wrap(vInvocationId
);
2059 system_values
.vertices_in
= wrap(C(tcs
->vertices_per_patch
));
2061 if (verbose_shader
) {
2062 lp_build_print_value(gallivm
, "TCS::prim_id = ", system_values
.prim_id
);
2063 lp_build_print_value(gallivm
, "TCS::invocation_id = ", system_values
.invocation_id
);
2064 lp_build_print_value(gallivm
, "TCS::vertices_in = ", system_values
.vertices_in
);
// Input map: for each TCS input slot, store the slot of the matching VS
// output (found with locate_linkage, then offset by
// VERTEX_ATTRIB_START_SLOT; POSITION maps to VERTEX_POSITION_SLOT).
2067 std::vector
<Constant
*> mapConstants
;
2068 Value
*vtxAttribMap
=
2069 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2071 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
2072 ubyte semantic_name
= info
->input_semantic_name
[slot
];
2073 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
2076 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
2077 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
2079 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
2081 if (ctx
->vs
->info
.base
.output_semantic_name
[0]
2082 == TGSI_SEMANTIC_POSITION
)
2085 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
2086 vs_slot
= VERTEX_POSITION_SLOT
;
2088 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
2089 mapConstants
.push_back(C(vs_slot
));
2092 // Prepare map of output attributes. Needed when shader instance wants
2093 // to read own output or output of other instance, which is allowed in TCS
// NOTE(review): both output maps are sized with PIPE_MAX_SHADER_INPUTS but
// indexed by output slot (slot < info->num_outputs) - confirm the two limits
// are equal.
2094 Value
*vtxOutputAttribMap
=
2095 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2096 // Map for per-patch attributes
2097 Value
*patchOutputAttribMap
=
2098 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2099 for (unsigned slot
= 0; slot
< info
->num_outputs
; slot
++) {
2100 ubyte name
= info
->output_semantic_name
[slot
];
2101 int32_t idx
= info
->output_semantic_index
[slot
];
2102 if (name
== TGSI_SEMANTIC_PATCH
) {
2103 STORE(C(idx
), patchOutputAttribMap
, {0, slot
});
2105 int32_t target_slot
= slot
;
2106 if (name
== TGSI_SEMANTIC_GENERIC
) {
2107 target_slot
+= VERTEX_ATTRIB_START_SLOT
;
2109 // Now normalize target slot
2110 for (ubyte as
= 0; as
< slot
; as
++) {
2111 ubyte name
= info
->output_semantic_name
[as
];
2113 case TGSI_SEMANTIC_TESSOUTER
:
2114 case TGSI_SEMANTIC_TESSINNER
:
2115 case TGSI_SEMANTIC_PATCH
:
2116 case TGSI_SEMANTIC_POSITION
:
2120 if (name
== TGSI_SEMANTIC_POSITION
) {
2121 target_slot
= VERTEX_POSITION_SLOT
;
2123 STORE(C(target_slot
), vtxOutputAttribMap
, {0, slot
});
2124 mapConstants
.push_back(C(target_slot
));
// Execution mask is loaded from the HS context.
2128 struct lp_build_mask_context mask
;
2129 Value
*mask_val
= LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_mask
}, "tcsMask");
2130 lp_build_mask_begin(
2131 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
// Callback interface the TGSI translator uses for TCS input/output access,
// barriers and prologue/epilogue emission.
2133 struct swr_tcs_llvm_iface tcs_iface
;
2135 tcs_iface
.base
.emit_store_output
= ::swr_tcs_llvm_store_output
;
2136 tcs_iface
.base
.emit_fetch_input
= ::swr_tcs_llvm_fetch_input
;
2137 tcs_iface
.base
.emit_fetch_output
= ::swr_tcs_llvm_fetch_output
;
2138 tcs_iface
.base
.emit_barrier
= ::swr_tcs_llvm_emit_barrier
;
2139 tcs_iface
.base
.emit_prologue
= ::swr_tcs_llvm_emit_prologue
;
2140 tcs_iface
.base
.emit_epilogue
= ::swr_tcs_llvm_emit_epilogue
;
2142 tcs_iface
.pBuilder
= this;
2143 tcs_iface
.pTcsCtx
= pTcsCtx
;
2144 tcs_iface
.pTsState
= pTS
;
2145 tcs_iface
.output_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
2146 tcs_iface
.info
= info
;
2147 tcs_iface
.pVtxAttribMap
= vtxAttribMap
;
2148 tcs_iface
.pVtxOutputAttribMap
= vtxOutputAttribMap
;
2149 tcs_iface
.pPatchOutputAttribMap
= patchOutputAttribMap
;
// Translator parameters; zeroed first so unset fields are null.
2151 struct lp_build_tgsi_params params
;
2152 memset(&params
, 0, sizeof(params
));
2153 params
.type
= lp_type_float_vec(32, 32 * 8);
2154 params
.mask
= &mask
;
2155 params
.consts_ptr
= wrap(consts_ptr
);
2156 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2157 params
.system_values
= &system_values
;
2158 params
.inputs
= inputs
;
2159 params
.context_ptr
= wrap(hPrivateData
);
2160 params
.sampler
= sampler
;
2161 params
.info
= &tcs
->info
.base
;
2162 params
.tcs_iface
= &tcs_iface
.base
;
2164 lp_build_tgsi_soa(gallivm
, tcs
->pipe
.tokens
, &params
, outputs
);
2166 lp_build_mask_end(&mask
);
2168 sampler
->destroy(sampler
);
// Re-sync the C++ builder with wherever the gallivm builder ended up.
2170 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Dump, verify, optimize and JIT the function.
2173 JM()->DumpToFile(pFunction
, "src");
2174 gallivm_verify_function(gallivm
, wrap(pFunction
));
2175 gallivm_compile_module(gallivm
);
2176 JM()->DumpToFile(pFunction
, "optimized");
2178 PFN_TCS_FUNC pFunc
=
2179 (PFN_TCS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2181 debug_printf("tess control shader %p\n", pFunc
);
2182 assert(pFunc
&& "Error: TessControlShader = NULL");
2183 JM()->DumpAsm(pFunction
, "asm");
2185 JM()->mIsModuleFinalized
= true;
// swr_compile_gs
//
// Compiles a geometry shader variant for `key` through a builder's
// CompileGS(ctx, key) and caches it: the resulting function pointer is wrapped
// in a VariantGS (together with builder.gallivm) and inserted into
// ctx->gs->map under `key`.
//
// The JitManager handed to the builder comes from the screen's hJitMgr handle.
2192 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
2195 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2197 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
// Cache the compiled variant so the same key is not compiled again.
2199 ctx
->gs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantGS
>(new VariantGS(builder
.gallivm
, func
))));
// swr_compile_tcs
//
// Compiles a tessellation control shader variant for `key` through a
// builder's CompileTCS(ctx, key) and caches it: the resulting function pointer
// is wrapped in a VariantTCS (together with builder.gallivm) and inserted into
// ctx->tcs->map under `key`.
//
// The JitManager handed to the builder comes from the screen's hJitMgr handle.
2204 swr_compile_tcs(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2207 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2209 PFN_TCS_FUNC func
= builder
.CompileTCS(ctx
, key
);
// Cache the compiled variant so the same key is not compiled again.
2211 ctx
->tcs
->map
.insert(
2212 std::make_pair(key
, std::unique_ptr
<VariantTCS
>(new VariantTCS(builder
.gallivm
, func
))));
// swr_compile_tes
//
// Compiles a tessellation evaluation shader variant for `key` through a
// builder's CompileTES(ctx, key) and caches it: the resulting function pointer
// is wrapped in a VariantTES (together with builder.gallivm) and inserted into
// ctx->tes->map under `key`.
//
// The JitManager handed to the builder comes from the screen's hJitMgr handle.
2218 swr_compile_tes(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
2221 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2223 PFN_TES_FUNC func
= builder
.CompileTES(ctx
, key
);
// Cache the compiled variant so the same key is not compiled again.
2225 ctx
->tes
->map
.insert(
2226 std::make_pair(key
, std::unique_ptr
<VariantTES
>(new VariantTES(builder
.gallivm
, func
))));
// BuilderSWR::WriteVS
//
// Emits IR that stores one channel of a vertex shader output into the output
// vertex structure.
//
// pVal       - value to store
// pVsContext - VS context pointer; read only on the interleaved path, for the
//              AlternateOffset field
// pVtxOutput - pointer to the output vertex
// slot       - destination attribute slot
// channel    - destination component within the slot
//
// Two addressing schemes are visible. When USE_SIMD16_FRONTEND is set and
// USE_SIMD16_VS is not, slot and channel are doubled and the alternate offset
// is added, as the comments below describe. The other scheme indexes
// {0, 0, slot} and then {0, channel} directly.
2232 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
2234 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2235 // interleave the simdvertex components into the dest simd16vertex
2236 // slot16offset = slot8offset * 2
2237 // comp16offset = comp8offset * 2 + alternateOffset
2239 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
2240 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
2241 STORE(pVal
, pOut
, {channel
* 2});
// Direct (non-interleaved) addressing.
2243 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
2244 STORE(pVal
, pOut
, {0, channel
});
// Optional trace of every store when verbose_shader is set.
2245 if (verbose_shader
) {
2246 lp_build_printf(gallivm
, "VS: Storing on slot %d, channel %d: ", C(slot
), C(channel
));
2247 lp_build_print_value(gallivm
, "", wrap(pVal
));
// BuilderSWR::CompileVS
//
// Builds the LLVM IR function for the vertex shader bound to ctx->vs, runs the
// gallivm TGSI translator over its tokens, writes the shader outputs (and any
// clip/cull distances) to the output vertex, and JIT-compiles the result.
//
// Steps visible in this function:
//   1. Create a void function taking (swr_draw_context *, i8 *,
//      SWR_VS_CONTEXT *) and position both IR builders at its entry block.
//   2. Load the used input channels from the input vertex.
//   3. Set up sampler and system values (instance_id, vertex_id), then
//      translate with lp_build_tgsi_soa.
//   4. Store every written output channel through WriteVS, routing PSIZE and
//      POSITION to their dedicated slots.
//   5. When clip planes are enabled or cull distances are written, emit clip
//      distances, either shader-written or computed from the user clip planes.
//   6. Verify, compile and JIT the module.
//
// ctx - SWR context; ctx->vs, ctx->rasterizer and (for clip handling)
//       ctx->gs / ctx->tes / ctx->tcs are read.
// key - VS variant key; only key.sampler is read in the lines shown here.
2253 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
2255 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
2257 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2258 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
// Only outputs is cleared; inputs entries are filled per used channel below.
2260 memset(outputs
, 0, sizeof(outputs
));
2262 AttrBuilder attrBuilder
;
2263 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: (swr_draw_context *, i8 *, SWR_VS_CONTEXT *) -> void.
2265 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2266 PointerType::get(mInt8Ty
, 0),
2267 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2268 FunctionType
*vsFuncType
=
2269 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
2271 // create new vertex shader function
2272 auto pFunction
= Function::Create(vsFuncType
,
2273 GlobalValue::ExternalLinkage
,
2275 JM()->mpCurrentModule
);
// The attribute API differs between LLVM < 5 (AttributeSet) and later
// versions (AttributeList).
2276 #if LLVM_VERSION_MAJOR < 5
2277 AttributeSet attrSet
= AttributeSet::get(
2278 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2279 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2281 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Point both the C++ IRBuilder and the gallivm C builder at the entry block.
2284 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2285 IRB()->SetInsertPoint(block
);
2286 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the three function arguments.
2288 auto argitr
= pFunction
->arg_begin();
2289 Value
*hPrivateData
= &*argitr
++;
2290 hPrivateData
->setName("hPrivateData");
2291 Value
*pWorkerData
= &*argitr
++;
2292 pWorkerData
->setName("pWorkerData");
2293 Value
*pVsCtx
= &*argitr
++;
2294 pVsCtx
->setName("vsCtx");
2296 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
2298 consts_ptr
->setName("vs_constants");
2299 Value
*const_sizes_ptr
=
2300 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
2301 const_sizes_ptr
->setName("num_vs_constants");
2303 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
// NOTE(review): this cast to simd16vertex is presumably guarded by a
// preprocessor condition that is not visible in this listing.
2305 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Load only the input channels the shader actually uses.
2308 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2309 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
2310 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2311 if (mask
& (1 << channel
)) {
2312 inputs
[attrib
][channel
] =
2313 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
2318 struct lp_build_sampler_soa
*sampler
=
2319 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
2320 assert(sampler
!= nullptr);
2322 struct lp_bld_tgsi_system_values system_values
;
2323 memset(&system_values
, 0, sizeof(system_values
));
2324 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
// Two alternative vertex_id sources (VertexID16 / VertexID) follow; the
// condition choosing between them is not visible here.
2327 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
2329 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
// Likewise two alternative vector widths (mVWidth16 / mVWidth).
2333 uint32_t vectorWidth
= mVWidth16
;
2335 uint32_t vectorWidth
= mVWidth
;
// Translator parameters; zeroed first so unset fields are null.
2338 struct lp_build_tgsi_params params
;
2339 memset(&params
, 0, sizeof(params
));
2340 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
2341 params
.consts_ptr
= wrap(consts_ptr
);
2342 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2343 params
.system_values
= &system_values
;
2344 params
.inputs
= inputs
;
2345 params
.context_ptr
= wrap(hPrivateData
);
2346 params
.sampler
= sampler
;
2347 params
.info
= &swr_vs
->info
.base
;
2349 lp_build_tgsi_soa(gallivm
,
2350 swr_vs
->pipe
.tokens
,
2354 sampler
->destroy(sampler
);
// Re-sync the C++ builder with wherever the gallivm builder ended up.
2356 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2358 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
2360 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Write every output channel the shader produced. PSIZE is taken from
// channel 0 and lands in VERTEX_SGV_SLOT, POSITION goes to
// VERTEX_POSITION_SLOT, and everything else starts at
// VERTEX_ATTRIB_START_SLOT + attrib.
2363 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2364 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
2365 if (!outputs
[attrib
][channel
])
2371 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
2372 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
2374 val
= LOAD(unwrap(outputs
[attrib
][0]));
2375 outSlot
= VERTEX_SGV_SLOT
;
2376 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
2377 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2378 outSlot
= VERTEX_POSITION_SLOT
;
2380 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2381 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
2382 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
2386 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
// Clip/cull distance generation.
2390 if (ctx
->rasterizer
->clip_plane_enable
||
2391 swr_vs
->info
.base
.culldist_writemask
) {
2392 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
// Pick the output holding the clip-space vertex: CLIPVERTEX when written,
// otherwise the POSITION output with semantic index 0.
2395 if (swr_vs
->info
.base
.writes_clipvertex
) {
2396 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
2397 &swr_vs
->info
.base
);
2399 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2400 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
2401 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
2407 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2408 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
2409 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
2410 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
2411 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
// Clip/cull write masks come from a later stage (GS, TES or TCS) when one
// is bound, and from the VS otherwise.
2413 tgsi_shader_info
*pLastFE
= &ctx
->vs
->info
.base
;
2416 pLastFE
= &ctx
->gs
->info
.base
;
2418 else if (ctx
->tes
) {
2419 pLastFE
= &ctx
->tes
->info
.base
;
2421 else if (ctx
->tcs
) {
2422 pLastFE
= &ctx
->tcs
->info
.base
;
2425 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
2426 // clip distance overrides user clip planes
2427 if ((pLastFE
->clipdist_writemask
& clip_mask
& (1 << val
)) ||
2428 ((pLastFE
->culldist_writemask
<< pLastFE
->num_written_clipdistance
) & (1 << val
))) {
// Distances 0-3 live in CLIPDIST[0] (LO slot), 4-7 in CLIPDIST[1] (HI slot).
2429 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1, pLastFE
);
2430 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2432 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
2433 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2435 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
2436 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
2441 if (!(clip_mask
& (1 << val
)))
// User clip plane: load the plane's four coefficients and broadcast each
// across the SIMD lanes.
2444 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
2445 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
2446 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
2447 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
2449 Value
*bpx
= VBROADCAST_16(px
);
2450 Value
*bpy
= VBROADCAST_16(py
);
2451 Value
*bpz
= VBROADCAST_16(pz
);
2452 Value
*bpw
= VBROADCAST_16(pw
);
2454 Value
*bpx
= VBROADCAST(px
);
2455 Value
*bpy
= VBROADCAST(py
);
2456 Value
*bpz
= VBROADCAST(pz
);
2457 Value
*bpw
= VBROADCAST(pw
);
// dist = dot(clip vertex, plane)
2459 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
2460 FADD(FMUL(unwrap(cy
), bpy
),
2461 FADD(FMUL(unwrap(cz
), bpz
),
2462 FMUL(unwrap(cw
), bpw
))));
2465 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2467 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// Dump, verify, optimize and JIT the function.
2473 JM()->DumpToFile(pFunction
, "vs_function1");
2474 gallivm_verify_function(gallivm
, wrap(pFunction
));
2475 gallivm_compile_module(gallivm
);
2476 JM()->DumpToFile(pFunction
, "vs_function2");
2478 // lp_debug_dump_value(func);
2480 PFN_VERTEX_FUNC pFunc
=
2481 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2483 JM()->DumpAsm(pFunction
, "vs_function_asm");
2484 debug_printf("vert shader %p\n", pFunc
);
2485 assert(pFunc
&& "Error: VertShader = NULL");
2487 JM()->mIsModuleFinalized
= true;
// swr_compile_vs
//
// Compiles a vertex shader variant for `key` through a builder's
// CompileVS(ctx, key) and caches it: the resulting function pointer is wrapped
// in a VariantVS (together with builder.gallivm) and inserted into
// ctx->vs->map under `key`.
//
// The function first checks whether ctx->vs->pipe.tokens is null; the action
// taken in that case is not visible in this listing.
2493 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
// Guard: a shader without TGSI tokens gets special handling.
2495 if (!ctx
->vs
->pipe
.tokens
)
2499 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2501 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
// Cache the compiled variant so the same key is not compiled again.
2503 ctx
->vs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantVS
>(new VariantVS(builder
.gallivm
, func
))));
// swr_so_adjust_attrib
//
// Translates a vertex shader output index into an attribute slot.
//
// in_attrib - index into the shader's output_semantic_name array
// swr_vs    - vertex shader whose output semantics are consulted
//
// The slot starts as in_attrib + VERTEX_ATTRIB_START_SLOT and is then
// overridden by semantic:
//   POSITION      -> VERTEX_POSITION_SLOT
//   PSIZE, LAYER  -> VERTEX_SGV_SLOT
// For other semantics a further adjustment depends on whether the shader
// writes position; that adjustment is not visible in this listing.
2508 swr_so_adjust_attrib(unsigned in_attrib
,
2509 swr_vertex_shader
*swr_vs
)
2511 ubyte semantic_name
;
// Default mapping; may be overridden below.
2514 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
2517 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
2518 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
2519 attrib
= VERTEX_POSITION_SLOT
;
2520 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
2521 attrib
= VERTEX_SGV_SLOT
;
2522 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
2523 attrib
= VERTEX_SGV_SLOT
;
2525 if (swr_vs
->info
.base
.writes_position
) {
// locate_linkage
//
// Searches a shader's outputs for the one with semantic (name, index).
//
// name  - TGSI semantic name to look for
// index - TGSI semantic index to look for
// info  - shader info whose output_semantic_name / output_semantic_index
//         arrays are scanned over all PIPE_MAX_SHADER_OUTPUTS entries
//
// NOTE(review): the return statements are not visible in this listing.
// Callers in this file use the result as an output index and compare it with
// 0xFFFFFFFF, so presumably the matching index is returned and 0xFFFFFFFF
// means "not found" - confirm.
2535 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
2537 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
// Both the semantic name and the semantic index must match.
2538 if ((info
->output_semantic_name
[i
] == name
)
2539 && (info
->output_semantic_index
[i
] == index
)) {
2548 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
2550 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
2552 struct tgsi_shader_info
*pPrevShader
;
2554 pPrevShader
= &ctx
->gs
->info
.base
;
2556 pPrevShader
= &ctx
->tes
->info
.base
;
2558 pPrevShader
= &ctx
->vs
->info
.base
;
2560 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2561 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2563 memset(inputs
, 0, sizeof(inputs
));
2564 memset(outputs
, 0, sizeof(outputs
));
2566 struct lp_build_sampler_soa
*sampler
= NULL
;
2568 AttrBuilder attrBuilder
;
2569 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2571 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2572 PointerType::get(mInt8Ty
, 0),
2573 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2574 FunctionType
*funcType
=
2575 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
2577 auto pFunction
= Function::Create(funcType
,
2578 GlobalValue::ExternalLinkage
,
2580 JM()->mpCurrentModule
);
2581 #if LLVM_VERSION_MAJOR < 5
2582 AttributeSet attrSet
= AttributeSet::get(
2583 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2584 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2586 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2589 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2590 IRB()->SetInsertPoint(block
);
2591 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2593 auto args
= pFunction
->arg_begin();
2594 Value
*hPrivateData
= &*args
++;
2595 hPrivateData
->setName("hPrivateData");
2596 Value
*pWorkerData
= &*args
++;
2597 pWorkerData
->setName("pWorkerData");
2598 Value
*pPS
= &*args
++;
2599 pPS
->setName("psCtx");
2601 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
2602 consts_ptr
->setName("fs_constants");
2603 Value
*const_sizes_ptr
=
2604 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
2605 const_sizes_ptr
->setName("num_fs_constants");
2607 // load *pAttribs, *pPerspAttribs
2608 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
2609 Value
*pPerspAttribs
=
2610 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
2612 swr_fs
->constantMask
= 0;
2613 swr_fs
->flatConstantMask
= 0;
2614 swr_fs
->pointSpriteMask
= 0;
2616 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2617 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
2618 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
2619 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
2625 Value
*vi
= nullptr, *vj
= nullptr;
2626 switch (interpLoc
) {
2627 case TGSI_INTERPOLATE_LOC_CENTER
:
2628 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
2629 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
2631 case TGSI_INTERPOLATE_LOC_CENTROID
:
2632 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
2633 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
2635 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2636 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
2637 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
2642 Value
*vw
= nullptr, *pAttribs
;
2643 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2644 interpMode
== TGSI_INTERPOLATE_COLOR
) {
2645 pAttribs
= pPerspAttribs
;
2646 switch (interpLoc
) {
2647 case TGSI_INTERPOLATE_LOC_CENTER
:
2648 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
2650 case TGSI_INTERPOLATE_LOC_CENTROID
:
2651 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
2653 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2654 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
2658 pAttribs
= pRawAttribs
;
2664 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
2665 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
2667 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
2669 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
2670 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
2671 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
2673 inputs
[attrib
][0] = wrap(ff
);
2674 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2675 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2676 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2678 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
2679 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
2680 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
2681 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
2682 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
2684 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
2685 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
2687 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
2689 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
2691 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) { // gl_Layer
2692 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_renderTargetArrayIndex
});
2693 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vRenderTargetArrayIndex");
2694 inputs
[attrib
][0] = wrap(ff
);
2695 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2696 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2697 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2699 } else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) { // gl_ViewportIndex
2700 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_viewportIndex
});
2701 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vViewportIndex");
2702 inputs
[attrib
][0] = wrap(ff
);
2703 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2704 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2705 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2708 unsigned linkedAttrib
=
2709 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
2711 uint32_t extraAttribs
= 0;
2712 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
2713 /* non-gs generated primID - need to grab from swizzleMap override */
2714 linkedAttrib
= pPrevShader
->num_outputs
- 1;
2715 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2717 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
2718 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
2719 /* we add an extra attrib to the backendState in swr_update_derived. */
2720 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
2721 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
2723 } else if (linkedAttrib
+ 1 == 0xFFFFFFFF) {
2724 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
2725 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2726 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2727 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2728 /* If we're reading in color and 2-sided lighting is enabled, we have
2731 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
2734 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2735 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2736 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2737 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
2741 unsigned bcolorAttrib
= 0xFFFFFFFF;
2742 Value
*offset
= NULL
;
2743 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
2744 bcolorAttrib
= locate_linkage(
2745 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
);
2746 /* Neither front nor back colors were available. Nothing to load. */
2747 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
2749 /* If there is no front color, just always use the back color. */
2750 if (linkedAttrib
+ 1 == 0xFFFFFFFF)
2751 linkedAttrib
= bcolorAttrib
;
2753 if (bcolorAttrib
!= 0xFFFFFFFF) {
2755 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2756 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
2757 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2758 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
2761 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
2765 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
2767 offset
= MUL(back
, C(diff
));
2768 offset
->setName("offset");
2773 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2774 if (mask
& (1 << channel
)) {
2775 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
2776 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
2777 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
2780 indexA
= ADD(indexA
, offset
);
2781 indexB
= ADD(indexB
, offset
);
2782 indexC
= ADD(indexC
, offset
);
2785 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
2786 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
2787 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
2789 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2790 inputs
[attrib
][channel
] = wrap(va
);
2792 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
2796 Value
*interp
= FMUL(va
, vi
);
2797 Value
*interp1
= FMUL(vb
, vj
);
2798 interp
= FADD(interp
, interp1
);
2799 interp
= FADD(interp
, vc
);
2800 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2801 interpMode
== TGSI_INTERPOLATE_COLOR
)
2802 interp
= FMUL(interp
, vw
);
2803 inputs
[attrib
][channel
] = wrap(interp
);
2809 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
2810 assert(sampler
!= nullptr);
2812 struct lp_bld_tgsi_system_values system_values
;
2813 memset(&system_values
, 0, sizeof(system_values
));
2815 struct lp_build_mask_context mask
;
2816 bool uses_mask
= false;
2818 if (swr_fs
->info
.base
.uses_kill
||
2819 key
.poly_stipple_enable
) {
2820 Value
*vActiveMask
= NULL
;
2821 if (swr_fs
->info
.base
.uses_kill
) {
2822 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
2824 if (key
.poly_stipple_enable
) {
2825 // first get fragment xy coords and clip to stipple bounds
2826 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
2827 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
2828 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
2829 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
2831 // stipple pattern is 32x32, which means that one line of stipple
2832 // is stored in one word:
2833 // vXstipple is bit offset inside 32-bit stipple word
2834 // vYstipple is word index in the stipple array
2835 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
2836 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
2838 // grab stipple pattern base address
2839 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
2840 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
2842 // perform a gather to grab stipple words for each lane
2843 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
2844 VIMMED1(0xffffffff), 4);
2846 // create a mask with one bit corresponding to the x stipple
2847 // and AND it with the pattern, to see if we have a bit
2848 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
2849 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
2850 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
2851 vStippleMask
= VMASK(vStippleMask
);
2853 if (swr_fs
->info
.base
.uses_kill
) {
2854 vActiveMask
= AND(vActiveMask
, vStippleMask
);
2856 vActiveMask
= vStippleMask
;
2859 lp_build_mask_begin(
2860 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
2864 struct lp_build_tgsi_params params
;
2865 memset(¶ms
, 0, sizeof(params
));
2866 params
.type
= lp_type_float_vec(32, 32 * 8);
2867 params
.mask
= uses_mask
? &mask
: NULL
;
2868 params
.consts_ptr
= wrap(consts_ptr
);
2869 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2870 params
.system_values
= &system_values
;
2871 params
.inputs
= inputs
;
2872 params
.context_ptr
= wrap(hPrivateData
);
2873 params
.sampler
= sampler
;
2874 params
.info
= &swr_fs
->info
.base
;
2876 lp_build_tgsi_soa(gallivm
,
2877 swr_fs
->pipe
.tokens
,
2881 sampler
->destroy(sampler
);
2883 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2885 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
2887 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
2888 case TGSI_SEMANTIC_POSITION
: {
2891 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
2892 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
2895 case TGSI_SEMANTIC_COLOR
: {
2896 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2897 if (!outputs
[attrib
][channel
])
2901 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
2902 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
2903 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
2904 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
2907 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
2913 SWR_PS_CONTEXT_shaded
,
2914 swr_fs
->info
.base
.output_semantic_index
[attrib
],
2922 "unknown output from FS %s[%d]\n",
2923 tgsi_semantic_names
[swr_fs
->info
.base
2924 .output_semantic_name
[attrib
]],
2925 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
2931 LLVMValueRef mask_result
= 0;
2933 mask_result
= lp_build_mask_end(&mask
);
2936 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2939 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
2944 gallivm_verify_function(gallivm
, wrap(pFunction
));
2946 gallivm_compile_module(gallivm
);
2948 // after the gallivm passes, we have to lower the core's intrinsics
2949 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
2950 lowerPass
.add(createLowerX86Pass(this));
2951 lowerPass
.run(*pFunction
);
2953 PFN_PIXEL_KERNEL kernel
=
2954 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2955 debug_printf("frag shader %p\n", kernel
);
2956 assert(kernel
&& "Error: FragShader = NULL");
2958 JM()->mIsModuleFinalized
= true;
2964 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
2966 if (!ctx
->fs
->pipe
.tokens
)
2970 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2972 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
2974 ctx
->fs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantFS
>(new VariantFS(builder
.gallivm
, func
))));