1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 #include <llvm/Config/llvm-config.h>
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
42 #include "gen_state_llvm.h"
44 #include "functionpasses/passes.h"
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
55 #include "gallivm/lp_bld_logic.h"
57 #include "swr_context.h"
58 #include "gen_surf_state_llvm.h"
59 #include "gen_swr_context_llvm.h"
60 #include "swr_resource.h"
61 #include "swr_state.h"
62 #include "swr_screen.h"
65 /////////////////////////////////////////////////////////////////////////
70 #include "util/u_debug.h"
71 #include "util/u_memory.h"
72 #include "util/u_string.h"
74 #include "gallivm/lp_bld_type.h"
// Compile-time switches for shader debug printing.  Two sets of the same five
// flags follow: one with every flag true (guarded by DEBUG and
// SWR_VERBOSE_SHADER both being defined) and one with every flag false.
// NOTE(review): the #else / #endif lines that would separate and close the
// two sets are not present in this listing — presumably lost; confirm.
76 #if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
77 constexpr bool verbose_shader
= true;
78 constexpr bool verbose_tcs_shader_in
= true;
79 constexpr bool verbose_tcs_shader_out
= true;
80 constexpr bool verbose_tcs_shader_loop
= true;
81 constexpr bool verbose_vs_shader
= true;
83 constexpr bool verbose_shader
= false;
84 constexpr bool verbose_tcs_shader_in
= false;
85 constexpr bool verbose_tcs_shader_out
= false;
86 constexpr bool verbose_tcs_shader_loop
= false;
87 constexpr bool verbose_vs_shader
= false;
90 using namespace SwrJit
;
// Forward declaration of locate_linkage(); its definition is not in this part
// of the file.  It takes a semantic name, a semantic index and a
// tgsi_shader_info pointer.
// NOTE(review): the return-type line is absent from this listing.
93 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
95 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
97 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
100 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
102 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
105 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
107 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
110 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
112 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
115 bool operator==(const swr_jit_tcs_key
&lhs
, const swr_jit_tcs_key
&rhs
)
117 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
120 bool operator==(const swr_jit_tes_key
&lhs
, const swr_jit_tes_key
&rhs
)
122 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Fills the sampler part of a JIT shader key for one shader stage.
//
//   info        - TGSI info of the shader; file_max / file_mask of the
//                 SAMPLER and SAMPLER_VIEW files drive the loops below.
//   ctx         - context whose samplers[shader_type][] and
//                 sampler_views[shader_type][] are read.
//   shader_type - stage used to index the context's sampler arrays.
//   key         - output; nr_samplers, nr_sampler_views and sampler[] are
//                 written.
//
// Static sampler state is captured for every sampler whose bit is set in the
// SAMPLER file mask.  Static texture state is then captured either per
// declared sampler view (when file_max[TGSI_FILE_SAMPLER_VIEW] != -1) or per
// sampler with nr_sampler_views = nr_samplers.  In both variants, when the
// viewed resource has both depth and stencil but the view format has no
// depth, the key's texture format is forced to PIPE_FORMAT_S8_UINT.
// NOTE(review): the line joining the two variants (presumably "} else {"),
// the closing braces and the return-type line are missing from this listing.
127 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
128 struct swr_context
*ctx
,
129 enum pipe_shader_type shader_type
,
130 struct swr_jit_sampler_key
&key
)
132 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
134 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
// NOTE(review): this mask test shifts a signed 1 by i, while the sampler-view
// loop further down uses 1u << (i & 31) — confirm i can never reach 31 here.
135 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
136 lp_sampler_static_sampler_state(
137 &key
.sampler
[i
].sampler_state
,
138 ctx
->samplers
[shader_type
][i
]);
143 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
144 * are dx10-style? Can't really have mixed opcodes, at least not
145 * if we want to skip the holes here (without rescanning tgsi).
147 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
148 key
.nr_sampler_views
=
149 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
150 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
151 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
152 const struct pipe_sampler_view
*view
=
153 ctx
->sampler_views
[shader_type
][i
];
154 lp_sampler_static_texture_state(
155 &key
.sampler
[i
].texture_state
, view
);
// NOTE(review): view is dereferenced below; any null guard would sit on a
// line that is missing from this listing (156) — confirm.
157 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
158 const struct util_format_description
*desc
=
159 util_format_description(view
->format
);
160 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
161 !util_format_has_depth(desc
))
162 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
// Variant without sampler-view declarations: one view per sampler.
167 key
.nr_sampler_views
= key
.nr_samplers
;
168 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
169 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
170 const struct pipe_sampler_view
*view
=
171 ctx
->sampler_views
[shader_type
][i
];
172 lp_sampler_static_texture_state(
173 &key
.sampler
[i
].texture_state
, view
);
175 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
176 const struct util_format_description
*desc
=
177 util_format_description(view
->format
);
178 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
179 !util_format_has_depth(desc
))
180 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
// Builds the fragment-shader JIT key from current context state.
//
// The key is zeroed first, then filled with: the framebuffer's colour-buffer
// count, the rasterizer's light_twoside and sprite_coord_enable, the output
// semantic name/index arrays of the preceding stage, the sampler key for
// PIPE_SHADER_FRAGMENT, and poly_stipple_enable (set only when the rasterizer
// enables it and ctx->poly_stipple.prim_is_poly is true).
// NOTE(review): pPrevShader is assigned from gs, tes and vs in turn; the
// conditions choosing between them are missing from this listing.
188 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
189 struct swr_context
*ctx
,
190 swr_fragment_shader
*swr_fs
)
// Zero every byte of the key before any field is written.
192 memset((void*)&key
, 0, sizeof(key
));
194 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
195 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
196 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
198 struct tgsi_shader_info
*pPrevShader
;
200 pPrevShader
= &ctx
->gs
->info
.base
;
202 pPrevShader
= &ctx
->tes
->info
.base
;
204 pPrevShader
= &ctx
->vs
->info
.base
;
// Copy sizes are taken from the destination arrays in the key.
206 memcpy(&key
.vs_output_semantic_name
,
207 &pPrevShader
->output_semantic_name
,
208 sizeof(key
.vs_output_semantic_name
));
209 memcpy(&key
.vs_output_semantic_idx
,
210 &pPrevShader
->output_semantic_index
,
211 sizeof(key
.vs_output_semantic_idx
));
213 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
215 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
216 ctx
->poly_stipple
.prim_is_poly
;
// Builds the vertex-shader JIT key: zeroes it, computes the clip-plane mask
// and adds the sampler key for PIPE_SHADER_VERTEX.
//
// clip_plane_mask: when the shader writes clip distances (non-zero
// clipdist_writemask) the mask is that write mask ANDed with the rasterizer's
// clip_plane_enable; otherwise it is clip_plane_enable unchanged.
220 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
221 struct swr_context
*ctx
,
222 swr_vertex_shader
*swr_vs
)
224 memset((void*)&key
, 0, sizeof(key
));
226 key
.clip_plane_mask
=
227 swr_vs
->info
.base
.clipdist_writemask
?
228 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
229 ctx
->rasterizer
->clip_plane_enable
;
231 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
// Builds the fetch-shader JIT key: zeroes it and copies the vertex element
// state's fsState into it.
235 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
236 struct swr_vertex_element_state
*velems
)
238 memset((void*)&key
, 0, sizeof(key
));
240 key
.fsState
= velems
->fsState
;
// Builds the geometry-shader JIT key: zeroes it, copies the preceding stage's
// output semantic name/index arrays into it and adds the sampler key for
// PIPE_SHADER_GEOMETRY.
// NOTE(review): pPrevShader is assigned from tes and from vs; the conditions
// choosing between them are missing from this listing.
244 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
245 struct swr_context
*ctx
,
246 swr_geometry_shader
*swr_gs
)
248 memset((void*)&key
, 0, sizeof(key
));
250 struct tgsi_shader_info
*pPrevShader
= nullptr;
253 pPrevShader
= &ctx
->tes
->info
.base
;
255 pPrevShader
= &ctx
->vs
->info
.base
;
// Copy sizes are taken from the destination arrays in the key.
258 memcpy(&key
.vs_output_semantic_name
,
259 &pPrevShader
->output_semantic_name
,
260 sizeof(key
.vs_output_semantic_name
));
261 memcpy(&key
.vs_output_semantic_idx
,
262 &pPrevShader
->output_semantic_index
,
263 sizeof(key
.vs_output_semantic_idx
));
265 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
// Builds the tessellation-control JIT key: zeroes it, copies the vertex
// shader's output semantic name/index arrays, computes the clip-plane mask
// and adds the sampler key for PIPE_SHADER_TESS_CTRL.
//
// clip_plane_mask: when the shader writes clip distances the mask is
// clipdist_writemask & rasterizer clip_plane_enable; otherwise it is
// clip_plane_enable unchanged.
269 swr_generate_tcs_key(struct swr_jit_tcs_key
&key
,
270 struct swr_context
*ctx
,
271 swr_tess_control_shader
*swr_tcs
)
273 memset((void*)&key
, 0, sizeof(key
));
// The stage feeding the TCS is taken to be the vertex shader.
275 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
277 memcpy(&key
.vs_output_semantic_name
,
278 &pPrevShader
->output_semantic_name
,
279 sizeof(key
.vs_output_semantic_name
));
280 memcpy(&key
.vs_output_semantic_idx
,
281 &pPrevShader
->output_semantic_index
,
282 sizeof(key
.vs_output_semantic_idx
));
284 key
.clip_plane_mask
=
285 swr_tcs
->info
.base
.clipdist_writemask
?
286 swr_tcs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
287 ctx
->rasterizer
->clip_plane_enable
;
289 swr_generate_sampler_key(swr_tcs
->info
, ctx
, PIPE_SHADER_TESS_CTRL
, key
);
// Builds the tessellation-evaluation JIT key: zeroes it, copies the preceding
// stage's output semantic name/index arrays into the prev_output_semantic_*
// fields, computes the clip-plane mask and adds the sampler key for
// PIPE_SHADER_TESS_EVAL.
// NOTE(review): pPrevShader is assigned from tcs and from vs; the conditions
// choosing between them are missing from this listing.
293 swr_generate_tes_key(struct swr_jit_tes_key
&key
,
294 struct swr_context
*ctx
,
295 swr_tess_evaluation_shader
*swr_tes
)
297 memset((void*)&key
, 0, sizeof(key
));
299 struct tgsi_shader_info
*pPrevShader
= nullptr;
302 pPrevShader
= &ctx
->tcs
->info
.base
;
305 pPrevShader
= &ctx
->vs
->info
.base
;
// A preceding stage must have been found before its outputs are copied.
308 SWR_ASSERT(pPrevShader
!= nullptr, "TES: No TCS or VS defined");
310 memcpy(&key
.prev_output_semantic_name
,
311 &pPrevShader
->output_semantic_name
,
312 sizeof(key
.prev_output_semantic_name
));
313 memcpy(&key
.prev_output_semantic_idx
,
314 &pPrevShader
->output_semantic_index
,
315 sizeof(key
.prev_output_semantic_idx
));
// Clip mask: shader write mask ANDed with the enabled planes when the shader
// writes clip distances, otherwise just the enabled planes.
317 key
.clip_plane_mask
=
318 swr_tes
->info
.base
.clipdist_writemask
?
319 swr_tes
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
320 ctx
->rasterizer
->clip_plane_enable
;
322 swr_generate_sampler_key(swr_tes
->info
, ctx
, PIPE_SHADER_TESS_EVAL
, key
);
// Builder subclass used to JIT-compile SWR shaders.  It holds a
// gallivm_state, declares one Compile* entry point per shader stage, and
// declares the GS / TCS / TES callback implementations that the free
// trampoline functions later in the file forward to.
// NOTE(review): several lines of this struct (return types, base-class
// initialiser, destructor header, some parameters, closing brace) are
// missing from this listing.
325 struct BuilderSWR
: public Builder
{
// Constructor: starts a new module in the JitManager, creates the gallivm
// state on JM()->mContext and makes gallivm's module the manager's current one.
326 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
329 pJitMgr
->SetupNewModule();
330 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
), NULL
);
331 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
// NOTE(review): presumably the destructor body; its header line is absent
// from this listing.
335 gallivm_free_ir(gallivm
);
338 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
339 unsigned slot
, unsigned channel
);
341 struct gallivm_state
*gallivm
;
// Per-stage compile entry points; each takes the context and the stage's
// JIT key and returns the stage's function-pointer type.
342 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
343 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
344 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
345 PFN_TCS_FUNC
CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
);
346 PFN_TES_FUNC
CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
);
348 // GS-specific emit functions
350 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
351 struct lp_build_context
* bld
,
352 boolean is_vindex_indirect
,
353 LLVMValueRef vertex_index
,
354 boolean is_aindex_indirect
,
355 LLVMValueRef attrib_index
,
356 LLVMValueRef swizzle_index
);
358 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
359 struct lp_build_context
* bld
,
360 LLVMValueRef (*outputs
)[4],
361 LLVMValueRef emitted_vertices_vec
,
362 LLVMValueRef stream_id
);
365 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
366 struct lp_build_context
* bld
,
367 LLVMValueRef total_emitted_vertices_vec_ptr
,
368 LLVMValueRef verts_per_prim_vec
,
369 LLVMValueRef emitted_prims_vec
,
370 LLVMValueRef mask_vec
);
373 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
374 LLVMValueRef total_emitted_vertices_vec
,
375 LLVMValueRef emitted_prims_vec
, unsigned stream
);
377 // TCS-specific emit functions
378 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
);
379 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
);
382 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
383 struct lp_build_tgsi_context
* bld_base
,
384 boolean is_vindex_indirect
,
385 LLVMValueRef vertex_index
,
386 boolean is_aindex_indirect
,
387 LLVMValueRef attrib_index
,
388 LLVMValueRef swizzle_index
);
// NOTE(review): the tail of this declaration (after swizzle_index) is missing
// from this listing.
391 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
392 struct lp_build_tgsi_context
* bld_base
,
393 boolean is_vindex_indirect
,
394 LLVMValueRef vertex_index
,
395 boolean is_aindex_indirect
,
396 LLVMValueRef attrib_index
,
397 LLVMValueRef swizzle_index
,
// NOTE(review): some parameters of this declaration are missing from this
// listing.
401 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
402 struct lp_build_tgsi_context
* bld_base
,
404 boolean is_vindex_indirect
,
405 LLVMValueRef vertex_index
,
406 boolean is_aindex_indirect
,
407 LLVMValueRef attrib_index
,
408 LLVMValueRef swizzle_index
,
410 LLVMValueRef mask_vec
);
412 // Barrier implementation (available only in TCS)
414 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
415 struct lp_build_tgsi_context
*bld_base
);
417 // TES-specific emit functions
419 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
420 struct lp_build_tgsi_context
* bld_base
,
421 boolean is_vindex_indirect
,
422 LLVMValueRef vertex_index
,
423 boolean is_aindex_indirect
,
424 LLVMValueRef attrib_index
,
425 LLVMValueRef swizzle_index
);
428 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
429 struct lp_build_tgsi_context
* bld_base
,
430 boolean is_aindex_indirect
,
431 LLVMValueRef attrib_index
,
432 LLVMValueRef swizzle_index
);
// Geometry-shader callback interface.  The gallivm interface struct is the
// first member, and the trampolines cast a lp_build_gs_iface pointer back to
// this type to reach pBuilder and the GS state.
// NOTE(review): code elsewhere in the file reads iface->pGsCtx, but that
// member's declaration is missing from this listing.
435 struct swr_gs_llvm_iface
{
436 struct lp_build_gs_iface base
;
437 struct tgsi_shader_info
*info
;
439 BuilderSWR
*pBuilder
;
442 SWR_GS_STATE
*pGsState
;
443 uint32_t num_outputs
;
444 uint32_t num_verts_per_prim
;
446 Value
*pVtxAttribMap
;
// Tessellation-control callback interface.  The gallivm interface struct is
// the first member, and the trampolines cast a lp_build_tcs_iface pointer back
// to this type to reach pBuilder and the TCS state.
// NOTE(review): code elsewhere in the file reads iface->pTcsCtx, but that
// member's declaration is missing from this listing.
449 struct swr_tcs_llvm_iface
{
450 struct lp_build_tcs_iface base
;
451 struct tgsi_shader_info
*info
;
453 BuilderSWR
*pBuilder
;
456 SWR_TS_STATE
*pTsState
;
// Compared against the invocation loop counter in the TCS epilogue.
458 uint32_t output_vertices
;
// Invocation loop counter; written by the TCS prologue.
460 LLVMValueRef loop_var
;
462 Value
*pVtxAttribMap
;
463 Value
*pVtxOutputAttribMap
;
464 Value
*pPatchOutputAttribMap
;
// Tessellation-evaluation callback interface.  The gallivm interface struct
// is the first member, and the trampolines cast a lp_build_tes_iface pointer
// back to this type to reach pBuilder.
// NOTE(review): some member lines appear to be missing from this listing.
467 struct swr_tes_llvm_iface
{
468 struct lp_build_tes_iface base
;
469 struct tgsi_shader_info
*info
;
471 BuilderSWR
*pBuilder
;
474 SWR_TS_STATE
*pTsState
;
476 uint32_t num_outputs
;
478 Value
*pVtxAttribMap
;
479 Value
*pPatchAttribMap
;
482 // trampoline functions so we can use the builder llvm construction methods
// Trampoline: casts gs_iface back to swr_gs_llvm_iface and forwards to
// BuilderSWR::swr_gs_llvm_fetch_input, returning its result.
// NOTE(review): the forwarded argument list is cut short in this listing.
484 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
485 struct lp_build_context
* bld
,
486 boolean is_vindex_indirect
,
487 LLVMValueRef vertex_index
,
488 boolean is_aindex_indirect
,
489 LLVMValueRef attrib_index
,
490 LLVMValueRef swizzle_index
)
492 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
494 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld
,
// Trampoline: casts gs_base back to swr_gs_llvm_iface and forwards to
// BuilderSWR::swr_gs_llvm_emit_vertex.
// NOTE(review): the forwarded argument list is only partly present in this
// listing.
503 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
504 struct lp_build_context
* bld
,
505 LLVMValueRef (*outputs
)[4],
506 LLVMValueRef emitted_vertices_vec
,
507 LLVMValueRef mask_vec
,
508 LLVMValueRef stream_id
)
510 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
512 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld
,
514 emitted_vertices_vec
,
// Trampoline: casts gs_base back to swr_gs_llvm_iface and forwards to
// BuilderSWR::swr_gs_llvm_end_primitive.
// NOTE(review): the forwarded argument list is cut short in this listing.
519 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
520 struct lp_build_context
* bld
,
521 LLVMValueRef total_emitted_vertices_vec_ptr
,
522 LLVMValueRef verts_per_prim_vec
,
523 LLVMValueRef emitted_prims_vec
,
524 LLVMValueRef mask_vec
, unsigned stream_id
)
526 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
528 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld
,
529 total_emitted_vertices_vec_ptr
,
// Trampoline: casts gs_base back to swr_gs_llvm_iface and forwards all
// arguments unchanged to BuilderSWR::swr_gs_llvm_epilogue.
536 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
537 LLVMValueRef total_emitted_vertices_vec
,
538 LLVMValueRef emitted_prims_vec
, unsigned stream
)
540 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
542 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
,
543 total_emitted_vertices_vec
,
544 emitted_prims_vec
, stream
);
// Trampoline: casts tcs_iface back to swr_tcs_llvm_iface, reinterprets bld as
// a lp_build_tgsi_context and forwards to
// BuilderSWR::swr_tcs_llvm_fetch_input, returning its result.
// NOTE(review): the forwarded argument list is cut short in this listing.
548 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
549 struct lp_build_context
* bld
,
550 boolean is_vindex_indirect
,
551 LLVMValueRef vertex_index
,
552 boolean is_aindex_indirect
,
553 LLVMValueRef attrib_index
,
554 boolean is_sindex_indirect
,
555 LLVMValueRef swizzle_index
)
557 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
558 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
560 return iface
->pBuilder
->swr_tcs_llvm_fetch_input(tcs_iface
, bld_base
,
// Trampoline: casts tcs_iface back to swr_tcs_llvm_iface, reinterprets bld as
// a lp_build_tgsi_context and forwards to
// BuilderSWR::swr_tcs_llvm_fetch_output, returning its result.
// NOTE(review): the end of the parameter list and the forwarded arguments are
// missing from this listing.
569 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
570 struct lp_build_context
* bld
,
571 boolean is_vindex_indirect
,
572 LLVMValueRef vertex_index
,
573 boolean is_aindex_indirect
,
574 LLVMValueRef attrib_index
,
575 boolean is_sindex_indirect
,
576 LLVMValueRef swizzle_index
,
579 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
580 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
582 return iface
->pBuilder
->swr_tcs_llvm_fetch_output(tcs_iface
, bld_base
,
// Trampoline: reinterprets bld as a lp_build_tgsi_soa_context, takes the
// swr_tcs_llvm_iface from its tcs_iface member and calls
// BuilderSWR::swr_tcs_llvm_emit_prologue on that iface's builder.
593 swr_tcs_llvm_emit_prologue(struct lp_build_context
* bld
)
595 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
596 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
597 iface
->pBuilder
->swr_tcs_llvm_emit_prologue(bld_base
);
// Trampoline: reinterprets bld as a lp_build_tgsi_soa_context, takes the
// swr_tcs_llvm_iface from its tcs_iface member and calls
// BuilderSWR::swr_tcs_llvm_emit_epilogue on that iface's builder.
601 swr_tcs_llvm_emit_epilogue(struct lp_build_context
* bld
)
603 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
604 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
605 iface
->pBuilder
->swr_tcs_llvm_emit_epilogue(bld_base
);
// Trampoline: casts tcs_iface back to swr_tcs_llvm_iface, reinterprets bld as
// a lp_build_tgsi_context and forwards to
// BuilderSWR::swr_tcs_llvm_store_output.
// NOTE(review): some parameters and the forwarded argument list are missing
// from this listing.
609 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
610 struct lp_build_context
* bld
,
612 boolean is_vindex_indirect
,
613 LLVMValueRef vertex_index
,
614 boolean is_aindex_indirect
,
615 LLVMValueRef attrib_index
,
616 boolean is_sindex_indirect
,
617 LLVMValueRef swizzle_index
,
619 LLVMValueRef mask_vec
)
621 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
622 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
624 iface
->pBuilder
->swr_tcs_llvm_store_output(tcs_iface
,
// Trampoline: reinterprets bld as a lp_build_tgsi_soa_context, takes the
// swr_tcs_llvm_iface from its tcs_iface member and calls
// BuilderSWR::swr_tcs_llvm_emit_barrier with that interface pointer and the
// embedded lp_build_tgsi_context (bld_base->bld_base).
638 void swr_tcs_llvm_emit_barrier(struct lp_build_context
*bld
)
640 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
641 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
643 iface
->pBuilder
->swr_tcs_llvm_emit_barrier(bld_base
->tcs_iface
, &bld_base
->bld_base
);
// Trampoline: casts tes_iface back to swr_tes_llvm_iface, reinterprets bld as
// a lp_build_tgsi_context and forwards to
// BuilderSWR::swr_tes_llvm_fetch_vtx_input, returning its result.
// NOTE(review): the forwarded argument list is cut short in this listing.
648 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
649 struct lp_build_context
* bld
,
650 boolean is_vindex_indirect
,
651 LLVMValueRef vertex_index
,
652 boolean is_aindex_indirect
,
653 LLVMValueRef attrib_index
,
654 boolean is_sindex_indirect
,
655 LLVMValueRef swizzle_index
)
657 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
658 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
660 return iface
->pBuilder
->swr_tes_llvm_fetch_vtx_input(tes_iface
, bld_base
,
// Trampoline: casts tes_iface back to swr_tes_llvm_iface, reinterprets bld as
// a lp_build_tgsi_context and forwards to
// BuilderSWR::swr_tes_llvm_fetch_patch_input, returning its result.
// NOTE(review): the forwarded argument list is cut short in this listing.
669 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
670 struct lp_build_context
* bld
,
671 boolean is_aindex_indirect
,
672 LLVMValueRef attrib_index
,
673 LLVMValueRef swizzle_index
)
675 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
676 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
678 return iface
->pBuilder
->swr_tes_llvm_fetch_patch_input(tes_iface
, bld_base
,
// Emits IR that loads one geometry-shader input.
//
// Two paths are visible.  When either index is indirect, the value is
// assembled per channel: for each i below type.length the per-channel vertex
// and/or attribute index is extracted, the attribute is mapped through
// pVtxAttribMap, the element index vert * inputVertStride + attrib is formed,
// the input is loaded from pVerts at {element, swizzle}, and channel i of it
// is inserted into res.  Otherwise the same address computation is done once
// on the indices as passed in.
// NOTE(review): the return statements, the "else", the loop counter
// declaration and the start of the statement at listing line 716 are missing
// from this listing.
685 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
686 struct lp_build_context
* bld
,
687 boolean is_vindex_indirect
,
688 LLVMValueRef vertex_index
,
689 boolean is_aindex_indirect
,
690 LLVMValueRef attrib_index
,
691 LLVMValueRef swizzle_index
)
693 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
694 Value
*vert_index
= unwrap(vertex_index
);
695 Value
*attr_index
= unwrap(attrib_index
);
// Point this builder's IRBuilder at the block gallivm is currently emitting into.
697 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
699 if (is_vindex_indirect
|| is_aindex_indirect
) {
701 Value
*res
= unwrap(bld
->zero
);
702 struct lp_type type
= bld
->type
;
704 for (i
= 0; i
< type
.length
; i
++) {
705 Value
*vert_chan_index
= vert_index
;
706 Value
*attr_chan_index
= attr_index
;
708 if (is_vindex_indirect
) {
709 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
711 if (is_aindex_indirect
) {
712 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
// Map the shader's attribute index through the attribute map.
716 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
718 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
719 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
721 Value
*pVector
= ADD(MUL(vert_chan_index
, pInputVertStride
), attrib
);
722 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
// Keep only channel i of the loaded input.
724 Value
*value
= VEXTRACT(pInput
, C(i
));
725 res
= VINSERT(res
, value
, C(i
));
// Direct path: indices are used as passed in.
730 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
732 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
733 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
735 Value
*pVector
= ADD(MUL(vert_index
, pInputVertStride
), attrib
);
737 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
743 // GS output stream layout
// Both sizes are used below as offsets into each lane's output stream:
// their sum is the header that precedes vertex data, and VERTEX_COUNT_SIZE
// alone is where the control bytes start.
744 #define VERTEX_COUNT_SIZE 32
745 #define CONTROL_HEADER_SIZE (8*32)
// Emits IR that writes one emitted GS vertex into each lane's output stream.
//
// Layout constants: headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE,
// attribSize = 4 floats, vertSize = attribSize * SWR_VTX_NUM_SLOTS.  The
// per-lane vertex offset is emitted_vertices * vertSize.
//
// For every output attribute a destination slot is chosen from its semantic
// (PSIZE / LAYER / VIEWPORT_INDEX go to a component of VERTEX_SGV_SLOT,
// POSITION to VERTEX_POSITION_SLOT, anything else to
// VERTEX_ATTRIB_START_SLOT + attrib).  Then, for each lane, four channels are
// stored to the lane's stream; lanes whose mask bit is clear are redirected to
// a stack temporary instead.
//
// If the output topology is not TOP_POINT_LIST the stack is restored at that
// point; otherwise the vertex's 2-bit stream id is OR-ed into the control
// bytes that follow the vertex count.
// NOTE(review): several lines (else branches, the vData declaration, the body
// under writes_position, closing braces, presumably a return after the first
// STACKRESTORE) are missing from this listing.
748 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
749 struct lp_build_context
* bld
,
750 LLVMValueRef (*outputs
)[4],
751 LLVMValueRef emitted_vertices_vec
,
752 LLVMValueRef stream_id
)
754 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
// Point this builder's IRBuilder at the block gallivm is currently emitting into.
756 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
757 const uint32_t headerSize
= VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
;
758 const uint32_t attribSize
= 4 * sizeof(float);
759 const uint32_t vertSize
= attribSize
* SWR_VTX_NUM_SLOTS
;
760 Value
*pVertexOffset
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(vertSize
));
// Execution mask from the GS context, narrowed to one bit per lane.
762 Value
*vMask
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_mask
});
763 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, mVWidth
));
765 Value
*pStack
= STACKSAVE();
766 Value
*pTmpPtr
= ALLOCA(mFP32Ty
, C(4)); // used for dummy write for lane masking
768 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
769 uint32_t attribSlot
= attrib
;
770 uint32_t sgvChannel
= 0;
771 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
772 attribSlot
= VERTEX_SGV_SLOT
;
773 sgvChannel
= VERTEX_SGV_POINT_SIZE_COMP
;
774 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
) {
775 attribSlot
= VERTEX_SGV_SLOT
;
776 sgvChannel
= VERTEX_SGV_RTAI_COMP
;
777 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_VIEWPORT_INDEX
) {
778 attribSlot
= VERTEX_SGV_SLOT
;
779 sgvChannel
= VERTEX_SGV_VAI_COMP
;
780 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
781 attribSlot
= VERTEX_POSITION_SLOT
;
783 attribSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
784 if (iface
->info
->writes_position
) {
// Offset of this attribute's slot within the vertex, past the stream header.
789 Value
*pOutputOffset
= ADD(pVertexOffset
, VIMMED1(headerSize
+ attribSize
* attribSlot
)); // + sgvChannel ?
791 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
792 Value
*pLaneOffset
= VEXTRACT(pOutputOffset
, C(lane
));
793 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
794 Value
*pStreamOffset
= GEP(pStream
, pLaneOffset
);
795 pStreamOffset
= BITCAST(pStreamOffset
, mFP32PtrTy
);
// Masked-off lanes write to the stack temporary instead of the stream.
797 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
798 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
800 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
// SGV values are always read from channel 0 of the shader output.
803 if (attribSlot
== VERTEX_SGV_SLOT
)
804 vData
= LOAD(unwrap(outputs
[attrib
][0]));
806 vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
// For the SGV slot only the component selected above is stored.
808 if (attribSlot
!= VERTEX_SGV_SLOT
||
809 sgvChannel
== channel
) {
810 vData
= VEXTRACT(vData
, C(lane
));
811 STORE(vData
, pStreamOffset
);
813 pStreamOffset
= GEP(pStreamOffset
, C(1));
818 /* When the output type is not points, the geometry shader may not
819 * output data to multiple streams. So early exit here.
821 if(iface
->pGsState
->outputTopology
!= TOP_POINT_LIST
) {
822 STACKRESTORE(pStack
);
826 // Info about stream id for each vertex
827 // is coded in 2 bits (4 vert per byte "box"):
828 // ----------------- ----------------- ----
829 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
830 // ----------------- ----------------- ----
832 // Calculate where need to put stream id for current vert
// NOTE(review): the shift amount is 2 * emitted_vertices and is later
// truncated to 8 bits and used to shift an 8-bit value; no reduction modulo 8
// is visible here — confirm behaviour for the fifth and later vertices.
834 Value
*pShiftControl
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(2));
836 // Calculate in which box put stream id for current vert.
837 Value
*pOffsetControl
= LSHR(unwrap(emitted_vertices_vec
), VIMMED1(2));
840 Value
*pStreamIdOffset
= ADD(pOffsetControl
, VIMMED1(VERTEX_COUNT_SIZE
));
842 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
843 Value
*pShift
= TRUNC(VEXTRACT(pShiftControl
, C(lane
)), mInt8Ty
);
844 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
846 Value
*pStreamOffset
= GEP(pStream
, VEXTRACT(pStreamIdOffset
, C(lane
)));
848 // Just make sure that not overflow max - stream id = (0,1,2,3)
// NOTE(review): element 0 of stream_id is used for every lane — presumably
// the stream id is uniform across lanes; confirm.
849 Value
*vVal
= TRUNC(AND(VEXTRACT(unwrap(stream_id
), C(0)), C(0x3)), mInt8Ty
);
851 // Shift it to correct position in byte "box"
852 vVal
= SHL(vVal
, pShift
);
854 // Info about other vertices can be already stored
855 // so we need to read and add bits from current vert info.
856 Value
*storedValue
= LOAD(pStreamOffset
);
857 vVal
= OR(storedValue
, vVal
);
858 STORE(vVal
, pStreamOffset
);
861 STACKRESTORE(pStack
);
// Emits IR that marks the end of a primitive in each lane's GS output stream.
//
// Nothing is emitted past the topology check when the output topology is
// TOP_POINT_LIST (see the comment below).  Otherwise a vertex count is
// formed, decremented by one, and used to address a single bit in the control
// bytes: byte index = count / 8 + VERTEX_COUNT_SIZE, bit = count % 8.  That
// bit is OR-ed into the lane's stream; lanes that are masked off, or whose
// verts_per_prim is zero, read and write a stack temporary instead.
// NOTE(review): the vector types here are built with a literal width of 8,
// while the lane loop runs to mVWidth and the emit-vertex code uses mVWidth
// for the same mask — confirm for SIMD widths other than 8.
// NOTE(review): the early-return body, the vCount declaration and whatever
// guards the second vCount assignment are missing from this listing.
865 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
866 struct lp_build_context
* bld
,
867 LLVMValueRef total_emitted_vertices_vec
,
868 LLVMValueRef verts_per_prim_vec
,
869 LLVMValueRef emitted_prims_vec
,
870 LLVMValueRef mask_vec
)
872 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
874 /* When the output type is points, the geometry shader may output data
875 * to multiple streams, and end_primitive has no effect. Info about
876 * stream id for vertices is stored into the same place in memory where
877 * end primitive info is stored so early exit in this case.
879 if (iface
->pGsState
->outputTopology
== TOP_POINT_LIST
) {
// Point this builder's IRBuilder at the block gallivm is currently emitting into.
883 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
885 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
886 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
888 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
// emitted_prims * vertsPerPrim + verts_per_prim
891 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
892 unwrap(verts_per_prim_vec
));
894 vCount
= unwrap(total_emitted_vertices_vec
);
// Only lanes that are active and have a non-zero verts_per_prim are written.
896 Value
*mask
= unwrap(mask_vec
);
897 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
898 mask
= AND(mask
, cmpMask
);
899 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
// Byte offset and bit value for the last emitted vertex (8 bits per byte).
901 vCount
= SUB(vCount
, VIMMED1(1));
902 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
903 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
905 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
907 Value
*pStack
= STACKSAVE();
908 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
910 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
911 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
912 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
913 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
915 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
916 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
// Read-modify-write so bits already set in this byte are kept.
918 Value
*vVal
= LOAD(pStreamOffset
);
919 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
920 STORE(vVal
, pStreamOffset
);
923 STACKRESTORE(pStack
);
// Emits IR that, for every lane, stores that lane's total emitted vertex
// count at the very start of the lane's output stream (the stream pointer is
// cast to an int32 pointer for the store).  emitted_prims_vec and stream are
// not used in the lines visible here.
927 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
928 LLVMValueRef total_emitted_vertices_vec
,
929 LLVMValueRef emitted_prims_vec
, unsigned stream
)
931 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
// Point this builder's IRBuilder at the block gallivm is currently emitting into.
933 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
935 // Store emit count to each output stream in the first DWORD
936 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
938 Value
* pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
939 pStream
= BITCAST(pStream
, mInt32PtrTy
);
940 Value
* pLaneCount
= VEXTRACT(unwrap(total_emitted_vertices_vec
), C(lane
));
941 STORE(pLaneCount
, pStream
);
// Emits the head of the TCS invocation loop: allocates a SIMD int32 loop
// counter, initialises it to zero, records it in iface->loop_var, opens an
// exec-mask loop, and sets system_values.invocation_id to the value loaded
// from the counter.  When verbose_tcs_shader_loop is set the invocation id is
// also printed.
946 BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
)
948 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
950 Value
* loop_var
= ALLOCA(mSimdInt32Ty
);
951 STORE(VBROADCAST(C(0)), loop_var
);
953 iface
->loop_var
= wrap(loop_var
);
955 lp_exec_bgnloop(&bld
->exec_mask
, true);
// Re-sync the IRBuilder with gallivm's current block after opening the loop.
957 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
958 bld
->system_values
.invocation_id
= wrap((LOAD(unwrap(iface
->loop_var
))));
960 if (verbose_tcs_shader_loop
) {
961 lp_build_print_value(gallivm
, "Prologue LOOP Iteration BEGIN:", bld
->system_values
.invocation_id
);
// Emits the tail of the TCS invocation loop: increments the loop counter by
// one, compares it (PIPE_FUNC_GEQUAL) against iface->output_vertices, breaks
// out of the loop under that condition, and closes the exec-mask loop.  When
// verbose_tcs_shader_loop is set the incremented counter is printed.
967 BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
)
969 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
971 struct lp_build_context
*uint_bld
= &bld
->bld_base
.uint_bld
;
// loop_var += 1
973 STORE(ADD(LOAD(unwrap(iface
->loop_var
)), VBROADCAST(C(1))), unwrap(iface
->loop_var
));
974 if (verbose_tcs_shader_loop
) {
975 lp_build_print_value(gallivm
, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface
->loop_var
))));
978 LLVMValueRef tmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_GEQUAL
, wrap(LOAD(unwrap(iface
->loop_var
))),
979 wrap(VBROADCAST(C(iface
->output_vertices
))));
// Conditional break: push the condition, break, pop, then end the loop.
980 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp
);
981 lp_exec_break(&bld
->exec_mask
, &bld
->bld_base
.pc
, false);
982 lp_exec_mask_cond_pop(&bld
->exec_mask
);
983 lp_exec_endloop(bld
->bld_base
.base
.gallivm
, &bld
->exec_mask
);
// Emits IR that loads one TCS per-vertex input.
//
// When either index is indirect the result is assembled per channel: for each
// i below type.length the per-channel vertex / attribute index is extracted,
// the attribute is mapped through pVtxAttribMap, and the scalar at
// pTcsCtx -> vert[vert] -> attrib[attrib][swizzle][i] is loaded and inserted
// into res.  Otherwise the address is formed once from the indices as passed
// in.  With verbose_tcs_shader_in set, the indices and loaded values are
// printed along the way.
// NOTE(review): the return statement, the "else", the loop counter
// declaration, the start of the statement at listing line 1026 and the load
// in the direct path are missing from this listing.
987 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
988 struct lp_build_tgsi_context
* bld_base
,
989 boolean is_vindex_indirect
,
990 LLVMValueRef vertex_index
,
991 boolean is_aindex_indirect
,
992 LLVMValueRef attrib_index
,
993 LLVMValueRef swizzle_index
)
995 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
997 Value
*vert_index
= unwrap(vertex_index
);
998 Value
*attr_index
= unwrap(attrib_index
);
// Point this builder's IRBuilder at the block gallivm is currently emitting into.
1000 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1002 if (verbose_tcs_shader_in
) {
1003 lp_build_printf(gallivm
, "[TCS IN][VTX] ======================================\n");
1004 lp_build_print_value(gallivm
, "[TCS IN][VTX] vertex_index: ", vertex_index
);
1005 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1006 lp_build_printf(gallivm
, "[TCS IN][VTX] --------------------------------------\n");
1009 Value
*res
= unwrap(bld_base
->base
.zero
);
1010 if (is_vindex_indirect
|| is_aindex_indirect
) {
1012 struct lp_type type
= bld_base
->base
.type
;
1014 for (i
= 0; i
< type
.length
; i
++) {
1015 Value
*vert_chan_index
= vert_index
;
1016 Value
*attr_chan_index
= attr_index
;
1018 if (is_vindex_indirect
) {
1019 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1021 if (is_aindex_indirect
) {
1022 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
// Map the shader's attribute index through the attribute map.
1026 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
// Address of channel i of the selected vertex / attribute / component.
1028 Value
*pBase
= GEP(iface
->pTcsCtx
,
1029 { C(0), C(SWR_HS_CONTEXT_vert
), vert_chan_index
,
1030 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
), C(i
) });
1032 Value
*val
= LOAD(pBase
);
1034 if (verbose_tcs_shader_in
) {
1035 lp_build_print_value(gallivm
, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index
));
1036 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
// NOTE(review): the label says attr_chan_index but the value printed is
// attr_index — debug output only; confirm which was intended.
1037 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1038 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1039 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1040 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(val
));
1042 res
= VINSERT(res
, val
, C(i
));
// Direct path: indices are used as passed in.
1045 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1047 Value
*pBase
= GEP(iface
->pTcsCtx
,
1048 { C(0), C(SWR_HS_CONTEXT_vert
), vert_index
,
1049 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
) });
1053 if (verbose_tcs_shader_in
) {
1054 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1055 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1056 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1057 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1058 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(res
));
1061 if (verbose_tcs_shader_in
) {
1062 lp_build_print_value(gallivm
, "[TCS IN][VTX] returning: ", wrap(res
));
// Emits IR that reads back one channel of a TCS output for every SIMD lane.
//
// For each lane the patch output pointer is loaded from the HS context
// (SWR_HS_CONTEXT_pCPout) and indexed by the lane number. What is read from
// that patch depends on `name`:
//   - TGSI_SEMANTIC_TESSOUTER / TGSI_SEMANTIC_TESSINNER: an element of the
//     outer / inner tessellation factor array, selected by swizzle_index;
//   - TGSI_SEMANTIC_PATCH: per-patch data, with the attribute index remapped
//     through iface->pPatchOutputAttribMap;
//   - the remaining ("Generic attribute") case: a control-point attribute,
//     remapped through iface->pVtxOutputAttribMap.
// Every scalar that is loaded is VINSERTed into `res` at the lane's position.
//
// NOTE(review): the declaration of `name` is not visible in this listing;
// confirm the full parameter list against the complete file.
1068 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
1069 struct lp_build_tgsi_context
* bld_base
,
1070 boolean is_vindex_indirect
,
1071 LLVMValueRef vertex_index
,
1072 boolean is_aindex_indirect
,
1073 LLVMValueRef attrib_index
,
1074 LLVMValueRef swizzle_index
,
1077 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1079 Value
*vert_index
= unwrap(vertex_index
);
1080 Value
*attr_index
= unwrap(attrib_index
);
// Position the C++ IRBuilder at the block gallivm's builder currently targets.
1082 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1084 if (verbose_tcs_shader_in
) {
1085 lp_build_print_value(gallivm
, "[TCS INOUT] Vertex index: ", vertex_index
);
1086 lp_build_print_value(gallivm
, "[TCS INOUT] Attrib index: ", wrap(attr_index
));
1087 lp_build_print_value(gallivm
, "[TCS INOUT] Swizzle index: ", swizzle_index
);
// Result starts out as the zero vector of the build context.
1090 Value
* res
= unwrap(bld_base
->base
.zero
);
// One iteration per SIMD lane (mVWidth lanes).
1092 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
// pCpOut: this lane's entry of the patch output array referenced by the HS context.
1093 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1094 Value
* pCpOut
= GEP(p1
, {lane
});
1096 Value
*vert_chan_index
= vert_index
;
1097 Value
*attr_chan_index
= attr_index
;
// Indirect indices are vectors: pick out this lane's element.
1099 if (is_vindex_indirect
) {
1100 vert_chan_index
= VEXTRACT(vert_index
, C(lane
));
1101 if (verbose_tcs_shader_in
) {
1102 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index
));
1106 if (is_aindex_indirect
) {
1107 attr_chan_index
= VEXTRACT(attr_index
, C(lane
));
1108 if (verbose_tcs_shader_in
) {
1109 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index
));
// Tessellation factors live in their own sub-structure of the patch.
1113 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1114 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1115 Value
* tessFactorArray
= nullptr;
1116 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1117 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1119 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
// swizzle_index selects the element of the chosen factor array.
1121 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1122 res
= VINSERT(res
, LOAD(tessFactor
), C(lane
));
1123 if (verbose_tcs_shader_in
) {
1124 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane
)));
1125 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] loaded value: ", wrap(res
));
// Per-patch attribute: remap the index, then read from the patch data block.
1127 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1128 Value
* attr_index_from_map
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_chan_index
}));
1129 Value
* attr_value
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attr_index_from_map
, unwrap(swizzle_index
)});
1130 res
= VINSERT(res
, LOAD(attr_value
), C(lane
));
1131 if (verbose_tcs_shader_in
) {
1132 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map
));
1133 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1134 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] loaded value: ", wrap(res
));
1137 // Generic attribute
// Remap the attribute index through the per-vertex output map.
1139 LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_chan_index
}));
1140 if (verbose_tcs_shader_in
) {
1141 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib
));
// Control point vert_chan_index, attribute `attrib`, channel swizzle_index.
1143 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
), vert_chan_index
,
1144 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1146 res
= VINSERT(res
, LOAD(attr_chan
), C(lane
));
1147 if (verbose_tcs_shader_in
) {
1148 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] loaded value: ", wrap(res
));
// Emits IR that stores one channel of a TCS output for every SIMD lane.
//
// tcs_iface   cast to swr_tcs_llvm_iface; supplies pTcsCtx and the output
//             attribute maps.
// bld_base    cast to lp_build_tgsi_soa_context; used for the exec mask (debug
//             output) and system_values.invocation_id.
// is_vindex_indirect / is_aindex_indirect
//             when set, element 0 of the matching index vector is extracted
//             and used as the scalar index.
// mask_vec    per-lane store mask; each lane is truncated to i1 and used to
//             SELECT between the new value and the value already in memory.
//
// For each lane the destination depends on `name`:
//   - TGSI_SEMANTIC_TESSOUTER / TGSI_SEMANTIC_TESSINNER: tessellation factor
//     arrays of the lane's patch;
//   - TGSI_SEMANTIC_PATCH: per-patch data, remapped via pPatchOutputAttribMap;
//   - remaining case: control-point attribute remapped via pVtxOutputAttribMap,
//     with the control point chosen by element 0 of invocation_id.
//
// NOTE(review): the declarations of `name` and `value` are not visible in this
// listing; confirm the full parameter list against the complete file.
1157 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
1158 struct lp_build_tgsi_context
*bld_base
,
1160 boolean is_vindex_indirect
,
1161 LLVMValueRef vertex_index
,
1162 boolean is_aindex_indirect
,
1163 LLVMValueRef attrib_index
,
1164 LLVMValueRef swizzle_index
,
1166 LLVMValueRef mask_vec
)
1168 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1169 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
// Position the C++ IRBuilder at the block gallivm's builder currently targets.
1171 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1173 if (verbose_tcs_shader_out
) {
1174 lp_build_printf(gallivm
, "[TCS OUT] =============================================\n");
1177 if (verbose_tcs_shader_out
) {
1178 lp_build_print_value(gallivm
, "[TCS OUT] Store mask: ", bld
->exec_mask
.exec_mask
);
1179 lp_build_print_value(gallivm
, "[TCS OUT] Store value: ", value
);
1182 Value
*vert_index
= unwrap(vertex_index
);
1183 Value
*attr_index
= unwrap(attrib_index
);
1185 if (verbose_tcs_shader_out
) {
1186 lp_build_print_value(gallivm
, "[TCS OUT] Vertex index: ", vertex_index
);
1187 lp_build_print_value(gallivm
, "[TCS OUT] Attrib index: ", wrap(attr_index
));
1188 lp_build_print_value(gallivm
, "[TCS OUT] Swizzle index: ", swizzle_index
);
// Indirect indices: only element 0 of the index vector is used for the store.
1191 if (is_vindex_indirect
) {
1192 vert_index
= VEXTRACT(vert_index
, C(0));
1193 if (verbose_tcs_shader_out
) {
// Print the extracted scalar (previously the un-extracted vertex_index was
// printed here, unlike the matching attrib print below).
1194 lp_build_print_value(gallivm
, "[TCS OUT] Extracted vertex index: ", wrap(vert_index
));
1198 if (is_aindex_indirect
) {
1199 attr_index
= VEXTRACT(attr_index
, C(0));
1200 if (verbose_tcs_shader_out
) {
1201 lp_build_print_value(gallivm
, "[TCS OUT] Extracted attrib index: ", wrap(attr_index
));
1205 if (verbose_tcs_shader_out
) {
1206 if (bld
->exec_mask
.has_mask
) {
1207 lp_build_print_value(gallivm
, "[TCS OUT] Exec mask: ", bld
->exec_mask
.exec_mask
);
1210 lp_build_printf(gallivm
, "[TCS OUT] has no mask\n");
// One iteration per SIMD lane (mVWidth lanes).
1213 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
// pCpOut: this lane's entry of the patch output array referenced by the HS context.
1214 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1215 Value
* pCpOut
= GEP(p1
, {lane
});
// Tessellation factors.
1217 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1218 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1219 Value
* tessFactorArray
= nullptr;
1220 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1221 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1223 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1225 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1226 Value
* valueToStore
= VEXTRACT(unwrap(value
), C(lane
));
1227 valueToStore
= BITCAST(valueToStore
, mFP32Ty
);
// Masked store: keep the value already in memory when this lane's mask is off.
1229 Value
*originalVal
= LOAD(tessFactor
);
1230 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1231 valueToStore
= SELECT(vMask
, valueToStore
, originalVal
);
1233 STORE(valueToStore
, tessFactor
);
1234 if (verbose_tcs_shader_out
)
1236 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec
);
1237 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore
));
// Per-patch attribute.
1239 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1240 Value
* attrib
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_index
}));
1241 if (verbose_tcs_shader_out
) {
1242 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index
));
1243 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index
));
1244 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect
)));
1245 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect
)));
1246 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib
));
1248 Value
* attr
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attrib
});
1249 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1250 if (verbose_tcs_shader_out
) {
1251 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1252 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] value to store: ", value
);
1253 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store
));
1254 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] chan_index: ", swizzle_index
);
1256 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
// Masked store, addressed by swizzle_index inside the attribute.
1258 Value
*originalVal
= LOADV(attr
, {C(0), unwrap(swizzle_index
)});
1259 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1260 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1261 if (verbose_tcs_shader_out
) {
1262 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] store mask: ", mask_vec
);
1263 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal
));
1264 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vMask: ", wrap(vMask
));
1265 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store
));
1268 STOREV(value_to_store
, attr
, {C(0), unwrap(swizzle_index
)});
1269 if (verbose_tcs_shader_out
) {
1270 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store
));
// Remaining case: per-control-point attribute.
1273 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1274 Value
* attrib
= LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_index
}));
1276 if (verbose_tcs_shader_out
) {
1277 lp_build_printf(gallivm
, "[TCS OUT] Writting attribute\n");
1278 lp_build_print_value(gallivm
, "[TCS OUT][VTX] invocation_id: ", bld
->system_values
.invocation_id
);
1279 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index
));
1280 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib
));
1281 lp_build_print_value(gallivm
, "[TCS OUT][VTX] chan_index: ", swizzle_index
);
1282 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value: ", value
);
1283 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store
));
// The control point written is selected by element 0 of invocation_id.
1286 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
),
1287 VEXTRACT(unwrap(bld
->system_values
.invocation_id
), C(0)),
1288 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1290 // Mask output values if needed
1291 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
1293 Value
*originalVal
= LOAD(attr_chan
);
1294 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1295 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1297 STORE(value_to_store
, attr_chan
);
1298 if (verbose_tcs_shader_out
) {
1299 lp_build_print_value(gallivm
, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec
);
1300 lp_build_print_value(gallivm
, "[TCS OUT][VTX] stored: ", wrap(value_to_store
));
// Emits IR for a TCS barrier by closing the current invocation loop and
// opening a new one.
//
// Steps visible here:
//   1. add 1 (broadcast) to the counter stored at iface->loop_var;
//   2. break out of the loop when the counter is >= iface->output_vertices
//      (unsigned compare through uint_bld);
//   3. end the loop, reset the counter to 0 and begin a new loop;
//   4. reload the counter into system_values.invocation_id.
//
// tcs_iface is cast to swr_tcs_llvm_iface and bld_base to
// lp_build_tgsi_soa_context.
1307 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
1308 struct lp_build_tgsi_context
*bld_base
)
1310 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1311 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1313 if (verbose_tcs_shader_loop
) {
1314 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration %d END\n", iface
->loop_var
);
1317 struct lp_build_context
*uint_bld
= &bld
->bld_base
.uint_bld
;
// Increment the loop counter in place.
1319 STORE(ADD(LOAD(unwrap(iface
->loop_var
)), VBROADCAST(C(1))), unwrap(iface
->loop_var
));
// tmp is the break condition: counter >= output_vertices.
1321 LLVMValueRef tmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_GEQUAL
, wrap(LOAD(unwrap(iface
->loop_var
))),
1322 wrap(VBROADCAST(C(iface
->output_vertices
))));
// Conditional break, followed by the end of the current loop.
1324 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp
);
1325 lp_exec_break(&bld
->exec_mask
, &bld
->bld_base
.pc
, false);
1326 lp_exec_mask_cond_pop(&bld
->exec_mask
);
1327 lp_exec_endloop(bld
->bld_base
.base
.gallivm
, &bld
->exec_mask
);
// Re-position the C++ IRBuilder at gallivm's current block after the loop helpers ran.
1329 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Reset the counter and open the next loop.
1331 STORE(VBROADCAST(C(0)), unwrap(iface
->loop_var
));
1332 lp_exec_bgnloop(&bld
->exec_mask
, true);
1334 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Inside the new loop the invocation id is the current counter value.
1336 bld
->system_values
.invocation_id
= wrap((LOAD(unwrap(iface
->loop_var
))));
1338 if (verbose_tcs_shader_loop
) {
1339 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration BEGIN: ", iface
->loop_var
);
1340 lp_build_print_value(gallivm
, "Barrier LOOP: InvocationId: \n", bld
->system_values
.invocation_id
);
// Emits IR that fetches one channel of a per-patch TES input.
//
// tes_iface      cast to swr_tes_llvm_iface; supplies pPatchAttribMap and
//                pTesCtx.
// bld_base       supplies base.zero (initial result) and base.type (its length
//                bounds the per-lane loop).
// is_aindex_indirect
//                when set, attrib_index is a vector and each lane's element is
//                looked up separately; otherwise one value is loaded and
//                broadcast with VBROADCAST.
// attrib_index, swizzle_index
//                gallivm values, unwrapped before use.
//
// The data is read from the patch referenced by SWR_DS_CONTEXT_pCpIn, under
// ScalarPatch_patchData / ScalarCPoint_attrib, and accumulated in `res`.
// NOTE(review): this listing omits some lines of the function (e.g. the
// declaration of `attrib` in the loop); confirm against the full file.
1346 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
1347 struct lp_build_tgsi_context
* bld_base
,
1348 boolean is_aindex_indirect
,
1349 LLVMValueRef attrib_index
,
1350 LLVMValueRef swizzle_index
)
1352 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1353 Value
*attr_index
= unwrap(attrib_index
);
1354 Value
*res
= unwrap(bld_base
->base
.zero
);
// Position the C++ IRBuilder at the block gallivm's builder currently targets.
1356 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1358 if (verbose_shader
) {
1359 lp_build_printf(gallivm
, "[TES IN][PATCH] --------------------------------------\n");
// Indirect path: one map lookup and one load per lane.
1362 if (is_aindex_indirect
) {
1364 struct lp_type type
= bld_base
->base
.type
;
1366 for (i
= 0; i
< type
.length
; i
++) {
1367 Value
*attr_chan_index
= attr_index
;
// NOTE(review): this check appears to repeat the enclosing condition.
1369 if (is_aindex_indirect
) {
1370 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
// Translate the shader attribute index through the patch attribute map.
1374 LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_chan_index
}));
// Walk from the input patch to its patch-data attribute table.
1376 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1377 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1378 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1379 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1380 if (verbose_shader
) {
1381 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1382 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index
));
1383 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1384 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1385 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
// Place the scalar just loaded into lane i of the result.
1387 res
= VINSERT(res
, Val
, C(i
));
// Direct path: a single lookup whose value is broadcast to the whole result.
1390 Value
*attrib
= LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_index
}));
1392 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1393 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1394 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1395 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1396 if (verbose_shader
) {
1397 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1398 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index
));
1399 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1400 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1401 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1403 res
= VBROADCAST(Val
);
1405 if (verbose_shader
) {
1406 lp_build_print_value(gallivm
, "[TES IN][PATCH] returning: ", wrap(res
));
// Emits IR that fetches one channel of a per-vertex (control point) TES input.
//
// tes_iface      cast to swr_tes_llvm_iface; supplies pVtxAttribMap and pTesCtx.
// bld_base       supplies base.zero (initial result) and base.type (its length
//                bounds the per-lane loop).
// is_vindex_indirect / is_aindex_indirect
//                when set, the matching index is a vector and one element per
//                lane is pulled out with VEXTRACT; when neither is set a single
//                value is loaded and broadcast with VBROADCAST.
// vertex_index, attrib_index, swizzle_index
//                gallivm values, unwrapped before use.
//
// The data is read from the patch referenced by SWR_DS_CONTEXT_pCpIn, under
// ScalarPatch_cp[vertex], and accumulated in `res`.
// NOTE(review): this listing omits some lines of the function (e.g. the
// declaration of `attrib` in the loop); confirm against the full file.
1414 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
1415 struct lp_build_tgsi_context
* bld_base
,
1416 boolean is_vindex_indirect
,
1417 LLVMValueRef vertex_index
,
1418 boolean is_aindex_indirect
,
1419 LLVMValueRef attrib_index
,
1420 LLVMValueRef swizzle_index
)
1422 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1423 Value
*vert_index
= unwrap(vertex_index
);
1424 Value
*attr_index
= unwrap(attrib_index
);
// Position the C++ IRBuilder at the block gallivm's builder currently targets.
1426 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1428 if (verbose_shader
) {
1429 lp_build_printf(gallivm
, "[TES IN][VTX] --------------------------------------\n");
// Result starts out as the zero vector of the build context.
1432 Value
*res
= unwrap(bld_base
->base
.zero
);
// Indirect path: resolve the indices one lane at a time.
1433 if (is_vindex_indirect
|| is_aindex_indirect
) {
1435 struct lp_type type
= bld_base
->base
.type
;
1437 for (i
= 0; i
< type
.length
; i
++) {
1438 Value
*vert_chan_index
= vert_index
;
1439 Value
*attr_chan_index
= attr_index
;
1441 if (is_vindex_indirect
) {
1442 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1444 if (is_aindex_indirect
) {
1445 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
// Translate the shader attribute index through the vertex attribute map.
1449 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
// Walk: input patch -> control point array -> vertex -> attribute table -> attribute.
1451 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1452 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1453 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_chan_index
});
1454 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1455 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1456 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1457 if (verbose_shader
) {
1458 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
// Print the per-lane index that was actually used for the map lookup
// (previously this repeated the un-extracted attrib_index).
1459 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_chan_index
));
1460 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1461 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1462 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
// Place the scalar just loaded into lane i of the result.
1464 res
= VINSERT(res
, Val
, C(i
));
// Direct path: a single lookup whose value is broadcast to the whole result.
1467 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1469 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1470 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1471 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_index
});
1472 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1473 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1474 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1475 if (verbose_shader
) {
1476 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1477 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1478 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1479 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1480 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1482 res
= VBROADCAST(Val
);
1484 if (verbose_shader
) {
1485 lp_build_print_value(gallivm
, "[TES IN][VTX] returning: ", wrap(res
));
1494 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
1496 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
1497 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
1499 memset(pGS
, 0, sizeof(*pGS
));
1501 pGS
->gsEnable
= true;
1503 pGS
->numInputAttribs
= (VERTEX_ATTRIB_START_SLOT
- VERTEX_POSITION_SLOT
) + info
->num_inputs
;
1504 pGS
->outputTopology
=
1505 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
], 0);
1507 /* It's +1 because emit_vertex in swr is always called exactly one time more
1508 * than max_vertices passed in Geometry Shader. We need to allocate more memory
1509 * to avoid crash/memory overwritten.
1511 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] + 1;
1512 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
1514 // If point primitive then assume to use multiple streams
1515 if(pGS
->outputTopology
== TOP_POINT_LIST
) {
1516 pGS
->isSingleStream
= false;
1518 pGS
->isSingleStream
= true;
1519 pGS
->singleStreamID
= 0;
1522 pGS
->vertexAttribOffset
= VERTEX_POSITION_SLOT
;
1523 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
1524 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
1525 pGS
->controlDataSize
= 8; // GS ouputs max of 8 32B units
1526 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
1527 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
1529 pGS
->allocationSize
=
1530 VERTEX_COUNT_SIZE
+ // vertex count
1531 CONTROL_HEADER_SIZE
+ // control header
1532 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
1533 pGS
->maxNumVerts
; // num verts
1535 struct swr_geometry_shader
*gs
= ctx
->gs
;
1537 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1538 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1540 memset(outputs
, 0, sizeof(outputs
));
1542 AttrBuilder attrBuilder
;
1543 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1545 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1546 PointerType::get(mInt8Ty
, 0),
1547 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
1548 FunctionType
*vsFuncType
=
1549 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
1551 // create new vertex shader function
1552 auto pFunction
= Function::Create(vsFuncType
,
1553 GlobalValue::ExternalLinkage
,
1555 JM()->mpCurrentModule
);
1556 #if LLVM_VERSION_MAJOR < 5
1557 AttributeSet attrSet
= AttributeSet::get(
1558 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1559 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1561 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1564 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1565 IRB()->SetInsertPoint(block
);
1566 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1568 auto argitr
= pFunction
->arg_begin();
1569 Value
*hPrivateData
= &*argitr
++;
1570 hPrivateData
->setName("hPrivateData");
1571 Value
*pWorkerData
= &*argitr
++;
1572 pWorkerData
->setName("pWorkerData");
1573 Value
*pGsCtx
= &*argitr
++;
1574 pGsCtx
->setName("gsCtx");
1577 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
1578 consts_ptr
->setName("gs_constants");
1579 Value
*const_sizes_ptr
=
1580 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
1581 const_sizes_ptr
->setName("num_gs_constants");
1583 struct lp_build_sampler_soa
*sampler
=
1584 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
1585 assert(sampler
!= nullptr);
1587 struct lp_bld_tgsi_system_values system_values
;
1588 memset(&system_values
, 0, sizeof(system_values
));
1589 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
1590 system_values
.invocation_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
1592 std::vector
<Constant
*> mapConstants
;
1593 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1594 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1595 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1596 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1598 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
1599 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1601 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
1603 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
1606 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
1607 vs_slot
= VERTEX_POSITION_SLOT
;
1609 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
1610 mapConstants
.push_back(C(vs_slot
));
1613 struct lp_build_mask_context mask
;
1614 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
1615 lp_build_mask_begin(&mask
, gallivm
,
1616 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1618 // zero out cut buffer so we can load/modify/store bits
1619 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
1621 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
1622 #if LLVM_VERSION_MAJOR >= 10
1623 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH
));
1625 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
1629 struct swr_gs_llvm_iface gs_iface
;
1630 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
1631 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
1632 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
1633 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
1634 gs_iface
.pBuilder
= this;
1635 gs_iface
.pGsCtx
= pGsCtx
;
1636 gs_iface
.pGsState
= pGS
;
1637 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
1638 gs_iface
.num_verts_per_prim
=
1639 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
1640 gs_iface
.info
= info
;
1641 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
1643 struct lp_build_tgsi_params params
;
1644 memset(¶ms
, 0, sizeof(params
));
1645 params
.type
= lp_type_float_vec(32, 32 * 8);
1646 params
.mask
= & mask
;
1647 params
.consts_ptr
= wrap(consts_ptr
);
1648 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1649 params
.system_values
= &system_values
;
1650 params
.inputs
= inputs
;
1651 params
.context_ptr
= wrap(hPrivateData
);
1652 params
.sampler
= sampler
;
1653 params
.info
= &gs
->info
.base
;
1654 params
.gs_iface
= &gs_iface
.base
;
1656 lp_build_tgsi_soa(gallivm
,
1661 lp_build_mask_end(&mask
);
1663 sampler
->destroy(sampler
);
1665 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1669 gallivm_verify_function(gallivm
, wrap(pFunction
));
1670 gallivm_compile_module(gallivm
);
1673 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1675 debug_printf("geom shader %p\n", pFunc
);
1676 assert(pFunc
&& "Error: GeomShader = NULL");
1678 JM()->mIsModuleFinalized
= true;
1684 BuilderSWR::CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
1686 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
1687 struct tgsi_shader_info
*info
= &ctx
->tes
->info
.base
;
1689 // tessellation is enabled if TES is present
1690 // clear tessellation state here then
1691 memset(pTS
, 0, sizeof(*pTS
));
1693 pTS
->tsEnable
= true;
1695 unsigned tes_prim_mode
= info
->properties
[TGSI_PROPERTY_TES_PRIM_MODE
];
1696 unsigned tes_spacing
= info
->properties
[TGSI_PROPERTY_TES_SPACING
];
1697 bool tes_vertex_order_cw
= info
->properties
[TGSI_PROPERTY_TES_VERTEX_ORDER_CW
];
1698 bool tes_point_mode
= info
->properties
[TGSI_PROPERTY_TES_POINT_MODE
];
1699 SWR_TS_DOMAIN type
= SWR_TS_ISOLINE
;
1700 SWR_TS_PARTITIONING partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1701 SWR_TS_OUTPUT_TOPOLOGY topology
= SWR_TS_OUTPUT_POINT
;
1702 PRIMITIVE_TOPOLOGY postDSTopology
= TOP_POINT_LIST
;
1704 // TESS_TODO: move this to helper functions to improve readability
1705 switch (tes_prim_mode
) {
1706 case PIPE_PRIM_LINES
:
1707 type
= SWR_TS_ISOLINE
;
1708 postDSTopology
= TOP_LINE_LIST
;
1710 case PIPE_PRIM_TRIANGLES
:
1712 postDSTopology
= TOP_TRIANGLE_LIST
;
1714 case PIPE_PRIM_QUADS
:
1716 // See OpenGL spec - quads are tessellated into triangles
1717 postDSTopology
= TOP_TRIANGLE_LIST
;
1723 switch (tes_spacing
) {
1724 case PIPE_TESS_SPACING_FRACTIONAL_ODD
:
1725 partitioning
= SWR_TS_ODD_FRACTIONAL
;
1727 case PIPE_TESS_SPACING_FRACTIONAL_EVEN
:
1728 partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1730 case PIPE_TESS_SPACING_EQUAL
:
1731 partitioning
= SWR_TS_INTEGER
;
1737 if (tes_point_mode
) {
1738 topology
= SWR_TS_OUTPUT_POINT
;
1739 postDSTopology
= TOP_POINT_LIST
;
1741 else if (tes_prim_mode
== PIPE_PRIM_LINES
) {
1742 topology
= SWR_TS_OUTPUT_LINE
;
1744 else if (tes_vertex_order_cw
) {
1745 topology
= SWR_TS_OUTPUT_TRI_CW
;
1748 topology
= SWR_TS_OUTPUT_TRI_CCW
;
1752 pTS
->tsOutputTopology
= topology
;
1753 pTS
->partitioning
= partitioning
;
1754 pTS
->numDsOutputAttribs
= info
->num_outputs
;
1755 pTS
->postDSTopology
= postDSTopology
;
1757 pTS
->dsAllocationSize
= SWR_VTX_NUM_SLOTS
* MAX_NUM_VERTS_PER_PRIM
;
1758 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1759 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1760 pTS
->dsOutVtxAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1762 struct swr_tess_evaluation_shader
*tes
= ctx
->tes
;
1764 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1765 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1767 memset(outputs
, 0, sizeof(outputs
));
1769 AttrBuilder attrBuilder
;
1770 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1772 std::vector
<Type
*> tesArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1773 PointerType::get(mInt8Ty
, 0),
1774 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1775 FunctionType
*tesFuncType
=
1776 FunctionType::get(Type::getVoidTy(JM()->mContext
), tesArgs
, false);
1778 // create new vertex shader function
1779 auto pFunction
= Function::Create(tesFuncType
,
1780 GlobalValue::ExternalLinkage
,
1782 JM()->mpCurrentModule
);
1784 #if LLVM_VERSION_MAJOR < 5
1785 AttributeSet attrSet
= AttributeSet::get(
1786 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1787 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1789 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1792 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1793 IRB()->SetInsertPoint(block
);
1794 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1796 auto argitr
= pFunction
->arg_begin();
1797 Value
*hPrivateData
= &*argitr
++;
1798 hPrivateData
->setName("hPrivateData");
1799 Value
*pWorkerData
= &*argitr
++;
1800 pWorkerData
->setName("pWorkerData");
1801 Value
*pTesCtx
= &*argitr
++;
1802 pTesCtx
->setName("tesCtx");
1805 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTES
)});
1806 consts_ptr
->setName("tes_constants");
1807 Value
*const_sizes_ptr
=
1808 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTES
});
1809 const_sizes_ptr
->setName("num_tes_constants");
1811 struct lp_build_sampler_soa
*sampler
=
1812 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_EVAL
);
1813 assert(sampler
!= nullptr);
1815 struct lp_bld_tgsi_system_values system_values
;
1816 memset(&system_values
, 0, sizeof(system_values
));
1818 // Load and calculate system values
1819 // Tessellation coordinates (gl_TessCoord)
1820 Value
*vecOffset
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
}, "vecOffset");
1821 Value
*vecStride
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorStride
}, "vecStride");
1822 Value
*vecIndex
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
});
1824 Value
* tess_coord
= ALLOCA(ArrayType::get(mSimdFP32Ty
, 3));
1826 Value
*tessCoordU
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainU
}), {vecIndex
}, "tessCoordU");
1827 STORE(tessCoordU
, tess_coord
, {0, 0});
1828 Value
*tessCoordV
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainV
}), {vecIndex
}, "tessCoordV");
1829 STORE(tessCoordV
, tess_coord
, {0, 1});
1830 Value
*tessCoordW
= FSUB(FSUB(VIMMED1(1.0f
), tessCoordU
), tessCoordV
, "tessCoordW");
1831 STORE(tessCoordW
, tess_coord
, {0, 2});
1832 system_values
.tess_coord
= wrap(tess_coord
);
1835 system_values
.prim_id
= wrap(VBROADCAST(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_PrimitiveID
}), "PrimitiveID"));
1837 // Tessellation factors
1838 Value
* pPatch
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
});
1839 Value
* pTessFactors
= GEP(pPatch
, {C(0), C(ScalarPatch_tessFactors
)});
1841 assert(SWR_NUM_OUTER_TESS_FACTORS
== 4);
1842 Value
* sys_value_outer_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1843 for (unsigned i
= 0; i
< SWR_NUM_OUTER_TESS_FACTORS
; i
++) {
1844 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors
, i
});
1845 sys_value_outer_factors
= VINSERT(sys_value_outer_factors
, v
, i
, "gl_TessLevelOuter");
1847 system_values
.tess_outer
= wrap(sys_value_outer_factors
);
1849 assert(SWR_NUM_INNER_TESS_FACTORS
== 2);
1850 Value
* sys_value_inner_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1851 for (unsigned i
= 0; i
< SWR_NUM_INNER_TESS_FACTORS
; i
++) {
1852 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors
, i
});
1853 sys_value_inner_factors
= VINSERT(sys_value_inner_factors
, v
, i
, "gl_TessLevelInner");
1855 system_values
.tess_inner
= wrap(sys_value_inner_factors
);
1859 lp_build_print_value(gallivm
, "tess_coord = ", system_values
.tess_coord
);
1862 struct tgsi_shader_info
*pPrevShader
= nullptr;
1865 pPrevShader
= &ctx
->tcs
->info
.base
;
1868 pPrevShader
= &ctx
->vs
->info
.base
;
1871 // Figure out how many per-patch attributes we have
1872 unsigned perPatchAttrs
= 0;
1873 unsigned genericAttrs
= 0;
1874 unsigned tessLevelAttrs
= 0;
1875 unsigned sgvAttrs
= 0;
1876 for (unsigned slot
= 0; slot
< pPrevShader
->num_outputs
; slot
++) {
1877 switch (pPrevShader
->output_semantic_name
[slot
]) {
1878 case TGSI_SEMANTIC_PATCH
:
1881 case TGSI_SEMANTIC_GENERIC
:
1884 case TGSI_SEMANTIC_TESSINNER
:
1885 case TGSI_SEMANTIC_TESSOUTER
:
1888 case TGSI_SEMANTIC_POSITION
:
1889 case TGSI_SEMANTIC_CLIPDIST
:
1890 case TGSI_SEMANTIC_PSIZE
:
1894 assert(!"Unknown semantic input in TES");
1898 std::vector
<Constant
*> mapConstants
;
1899 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1900 Value
*patchAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1901 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1902 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1903 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1905 // Where in TCS output is my attribute?
1906 // TESS_TODO: revisit after implement pass-through TCS
1907 unsigned tcs_slot
= locate_linkage(semantic_name
, semantic_idx
, pPrevShader
);
1908 assert(tcs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1910 // Skip tessellation levels - these go to the tessellator, not TES
1911 switch (semantic_name
) {
1912 case TGSI_SEMANTIC_GENERIC
:
1913 tcs_slot
= tcs_slot
+ VERTEX_ATTRIB_START_SLOT
- sgvAttrs
- tessLevelAttrs
;
1915 case TGSI_SEMANTIC_PATCH
:
1916 tcs_slot
= semantic_idx
;
1918 case TGSI_SEMANTIC_POSITION
:
1919 tcs_slot
= VERTEX_POSITION_SLOT
;
1921 case TGSI_SEMANTIC_CLIPDIST
:
1922 case TGSI_SEMANTIC_PSIZE
:
1925 assert(!"Unexpected semantic found while builiding TES input map");
1927 if (semantic_name
== TGSI_SEMANTIC_PATCH
) {
1928 STORE(C(tcs_slot
), patchAttribMap
, {0, slot
});
1930 STORE(C(tcs_slot
), vtxAttribMap
, {0, slot
});
1932 mapConstants
.push_back(C(tcs_slot
));
1935 // Build execution mask
1936 struct lp_build_mask_context mask
;
1937 Value
*mask_val
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_mask
}, "tesMask");
1940 lp_build_print_value(gallivm
, "TES execution mask: ", wrap(mask_val
));
1942 lp_build_mask_begin(&mask
, gallivm
,
1943 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1945 struct swr_tes_llvm_iface tes_iface
;
1947 tes_iface
.base
.fetch_vertex_input
= ::swr_tes_llvm_fetch_vtx_input
;
1948 tes_iface
.base
.fetch_patch_input
= ::swr_tes_llvm_fetch_patch_input
;
1950 tes_iface
.pBuilder
= this;
1951 tes_iface
.pTesCtx
= pTesCtx
;
1952 tes_iface
.pTsState
= pTS
;
1953 tes_iface
.num_outputs
= tes
->info
.base
.num_outputs
;
1954 tes_iface
.info
= info
;
1955 tes_iface
.pVtxAttribMap
= vtxAttribMap
;
1956 tes_iface
.pPatchAttribMap
= patchAttribMap
;
1958 struct lp_build_tgsi_params params
;
1959 memset(¶ms
, 0, sizeof(params
));
1960 params
.type
= lp_type_float_vec(32, 32 * 8);
1961 params
.mask
= & mask
;
1962 params
.consts_ptr
= wrap(consts_ptr
);
1963 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1964 params
.system_values
= &system_values
;
1965 params
.inputs
= inputs
;
1966 params
.context_ptr
= wrap(hPrivateData
);
1967 params
.sampler
= sampler
;
1968 params
.info
= &tes
->info
.base
;
1969 params
.tes_iface
= &tes_iface
.base
;
1972 lp_build_tgsi_soa(gallivm
,
1977 lp_build_mask_end(&mask
);
1979 sampler
->destroy(sampler
);
1981 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1983 // Write output attributes
1984 Value
*dclOut
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pOutputData
}, "dclOut");
1986 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
1987 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1988 if (!outputs
[attrib
][channel
])
1991 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));;
1992 Value
*attribOffset
=
1993 LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_outVertexAttribOffset
});
1995 // Assume we write position
1996 Value
* outputSlot
= C(VERTEX_POSITION_SLOT
);
1997 if (tes
->info
.base
.output_semantic_name
[attrib
] != TGSI_SEMANTIC_POSITION
) {
1998 // No, it's a generic attribute, not a position - let's calculate output slot
1999 uint32_t outSlot
= attrib
;
2000 if (tes
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
) {
2001 // this shader will write position, so in shader's term
2002 // output starts at attrib 1, but we will handle that separately,
2003 // so let's fix the outSlot
2006 outputSlot
= ADD(attribOffset
, C(outSlot
));
2009 Value
*attribVecIndex
=
2010 ADD(MUL(vecStride
, MUL(outputSlot
, C(4))), vecOffset
);
2012 uint32_t outputComponent
= 0;
2013 uint32_t curComp
= outputComponent
+ channel
;
2014 auto outValIndex
= ADD(attribVecIndex
, MUL(vecStride
, C(curComp
)));
2015 STOREV(val
, dclOut
, {outValIndex
});
2017 if (verbose_shader
) {
2018 lp_build_printf(gallivm
,
2019 "TES output [%d][%d]",
2022 lp_build_print_value(gallivm
, " = ", wrap(val
));
2029 JM()->DumpToFile(pFunction
, "src");
2030 gallivm_verify_function(gallivm
, wrap(pFunction
));
2032 gallivm_compile_module(gallivm
);
2033 JM()->DumpToFile(pFunction
, "optimized");
2035 PFN_TES_FUNC pFunc
=
2036 (PFN_TES_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2038 debug_printf("tess evaluation shader %p\n", pFunc
);
2039 assert(pFunc
&& "Error: TessEvaluationShader = NULL");
2041 JM()->DumpAsm(pFunction
, "asm");
2043 JM()->mIsModuleFinalized
= true;
// BuilderSWR::CompileTCS
// Builds the LLVM IR function for the tessellation control shader referenced
// by ctx->tcs, runs it through gallivm (verify, compile, JIT) and marks the
// JIT module as finalized.
//   ctx - SWR context; supplies tsState, the TCS itself and the VS info used
//         to link TCS inputs to VS outputs.
//   key - shader variant key; key.sampler is used to create the sampler.
// NOTE(review): this listing is a lossy extraction. The return-type line, the
// braces, the return statement and a few declarations are not visible here;
// confirm against the full file before changing any code.
2049 BuilderSWR::CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2051 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
2052 struct tgsi_shader_info
*info
= &ctx
->tcs
->info
.base
;
// Publish the TCS attribute counts, allocation size and attribute offsets
// into the tessellation state.
2054 pTS
->numHsInputAttribs
= info
->num_inputs
;
2055 pTS
->numHsOutputAttribs
= info
->num_outputs
;
2057 pTS
->hsAllocationSize
= sizeof(ScalarPatch
);
2059 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2060 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2062 struct swr_tess_control_shader
*tcs
= ctx
->tcs
;
2064 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2065 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2067 memset(outputs
, 0, sizeof(outputs
));
2069 AttrBuilder attrBuilder
;
2070 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: void(draw context*, i8*, HS context*).
2072 std::vector
<Type
*> tcsArgs
{
2073 PointerType::get(Gen_swr_draw_context(JM()), 0),
2074 PointerType::get(mInt8Ty
, 0),
2075 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2076 FunctionType
*tcsFuncType
=
2077 FunctionType::get(Type::getVoidTy(JM()->mContext
), tcsArgs
, false);
2079 // create new tessellation control shader function
2080 auto pFunction
= Function::Create(tcsFuncType
,
2081 GlobalValue::ExternalLinkage
,
2083 JM()->mpCurrentModule
);
// NOTE(review): the two addAttributes variants below are presumably the two
// arms of the LLVM_VERSION_MAJOR conditional; the #else/#endif lines are not
// visible in this listing.
2085 #if LLVM_VERSION_MAJOR < 5
2086 AttributeSet attrSet
= AttributeSet::get(
2087 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2088 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2090 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Point both the C++ IR builder and the gallivm (C API) builder at the entry
// block.
2093 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2094 IRB()->SetInsertPoint(block
);
2095 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Pick up and name the three function arguments.
2097 auto argitr
= pFunction
->arg_begin();
2098 Value
*hPrivateData
= &*argitr
++;
2099 hPrivateData
->setName("hPrivateData");
2100 Value
*pWorkerData
= &*argitr
++;
2101 pWorkerData
->setName("pWorkerData");
2102 Value
*pTcsCtx
= &*argitr
++;
2103 pTcsCtx
->setName("tcsCtx");
// NOTE(review): the declaration that receives this GEP (consts_ptr, named
// just below) is not visible in this listing.
2106 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTCS
)});
2107 consts_ptr
->setName("tcs_constants");
2108 Value
*const_sizes_ptr
=
2109 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTCS
});
2110 const_sizes_ptr
->setName("num_tcs_constants");
2112 struct lp_build_sampler_soa
*sampler
=
2113 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_CTRL
);
2114 assert(sampler
!= nullptr);
// System values: prim_id is loaded from the HS context, invocation_id is a
// broadcast constant 0, vertices_in comes from tcs->vertices_per_patch.
2116 struct lp_bld_tgsi_system_values system_values
;
2117 memset(&system_values
, 0, sizeof(system_values
));
2119 system_values
.prim_id
=
2120 wrap(LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_PrimitiveID
}));
2122 system_values
.invocation_id
= wrap(VBROADCAST(C(0)));
2123 system_values
.vertices_in
= wrap(C(tcs
->vertices_per_patch
));
2125 if (verbose_shader
) {
2126 lp_build_print_value(gallivm
, "TCS::prim_id = ", system_values
.prim_id
);
2127 lp_build_print_value(gallivm
, "TCS::invocation_id = ", system_values
.invocation_id
);
2128 lp_build_print_value(gallivm
, "TCS::vertices_in = ", system_values
.vertices_in
);
// Input attribute map: for each TCS input slot, store the vertex slot of the
// matching VS output (found with locate_linkage on the VS info).
2131 std::vector
<Constant
*> mapConstants
;
2132 Value
*vtxAttribMap
=
2133 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2135 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
2136 ubyte semantic_name
= info
->input_semantic_name
[slot
];
2137 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
// NOTE(review): the declaration receiving this result (vs_slot, used below)
// is not visible in this listing.
2140 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
2141 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
2143 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
// NOTE(review): the statement guarded by this condition is not visible in
// this listing.
2145 if (ctx
->vs
->info
.base
.output_semantic_name
[0]
2146 == TGSI_SEMANTIC_POSITION
)
// Position inputs always map to the dedicated position slot.
2149 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
2150 vs_slot
= VERTEX_POSITION_SLOT
;
2152 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
2153 mapConstants
.push_back(C(vs_slot
));
2156 // Prepare map of output attributes. Needed when shader instance wants
2157 // to read own output or output of other instance, which is allowed in TCS
2158 Value
*vtxOutputAttribMap
=
2159 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2160 // Map for per-patch attributes
2161 Value
*patchOutputAttribMap
=
2162 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2163 for (unsigned slot
= 0; slot
< info
->num_outputs
; slot
++) {
2164 ubyte name
= info
->output_semantic_name
[slot
];
2165 int32_t idx
= info
->output_semantic_index
[slot
];
// Per-patch outputs are mapped by their semantic index.
2166 if (name
== TGSI_SEMANTIC_PATCH
) {
2167 STORE(C(idx
), patchOutputAttribMap
, {0, slot
});
2169 int32_t target_slot
= slot
;
2170 if (name
== TGSI_SEMANTIC_GENERIC
) {
2171 target_slot
+= VERTEX_ATTRIB_START_SLOT
;
2173 // Now normalize target slot
// NOTE(review): the switch header and the body of the cases below are not
// visible in this listing.
2174 for (ubyte as
= 0; as
< slot
; as
++) {
2175 ubyte name
= info
->output_semantic_name
[as
];
2177 case TGSI_SEMANTIC_TESSOUTER
:
2178 case TGSI_SEMANTIC_TESSINNER
:
2179 case TGSI_SEMANTIC_PATCH
:
2180 case TGSI_SEMANTIC_POSITION
:
2184 if (name
== TGSI_SEMANTIC_POSITION
) {
2185 target_slot
= VERTEX_POSITION_SLOT
;
2187 STORE(C(target_slot
), vtxOutputAttribMap
, {0, slot
});
2188 mapConstants
.push_back(C(target_slot
));
// Execution mask: loaded from the HS context and handed to gallivm.
2192 struct lp_build_mask_context mask
;
2193 Value
*mask_val
= LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_mask
}, "tcsMask");
2194 lp_build_mask_begin(
2195 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
// Fill in the TCS interface: the SWR callbacks plus the state they need
// (builder, HS context, tessellation state and the attribute maps above).
2197 struct swr_tcs_llvm_iface tcs_iface
;
2199 tcs_iface
.base
.emit_store_output
= ::swr_tcs_llvm_store_output
;
2200 tcs_iface
.base
.emit_fetch_input
= ::swr_tcs_llvm_fetch_input
;
2201 tcs_iface
.base
.emit_fetch_output
= ::swr_tcs_llvm_fetch_output
;
2202 tcs_iface
.base
.emit_barrier
= ::swr_tcs_llvm_emit_barrier
;
2203 tcs_iface
.base
.emit_prologue
= ::swr_tcs_llvm_emit_prologue
;
2204 tcs_iface
.base
.emit_epilogue
= ::swr_tcs_llvm_emit_epilogue
;
2206 tcs_iface
.pBuilder
= this;
2207 tcs_iface
.pTcsCtx
= pTcsCtx
;
2208 tcs_iface
.pTsState
= pTS
;
2209 tcs_iface
.output_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
2210 tcs_iface
.info
= info
;
2211 tcs_iface
.pVtxAttribMap
= vtxAttribMap
;
2212 tcs_iface
.pVtxOutputAttribMap
= vtxOutputAttribMap
;
2213 tcs_iface
.pPatchOutputAttribMap
= patchOutputAttribMap
;
// Parameters for the TGSI -> LLVM translation.
2215 struct lp_build_tgsi_params params
;
2216 memset(¶ms
, 0, sizeof(params
));
2217 params
.type
= lp_type_float_vec(32, 32 * 8);
2218 params
.mask
= &mask
;
2219 params
.consts_ptr
= wrap(consts_ptr
);
2220 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2221 params
.system_values
= &system_values
;
2222 params
.inputs
= inputs
;
2223 params
.context_ptr
= wrap(hPrivateData
);
2224 params
.sampler
= sampler
;
2225 params
.info
= &tcs
->info
.base
;
2226 params
.tcs_iface
= &tcs_iface
.base
;
// Translate the shader tokens into LLVM IR.
2228 lp_build_tgsi_soa(gallivm
, tcs
->pipe
.tokens
, ¶ms
, outputs
);
2230 lp_build_mask_end(&mask
);
2232 sampler
->destroy(sampler
);
// Re-sync the C++ IR builder with wherever gallivm left its insert point.
2234 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Dump, verify and compile the module, then fetch the JIT-ed entry point.
2237 JM()->DumpToFile(pFunction
, "src");
2238 gallivm_verify_function(gallivm
, wrap(pFunction
));
2239 gallivm_compile_module(gallivm
);
2240 JM()->DumpToFile(pFunction
, "optimized");
2242 PFN_TCS_FUNC pFunc
=
2243 (PFN_TCS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2245 debug_printf("tess control shader %p\n", pFunc
);
2246 assert(pFunc
&& "Error: TessControlShader = NULL");
2247 JM()->DumpAsm(pFunction
, "asm");
2249 JM()->mIsModuleFinalized
= true;
// swr_compile_gs
// Compiles the geometry shader variant for `key` by calling
// builder.CompileGS(ctx, key), then inserts the result into ctx->gs->map
// under `key`, wrapped in a VariantGS that also holds builder.gallivm.
// NOTE(review): the return type, the construction of `builder` (which is
// passed the screen's hJitMgr cast to JitManager*) and the return statement
// are not visible in this listing.
2256 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
2259 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2261 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
// Store the compiled variant in the per-shader map, keyed by `key`.
2263 ctx
->gs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantGS
>(new VariantGS(builder
.gallivm
, func
))));
// swr_compile_tcs
// Compiles the tessellation control shader variant for `key` by calling
// builder.CompileTCS(ctx, key), then inserts the result into ctx->tcs->map
// under `key`, wrapped in a VariantTCS that also holds builder.gallivm.
// NOTE(review): the return type, the construction of `builder` (which is
// passed the screen's hJitMgr cast to JitManager*) and the return statement
// are not visible in this listing.
2268 swr_compile_tcs(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2271 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2273 PFN_TCS_FUNC func
= builder
.CompileTCS(ctx
, key
);
// Store the compiled variant in the per-shader map, keyed by `key`.
2275 ctx
->tcs
->map
.insert(
2276 std::make_pair(key
, std::unique_ptr
<VariantTCS
>(new VariantTCS(builder
.gallivm
, func
))));
// swr_compile_tes
// Compiles the tessellation evaluation shader variant for `key` by calling
// builder.CompileTES(ctx, key), then inserts the result into ctx->tes->map
// under `key`, wrapped in a VariantTES that also holds builder.gallivm.
// NOTE(review): the return type, the construction of `builder` (which is
// passed the screen's hJitMgr cast to JitManager*) and the return statement
// are not visible in this listing.
2282 swr_compile_tes(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
2285 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2287 PFN_TES_FUNC func
= builder
.CompileTES(ctx
, key
);
// Store the compiled variant in the per-shader map, keyed by `key`.
2289 ctx
->tes
->map
.insert(
2290 std::make_pair(key
, std::unique_ptr
<VariantTES
>(new VariantTES(builder
.gallivm
, func
))));
// BuilderSWR::WriteVS
// Emits IR that stores pVal into the output vertex pVtxOutput at the given
// slot and channel.
//   pVal       - value to store
//   pVsContext - VS context; only read in the SIMD16-frontend path, to load
//                the alternate offset
//   pVtxOutput - pointer to the output vertex structure
//   slot       - destination attribute slot
//   channel    - destination component within the slot
// NOTE(review): the return type and the #else/#endif separating the two
// store paths are not visible in this listing.
2296 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
2298 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2299 // interleave the simdvertex components into the dest simd16vertex
2300 // slot16offset = slot8offset * 2
2301 // comp16offset = comp8offset * 2 + alternateOffset
2303 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
2304 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
2305 STORE(pVal
, pOut
, {channel
* 2});
// Non-interleaved path: address the slot directly and store at the channel.
2307 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
2308 STORE(pVal
, pOut
, {0, channel
});
// Optional debug trace of the store when verbose_vs_shader is set.
2309 if (verbose_vs_shader
) {
2310 lp_build_printf(gallivm
, "VS: Storing on slot %d, channel %d: ", C(slot
), C(channel
));
2311 lp_build_print_value(gallivm
, "", wrap(pVal
));
// BuilderSWR::CompileVS
// Builds the LLVM IR function for the vertex shader in ctx->vs: loads the
// used input channels, translates the TGSI tokens, writes the outputs (and,
// when enabled, clip/cull distances) to the output vertex via WriteVS, then
// verifies, compiles and JITs the function through gallivm and marks the JIT
// module as finalized.
//   ctx - SWR context; supplies the vertex shader, rasterizer state and the
//         info of the later front-end stages (gs/tes/tcs).
//   key - shader variant key; key.sampler is used to create the sampler.
// NOTE(review): this listing is a lossy extraction. The return-type line, the
// braces, several statements and the preprocessor #if/#else/#endif lines are
// not visible here; confirm against the full file before changing any code.
2317 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
2319 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
2321 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2322 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2324 memset(outputs
, 0, sizeof(outputs
));
2326 AttrBuilder attrBuilder
;
2327 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: void(draw context*, i8*, VS context*).
2329 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2330 PointerType::get(mInt8Ty
, 0),
2331 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2332 FunctionType
*vsFuncType
=
2333 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
2335 // create new vertex shader function
2336 auto pFunction
= Function::Create(vsFuncType
,
2337 GlobalValue::ExternalLinkage
,
2339 JM()->mpCurrentModule
);
// NOTE(review): the two addAttributes variants below are presumably the two
// arms of the LLVM_VERSION_MAJOR conditional; the #else/#endif lines are not
// visible in this listing.
2340 #if LLVM_VERSION_MAJOR < 5
2341 AttributeSet attrSet
= AttributeSet::get(
2342 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2343 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2345 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Point both the C++ IR builder and the gallivm (C API) builder at the entry
// block.
2348 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2349 IRB()->SetInsertPoint(block
);
2350 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Pick up and name the three function arguments.
2352 auto argitr
= pFunction
->arg_begin();
2353 Value
*hPrivateData
= &*argitr
++;
2354 hPrivateData
->setName("hPrivateData");
2355 Value
*pWorkerData
= &*argitr
++;
2356 pWorkerData
->setName("pWorkerData");
2357 Value
*pVsCtx
= &*argitr
++;
2358 pVsCtx
->setName("vsCtx");
2360 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
2362 consts_ptr
->setName("vs_constants");
2363 Value
*const_sizes_ptr
=
2364 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
2365 const_sizes_ptr
->setName("num_vs_constants");
2367 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
// NOTE(review): this bitcast to simd16vertex is presumably conditional on a
// preprocessor switch that is not visible in this listing.
2369 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Load only the input channels that the shader's input_usage_mask marks as
// used.
2372 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2373 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
2374 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2375 if (mask
& (1 << channel
)) {
2376 inputs
[attrib
][channel
] =
2377 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
2382 struct lp_build_sampler_soa
*sampler
=
2383 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
2384 assert(sampler
!= nullptr);
// System values are loaded from the VS context.
2386 struct lp_bld_tgsi_system_values system_values
;
2387 memset(&system_values
, 0, sizeof(system_values
));
2388 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
// NOTE(review): the two vertex_id loads and the two vectorWidth definitions
// below are presumably alternatives selected by preprocessor conditionals
// that are not visible in this listing.
2391 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
2393 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
2397 uint32_t vectorWidth
= mVWidth16
;
2399 uint32_t vectorWidth
= mVWidth
;
// Parameters for the TGSI -> LLVM translation.
2402 struct lp_build_tgsi_params params
;
2403 memset(¶ms
, 0, sizeof(params
));
2404 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
2405 params
.consts_ptr
= wrap(consts_ptr
);
2406 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2407 params
.system_values
= &system_values
;
2408 params
.inputs
= inputs
;
2409 params
.context_ptr
= wrap(hPrivateData
);
2410 params
.sampler
= sampler
;
2411 params
.info
= &swr_vs
->info
.base
;
// Translate the shader tokens into LLVM IR (remaining arguments of this call
// are not visible in this listing).
2413 lp_build_tgsi_soa(gallivm
,
2414 swr_vs
->pipe
.tokens
,
2418 sampler
->destroy(sampler
);
// Re-sync the C++ IR builder with wherever gallivm left its insert point.
2420 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2422 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
2424 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Write every produced output channel. PSIZE goes to VERTEX_SGV_SLOT,
// POSITION to VERTEX_POSITION_SLOT, anything else to
// VERTEX_ATTRIB_START_SLOT + attrib.
// NOTE(review): the statements guarded by the bare `if` lines in this loop
// (e.g. after the null-output check) are not visible in this listing.
2427 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2428 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
2429 if (!outputs
[attrib
][channel
])
2435 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
2436 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
// Point size is always read from component 0 of the shader output.
2438 val
= LOAD(unwrap(outputs
[attrib
][0]));
2439 outSlot
= VERTEX_SGV_SLOT
;
2440 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
2441 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2442 outSlot
= VERTEX_POSITION_SLOT
;
2444 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2445 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
2446 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
2450 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
// Clip/cull distances: only handled when user clip planes are enabled or the
// VS writes cull distances.
2454 if (ctx
->rasterizer
->clip_plane_enable
||
2455 swr_vs
->info
.base
.culldist_writemask
) {
2456 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
// Choose the output used as the clip vertex: CLIPVERTEX when the shader
// writes one; the loop below looks for POSITION with semantic index 0.
// NOTE(review): the declaration of cv and the body of that loop are not
// visible in this listing.
2459 if (swr_vs
->info
.base
.writes_clipvertex
) {
2460 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
2461 &swr_vs
->info
.base
);
2463 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2464 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
2465 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
2471 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2472 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
2473 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
2474 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
2475 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
// The clip/cull write masks are read from pLastFE: it starts as the VS info
// and is replaced by the GS, TES or TCS info below.
// NOTE(review): the condition guarding the GS assignment is not visible in
// this listing.
2477 tgsi_shader_info
*pLastFE
= &ctx
->vs
->info
.base
;
2480 pLastFE
= &ctx
->gs
->info
.base
;
2482 else if (ctx
->tes
) {
2483 pLastFE
= &ctx
->tes
->info
.base
;
2485 else if (ctx
->tcs
) {
2486 pLastFE
= &ctx
->tcs
->info
.base
;
2489 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
2490 // clip distance overrides user clip planes
2491 if ((pLastFE
->clipdist_writemask
& clip_mask
& (1 << val
)) ||
2492 ((pLastFE
->culldist_writemask
<< pLastFE
->num_written_clipdistance
) & (1 << val
))) {
// Distances 0-3 come from CLIPDIST index 0, the rest from CLIPDIST index 1.
2493 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1, pLastFE
);
2494 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
// NOTE(review): the two load/write pairs below target the LO and HI
// clip/cull slots; the condition choosing between them is not visible here.
2496 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
2497 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2499 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
2500 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// NOTE(review): the statement guarded by this check is not visible in this
// listing.
2505 if (!(clip_mask
& (1 << val
)))
// User clip plane path: load the four plane coefficients from the draw
// context.
2508 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
2509 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
2510 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
2511 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
// NOTE(review): the VBROADCAST_16 and VBROADCAST groups below are presumably
// alternatives selected by a preprocessor conditional not visible here.
2513 Value
*bpx
= VBROADCAST_16(px
);
2514 Value
*bpy
= VBROADCAST_16(py
);
2515 Value
*bpz
= VBROADCAST_16(pz
);
2516 Value
*bpw
= VBROADCAST_16(pw
);
2518 Value
*bpx
= VBROADCAST(px
);
2519 Value
*bpy
= VBROADCAST(py
);
2520 Value
*bpz
= VBROADCAST(pz
);
2521 Value
*bpw
= VBROADCAST(pw
);
// dist = cx*px + cy*py + cz*pz + cw*pw
2523 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
2524 FADD(FMUL(unwrap(cy
), bpy
),
2525 FADD(FMUL(unwrap(cz
), bpz
),
2526 FMUL(unwrap(cw
), bpw
))));
2529 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2531 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// Dump, verify and compile the module, then fetch the JIT-ed entry point.
2537 JM()->DumpToFile(pFunction
, "vs_function1");
2538 gallivm_verify_function(gallivm
, wrap(pFunction
));
2539 gallivm_compile_module(gallivm
);
2540 JM()->DumpToFile(pFunction
, "vs_function2");
2542 // lp_debug_dump_value(func);
2544 PFN_VERTEX_FUNC pFunc
=
2545 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2547 JM()->DumpAsm(pFunction
, "vs_function_asm");
2548 debug_printf("vert shader %p\n", pFunc
);
2549 assert(pFunc
&& "Error: VertShader = NULL");
2551 JM()->mIsModuleFinalized
= true;
// swr_compile_vs
// Compiles the vertex shader variant for `key` by calling
// builder.CompileVS(ctx, key), then inserts the result into ctx->vs->map
// under `key`, wrapped in a VariantVS that also holds builder.gallivm.
// NOTE(review): the return type, the statement guarded by the token check,
// the construction of `builder` (which is passed the screen's hJitMgr cast to
// JitManager*) and the return statement are not visible in this listing.
2557 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
// Special-case a vertex shader that has no TGSI tokens (presumably an early
// return - confirm against the full file).
2559 if (!ctx
->vs
->pipe
.tokens
)
2563 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2565 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
// Store the compiled variant in the per-shader map, keyed by `key`.
2567 ctx
->vs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantVS
>(new VariantVS(builder
.gallivm
, func
))));
// swr_so_adjust_attrib
// Translates a vertex shader output index (in_attrib) into a vertex slot,
// based on the semantic name of that output:
//   POSITION     -> VERTEX_POSITION_SLOT
//   PSIZE, LAYER -> VERTEX_SGV_SLOT
//   otherwise    -> starts from in_attrib + VERTEX_ATTRIB_START_SLOT
// NOTE(review): the return type, the declaration of `attrib`, the body of the
// writes_position branch and the return statement are not visible in this
// listing. The "so" in the name suggests stream output - confirm at callers.
2572 swr_so_adjust_attrib(unsigned in_attrib
,
2573 swr_vertex_shader
*swr_vs
)
2575 ubyte semantic_name
;
// Default mapping; overridden below for position / point size / layer.
2578 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
2581 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
2582 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
2583 attrib
= VERTEX_POSITION_SLOT
;
2584 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
2585 attrib
= VERTEX_SGV_SLOT
;
2586 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
2587 attrib
= VERTEX_SGV_SLOT
;
// Further adjustment when the shader writes position (body not visible in
// this listing).
2589 if (swr_vs
->info
.base
.writes_position
) {
// locate_linkage
// Searches the outputs described by `info` for one whose semantic name and
// semantic index equal `name` and `index`.
// The scan covers all PIPE_MAX_SHADER_OUTPUTS entries of the semantic arrays,
// not just the first info->num_outputs.
// NOTE(review): the return type and both return statements are not visible
// in this listing. Callers in this file use the result as an output slot
// index and compare it against 0xFFFFFFFF for the not-found case.
2599 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
2601 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2602 if ((info
->output_semantic_name
[i
] == name
)
2603 && (info
->output_semantic_index
[i
] == index
)) {
2612 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
2614 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
2616 struct tgsi_shader_info
*pPrevShader
;
2618 pPrevShader
= &ctx
->gs
->info
.base
;
2620 pPrevShader
= &ctx
->tes
->info
.base
;
2622 pPrevShader
= &ctx
->vs
->info
.base
;
2624 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2625 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2627 memset(inputs
, 0, sizeof(inputs
));
2628 memset(outputs
, 0, sizeof(outputs
));
2630 struct lp_build_sampler_soa
*sampler
= NULL
;
2632 AttrBuilder attrBuilder
;
2633 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2635 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2636 PointerType::get(mInt8Ty
, 0),
2637 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2638 FunctionType
*funcType
=
2639 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
2641 auto pFunction
= Function::Create(funcType
,
2642 GlobalValue::ExternalLinkage
,
2644 JM()->mpCurrentModule
);
2645 #if LLVM_VERSION_MAJOR < 5
2646 AttributeSet attrSet
= AttributeSet::get(
2647 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2648 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2650 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2653 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2654 IRB()->SetInsertPoint(block
);
2655 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2657 auto args
= pFunction
->arg_begin();
2658 Value
*hPrivateData
= &*args
++;
2659 hPrivateData
->setName("hPrivateData");
2660 Value
*pWorkerData
= &*args
++;
2661 pWorkerData
->setName("pWorkerData");
2662 Value
*pPS
= &*args
++;
2663 pPS
->setName("psCtx");
2665 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
2666 consts_ptr
->setName("fs_constants");
2667 Value
*const_sizes_ptr
=
2668 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
2669 const_sizes_ptr
->setName("num_fs_constants");
2671 // load *pAttribs, *pPerspAttribs
2672 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
2673 Value
*pPerspAttribs
=
2674 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
2676 swr_fs
->constantMask
= 0;
2677 swr_fs
->flatConstantMask
= 0;
2678 swr_fs
->pointSpriteMask
= 0;
2680 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2681 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
2682 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
2683 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
2689 Value
*vi
= nullptr, *vj
= nullptr;
2690 switch (interpLoc
) {
2691 case TGSI_INTERPOLATE_LOC_CENTER
:
2692 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
2693 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
2695 case TGSI_INTERPOLATE_LOC_CENTROID
:
2696 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
2697 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
2699 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2700 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
2701 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
2706 Value
*vw
= nullptr, *pAttribs
;
2707 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2708 interpMode
== TGSI_INTERPOLATE_COLOR
) {
2709 pAttribs
= pPerspAttribs
;
2710 switch (interpLoc
) {
2711 case TGSI_INTERPOLATE_LOC_CENTER
:
2712 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
2714 case TGSI_INTERPOLATE_LOC_CENTROID
:
2715 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
2717 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2718 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
2722 pAttribs
= pRawAttribs
;
2728 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
2729 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
2731 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
2733 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
2734 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
2735 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
2737 inputs
[attrib
][0] = wrap(ff
);
2738 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2739 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2740 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2742 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
2743 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
2744 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
2745 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
2746 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
2748 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
2749 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
2751 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
2753 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
2755 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) { // gl_Layer
2756 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_renderTargetArrayIndex
});
2757 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vRenderTargetArrayIndex");
2758 inputs
[attrib
][0] = wrap(ff
);
2759 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2760 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2761 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2763 } else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) { // gl_ViewportIndex
2764 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_viewportIndex
});
2765 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vViewportIndex");
2766 inputs
[attrib
][0] = wrap(ff
);
2767 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2768 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2769 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2772 unsigned linkedAttrib
=
2773 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
2775 uint32_t extraAttribs
= 0;
2776 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
2777 /* non-gs generated primID - need to grab from swizzleMap override */
2778 linkedAttrib
= pPrevShader
->num_outputs
- 1;
2779 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2781 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
2782 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
2783 /* we add an extra attrib to the backendState in swr_update_derived. */
2784 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
2785 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
2787 } else if (linkedAttrib
+ 1 == 0xFFFFFFFF) {
2788 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
2789 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2790 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2791 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2792 /* If we're reading in color and 2-sided lighting is enabled, we have
2795 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
2798 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2799 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2800 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2801 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
2805 unsigned bcolorAttrib
= 0xFFFFFFFF;
2806 Value
*offset
= NULL
;
2807 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
2808 bcolorAttrib
= locate_linkage(
2809 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
);
2810 /* Neither front nor back colors were available. Nothing to load. */
2811 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
2813 /* If there is no front color, just always use the back color. */
2814 if (linkedAttrib
+ 1 == 0xFFFFFFFF)
2815 linkedAttrib
= bcolorAttrib
;
2817 if (bcolorAttrib
!= 0xFFFFFFFF) {
2819 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2820 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
2821 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2822 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
2825 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
2829 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
2831 offset
= MUL(back
, C(diff
));
2832 offset
->setName("offset");
2837 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2838 if (mask
& (1 << channel
)) {
2839 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
2840 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
2841 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
2844 indexA
= ADD(indexA
, offset
);
2845 indexB
= ADD(indexB
, offset
);
2846 indexC
= ADD(indexC
, offset
);
2849 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
2850 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
2851 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
2853 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2854 inputs
[attrib
][channel
] = wrap(va
);
2856 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
2860 Value
*interp
= FMUL(va
, vi
);
2861 Value
*interp1
= FMUL(vb
, vj
);
2862 interp
= FADD(interp
, interp1
);
2863 interp
= FADD(interp
, vc
);
2864 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2865 interpMode
== TGSI_INTERPOLATE_COLOR
)
2866 interp
= FMUL(interp
, vw
);
2867 inputs
[attrib
][channel
] = wrap(interp
);
2873 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
2874 assert(sampler
!= nullptr);
2876 struct lp_bld_tgsi_system_values system_values
;
2877 memset(&system_values
, 0, sizeof(system_values
));
2879 struct lp_build_mask_context mask
;
2880 bool uses_mask
= false;
2882 if (swr_fs
->info
.base
.uses_kill
||
2883 key
.poly_stipple_enable
) {
2884 Value
*vActiveMask
= NULL
;
2885 if (swr_fs
->info
.base
.uses_kill
) {
2886 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
2888 if (key
.poly_stipple_enable
) {
2889 // first get fragment xy coords and clip to stipple bounds
2890 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
2891 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
2892 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
2893 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
2895 // stipple pattern is 32x32, which means that one line of stipple
2896 // is stored in one word:
2897 // vXstipple is bit offset inside 32-bit stipple word
2898 // vYstipple is word index in the stipple array
2899 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
2900 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
2902 // grab stipple pattern base address
2903 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
2904 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
2906 // perform a gather to grab stipple words for each lane
2907 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
2908 VIMMED1(0xffffffff), 4);
2910 // create a mask with one bit corresponding to the x stipple
2911 // and AND it with the pattern, to see if we have a bit
2912 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
2913 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
2914 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
2915 vStippleMask
= VMASK(vStippleMask
);
2917 if (swr_fs
->info
.base
.uses_kill
) {
2918 vActiveMask
= AND(vActiveMask
, vStippleMask
);
2920 vActiveMask
= vStippleMask
;
2923 lp_build_mask_begin(
2924 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
2928 struct lp_build_tgsi_params params
;
2929 memset(¶ms
, 0, sizeof(params
));
2930 params
.type
= lp_type_float_vec(32, 32 * 8);
2931 params
.mask
= uses_mask
? &mask
: NULL
;
2932 params
.consts_ptr
= wrap(consts_ptr
);
2933 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2934 params
.system_values
= &system_values
;
2935 params
.inputs
= inputs
;
2936 params
.context_ptr
= wrap(hPrivateData
);
2937 params
.sampler
= sampler
;
2938 params
.info
= &swr_fs
->info
.base
;
2940 lp_build_tgsi_soa(gallivm
,
2941 swr_fs
->pipe
.tokens
,
2945 sampler
->destroy(sampler
);
2947 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2949 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
2951 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
2952 case TGSI_SEMANTIC_POSITION
: {
2955 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
2956 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
2959 case TGSI_SEMANTIC_COLOR
: {
2960 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2961 if (!outputs
[attrib
][channel
])
2965 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
2966 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
2967 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
2968 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
2971 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
2977 SWR_PS_CONTEXT_shaded
,
2978 swr_fs
->info
.base
.output_semantic_index
[attrib
],
2986 "unknown output from FS %s[%d]\n",
2987 tgsi_semantic_names
[swr_fs
->info
.base
2988 .output_semantic_name
[attrib
]],
2989 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
2995 LLVMValueRef mask_result
= 0;
2997 mask_result
= lp_build_mask_end(&mask
);
3000 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
3003 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
3008 gallivm_verify_function(gallivm
, wrap(pFunction
));
3010 gallivm_compile_module(gallivm
);
3012 // after the gallivm passes, we have to lower the core's intrinsics
3013 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
3014 lowerPass
.add(createLowerX86Pass(this));
3015 lowerPass
.run(*pFunction
);
3017 PFN_PIXEL_KERNEL kernel
=
3018 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
3019 debug_printf("frag shader %p\n", kernel
);
3020 assert(kernel
&& "Error: FragShader = NULL");
3022 JM()->mIsModuleFinalized
= true;
3028 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
3030 if (!ctx
->fs
->pipe
.tokens
)
3034 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
3036 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
3038 ctx
->fs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantFS
>(new VariantFS(builder
.gallivm
, func
))));