1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 #include <llvm/Config/llvm-config.h>
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
42 #include "gen_state_llvm.h"
44 #include "functionpasses/passes.h"
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
55 #include "gallivm/lp_bld_logic.h"
57 #include "swr_context.h"
58 #include "gen_surf_state_llvm.h"
59 #include "gen_swr_context_llvm.h"
60 #include "swr_resource.h"
61 #include "swr_state.h"
62 #include "swr_screen.h"
65 /////////////////////////////////////////////////////////////////////////
70 #include "util/u_debug.h"
71 #include "util/u_memory.h"
72 #include "util/u_string.h"
74 #include "gallivm/lp_bld_type.h"
// Compile-time switches that gate JIT-time shader tracing.  They are all
// enabled only when both DEBUG and SWR_VERBOSE_SHADER are defined; in every
// other build each flag is false.  The #else/#endif keep the two sets of
// definitions mutually exclusive so each constant is defined exactly once.
#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
constexpr bool verbose_shader          = true;
constexpr bool verbose_tcs_shader_in   = true;
constexpr bool verbose_tcs_shader_out  = true;
constexpr bool verbose_tcs_shader_loop = true;
constexpr bool verbose_vs_shader       = true;
#else
constexpr bool verbose_shader          = false;
constexpr bool verbose_tcs_shader_in   = false;
constexpr bool verbose_tcs_shader_out  = false;
constexpr bool verbose_tcs_shader_loop = false;
constexpr bool verbose_vs_shader       = false;
#endif
90 using namespace SwrJit
;
93 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
95 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
97 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
100 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
102 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
105 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
107 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
110 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
112 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
115 bool operator==(const swr_jit_tcs_key
&lhs
, const swr_jit_tcs_key
&rhs
)
117 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
120 bool operator==(const swr_jit_tes_key
&lhs
, const swr_jit_tes_key
&rhs
)
122 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
127 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
128 struct swr_context
*ctx
,
129 enum pipe_shader_type shader_type
,
130 struct swr_jit_sampler_key
&key
)
132 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
134 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
135 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
136 lp_sampler_static_sampler_state(
137 &key
.sampler
[i
].sampler_state
,
138 ctx
->samplers
[shader_type
][i
]);
143 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
144 * are dx10-style? Can't really have mixed opcodes, at least not
145 * if we want to skip the holes here (without rescanning tgsi).
147 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
148 key
.nr_sampler_views
=
149 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
150 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
151 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
152 const struct pipe_sampler_view
*view
=
153 ctx
->sampler_views
[shader_type
][i
];
154 lp_sampler_static_texture_state(
155 &key
.sampler
[i
].texture_state
, view
);
157 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
158 const struct util_format_description
*desc
=
159 util_format_description(view
->format
);
160 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
161 !util_format_has_depth(desc
))
162 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
167 key
.nr_sampler_views
= key
.nr_samplers
;
168 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
169 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
170 const struct pipe_sampler_view
*view
=
171 ctx
->sampler_views
[shader_type
][i
];
172 lp_sampler_static_texture_state(
173 &key
.sampler
[i
].texture_state
, view
);
175 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
176 const struct util_format_description
*desc
=
177 util_format_description(view
->format
);
178 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
179 !util_format_has_depth(desc
))
180 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
188 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
189 struct swr_context
*ctx
,
190 swr_fragment_shader
*swr_fs
)
192 memset((void*)&key
, 0, sizeof(key
));
194 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
195 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
196 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
198 struct tgsi_shader_info
*pPrevShader
;
200 pPrevShader
= &ctx
->gs
->info
.base
;
202 pPrevShader
= &ctx
->tes
->info
.base
;
204 pPrevShader
= &ctx
->vs
->info
.base
;
206 memcpy(&key
.vs_output_semantic_name
,
207 &pPrevShader
->output_semantic_name
,
208 sizeof(key
.vs_output_semantic_name
));
209 memcpy(&key
.vs_output_semantic_idx
,
210 &pPrevShader
->output_semantic_index
,
211 sizeof(key
.vs_output_semantic_idx
));
213 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
215 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
216 ctx
->poly_stipple
.prim_is_poly
;
220 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
221 struct swr_context
*ctx
,
222 swr_vertex_shader
*swr_vs
)
224 memset((void*)&key
, 0, sizeof(key
));
226 key
.clip_plane_mask
=
227 swr_vs
->info
.base
.clipdist_writemask
?
228 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
229 ctx
->rasterizer
->clip_plane_enable
;
231 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
235 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
236 struct swr_vertex_element_state
*velems
)
238 memset((void*)&key
, 0, sizeof(key
));
240 key
.fsState
= velems
->fsState
;
244 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
245 struct swr_context
*ctx
,
246 swr_geometry_shader
*swr_gs
)
248 memset((void*)&key
, 0, sizeof(key
));
250 struct tgsi_shader_info
*pPrevShader
= nullptr;
253 pPrevShader
= &ctx
->tes
->info
.base
;
255 pPrevShader
= &ctx
->vs
->info
.base
;
258 memcpy(&key
.vs_output_semantic_name
,
259 &pPrevShader
->output_semantic_name
,
260 sizeof(key
.vs_output_semantic_name
));
261 memcpy(&key
.vs_output_semantic_idx
,
262 &pPrevShader
->output_semantic_index
,
263 sizeof(key
.vs_output_semantic_idx
));
265 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
269 swr_generate_tcs_key(struct swr_jit_tcs_key
&key
,
270 struct swr_context
*ctx
,
271 swr_tess_control_shader
*swr_tcs
)
273 memset((void*)&key
, 0, sizeof(key
));
275 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
277 memcpy(&key
.vs_output_semantic_name
,
278 &pPrevShader
->output_semantic_name
,
279 sizeof(key
.vs_output_semantic_name
));
280 memcpy(&key
.vs_output_semantic_idx
,
281 &pPrevShader
->output_semantic_index
,
282 sizeof(key
.vs_output_semantic_idx
));
284 key
.clip_plane_mask
=
285 swr_tcs
->info
.base
.clipdist_writemask
?
286 swr_tcs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
287 ctx
->rasterizer
->clip_plane_enable
;
289 swr_generate_sampler_key(swr_tcs
->info
, ctx
, PIPE_SHADER_TESS_CTRL
, key
);
293 swr_generate_tes_key(struct swr_jit_tes_key
&key
,
294 struct swr_context
*ctx
,
295 swr_tess_evaluation_shader
*swr_tes
)
297 memset((void*)&key
, 0, sizeof(key
));
299 struct tgsi_shader_info
*pPrevShader
= nullptr;
302 pPrevShader
= &ctx
->tcs
->info
.base
;
305 pPrevShader
= &ctx
->vs
->info
.base
;
308 SWR_ASSERT(pPrevShader
!= nullptr, "TES: No TCS or VS defined");
310 memcpy(&key
.prev_output_semantic_name
,
311 &pPrevShader
->output_semantic_name
,
312 sizeof(key
.prev_output_semantic_name
));
313 memcpy(&key
.prev_output_semantic_idx
,
314 &pPrevShader
->output_semantic_index
,
315 sizeof(key
.prev_output_semantic_idx
));
317 key
.clip_plane_mask
=
318 swr_tes
->info
.base
.clipdist_writemask
?
319 swr_tes
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
320 ctx
->rasterizer
->clip_plane_enable
;
322 swr_generate_sampler_key(swr_tes
->info
, ctx
, PIPE_SHADER_TESS_EVAL
, key
);
325 struct BuilderSWR
: public Builder
{
326 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
329 pJitMgr
->SetupNewModule();
330 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
), NULL
);
331 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
335 gallivm_free_ir(gallivm
);
338 void WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
,
339 unsigned slot
, unsigned channel
);
341 struct gallivm_state
*gallivm
;
342 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
343 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
344 PFN_GS_FUNC
CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
);
345 PFN_TCS_FUNC
CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
);
346 PFN_TES_FUNC
CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
);
348 // GS-specific emit functions
350 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
351 struct lp_build_context
* bld
,
352 boolean is_vindex_indirect
,
353 LLVMValueRef vertex_index
,
354 boolean is_aindex_indirect
,
355 LLVMValueRef attrib_index
,
356 LLVMValueRef swizzle_index
);
358 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
359 struct lp_build_context
* bld
,
360 LLVMValueRef (*outputs
)[4],
361 LLVMValueRef emitted_vertices_vec
,
362 LLVMValueRef stream_id
);
365 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
366 struct lp_build_context
* bld
,
367 LLVMValueRef total_emitted_vertices_vec_ptr
,
368 LLVMValueRef verts_per_prim_vec
,
369 LLVMValueRef emitted_prims_vec
,
370 LLVMValueRef mask_vec
);
373 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
374 LLVMValueRef total_emitted_vertices_vec
,
375 LLVMValueRef emitted_prims_vec
, unsigned stream
);
377 // TCS-specific emit functions
378 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
);
379 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
);
382 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
383 struct lp_build_tgsi_context
* bld_base
,
384 boolean is_vindex_indirect
,
385 LLVMValueRef vertex_index
,
386 boolean is_aindex_indirect
,
387 LLVMValueRef attrib_index
,
388 LLVMValueRef swizzle_index
);
391 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
392 struct lp_build_tgsi_context
* bld_base
,
393 boolean is_vindex_indirect
,
394 LLVMValueRef vertex_index
,
395 boolean is_aindex_indirect
,
396 LLVMValueRef attrib_index
,
397 LLVMValueRef swizzle_index
,
401 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
402 struct lp_build_tgsi_context
* bld_base
,
404 boolean is_vindex_indirect
,
405 LLVMValueRef vertex_index
,
406 boolean is_aindex_indirect
,
407 LLVMValueRef attrib_index
,
408 LLVMValueRef swizzle_index
,
410 LLVMValueRef mask_vec
);
412 // Barrier implementation (available only in TCS)
414 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
415 struct lp_build_tgsi_context
*bld_base
);
417 // TES-specific emit functions
419 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
420 struct lp_build_tgsi_context
* bld_base
,
421 boolean is_vindex_indirect
,
422 LLVMValueRef vertex_index
,
423 boolean is_aindex_indirect
,
424 LLVMValueRef attrib_index
,
425 LLVMValueRef swizzle_index
);
428 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
429 struct lp_build_tgsi_context
* bld_base
,
430 boolean is_aindex_indirect
,
431 LLVMValueRef attrib_index
,
432 LLVMValueRef swizzle_index
);
435 struct swr_gs_llvm_iface
{
436 struct lp_build_gs_iface base
;
437 struct tgsi_shader_info
*info
;
439 BuilderSWR
*pBuilder
;
442 SWR_GS_STATE
*pGsState
;
443 uint32_t num_outputs
;
444 uint32_t num_verts_per_prim
;
446 Value
*pVtxAttribMap
;
449 struct swr_tcs_llvm_iface
{
450 struct lp_build_tcs_iface base
;
451 struct tgsi_shader_info
*info
;
453 BuilderSWR
*pBuilder
;
456 SWR_TS_STATE
*pTsState
;
458 uint32_t output_vertices
;
460 LLVMValueRef loop_var
;
462 Value
*pVtxAttribMap
;
463 Value
*pVtxOutputAttribMap
;
464 Value
*pPatchOutputAttribMap
;
467 struct swr_tes_llvm_iface
{
468 struct lp_build_tes_iface base
;
469 struct tgsi_shader_info
*info
;
471 BuilderSWR
*pBuilder
;
474 SWR_TS_STATE
*pTsState
;
476 uint32_t num_outputs
;
478 Value
*pVtxAttribMap
;
479 Value
*pPatchAttribMap
;
482 // trampoline functions so we can use the builder llvm construction methods
484 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
485 struct lp_build_context
* bld
,
486 boolean is_vindex_indirect
,
487 LLVMValueRef vertex_index
,
488 boolean is_aindex_indirect
,
489 LLVMValueRef attrib_index
,
490 LLVMValueRef swizzle_index
)
492 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
494 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld
,
503 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
504 struct lp_build_context
* bld
,
505 LLVMValueRef (*outputs
)[4],
506 LLVMValueRef emitted_vertices_vec
,
507 LLVMValueRef mask_vec
,
508 LLVMValueRef stream_id
)
510 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
512 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld
,
514 emitted_vertices_vec
,
519 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
520 struct lp_build_context
* bld
,
521 LLVMValueRef total_emitted_vertices_vec_ptr
,
522 LLVMValueRef verts_per_prim_vec
,
523 LLVMValueRef emitted_prims_vec
,
524 LLVMValueRef mask_vec
, unsigned stream_id
)
526 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
528 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld
,
529 total_emitted_vertices_vec_ptr
,
536 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
537 LLVMValueRef total_emitted_vertices_vec
,
538 LLVMValueRef emitted_prims_vec
, unsigned stream
)
540 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
542 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
,
543 total_emitted_vertices_vec
,
544 emitted_prims_vec
, stream
);
548 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
549 struct lp_build_context
* bld
,
550 boolean is_vindex_indirect
,
551 LLVMValueRef vertex_index
,
552 boolean is_aindex_indirect
,
553 LLVMValueRef attrib_index
,
554 LLVMValueRef swizzle_index
)
556 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
557 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
559 return iface
->pBuilder
->swr_tcs_llvm_fetch_input(tcs_iface
, bld_base
,
568 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
569 struct lp_build_context
* bld
,
570 boolean is_vindex_indirect
,
571 LLVMValueRef vertex_index
,
572 boolean is_aindex_indirect
,
573 LLVMValueRef attrib_index
,
574 LLVMValueRef swizzle_index
,
577 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
578 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
580 return iface
->pBuilder
->swr_tcs_llvm_fetch_output(tcs_iface
, bld_base
,
591 swr_tcs_llvm_emit_prologue(struct lp_build_context
* bld
)
593 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
594 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
595 iface
->pBuilder
->swr_tcs_llvm_emit_prologue(bld_base
);
599 swr_tcs_llvm_emit_epilogue(struct lp_build_context
* bld
)
601 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
602 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
603 iface
->pBuilder
->swr_tcs_llvm_emit_epilogue(bld_base
);
607 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
608 struct lp_build_context
* bld
,
610 boolean is_vindex_indirect
,
611 LLVMValueRef vertex_index
,
612 boolean is_aindex_indirect
,
613 LLVMValueRef attrib_index
,
614 boolean is_sindex_indirect
,
615 LLVMValueRef swizzle_index
,
617 LLVMValueRef mask_vec
)
619 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
620 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
622 iface
->pBuilder
->swr_tcs_llvm_store_output(tcs_iface
,
636 void swr_tcs_llvm_emit_barrier(struct lp_build_context
*bld
)
638 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
639 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
641 iface
->pBuilder
->swr_tcs_llvm_emit_barrier(bld_base
->tcs_iface
, &bld_base
->bld_base
);
646 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
647 struct lp_build_context
* bld
,
648 boolean is_vindex_indirect
,
649 LLVMValueRef vertex_index
,
650 boolean is_aindex_indirect
,
651 LLVMValueRef attrib_index
,
652 LLVMValueRef swizzle_index
)
654 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
655 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
657 return iface
->pBuilder
->swr_tes_llvm_fetch_vtx_input(tes_iface
, bld_base
,
666 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
667 struct lp_build_context
* bld
,
668 boolean is_aindex_indirect
,
669 LLVMValueRef attrib_index
,
670 LLVMValueRef swizzle_index
)
672 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
673 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
675 return iface
->pBuilder
->swr_tes_llvm_fetch_patch_input(tes_iface
, bld_base
,
682 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
683 struct lp_build_context
* bld
,
684 boolean is_vindex_indirect
,
685 LLVMValueRef vertex_index
,
686 boolean is_aindex_indirect
,
687 LLVMValueRef attrib_index
,
688 LLVMValueRef swizzle_index
)
690 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
691 Value
*vert_index
= unwrap(vertex_index
);
692 Value
*attr_index
= unwrap(attrib_index
);
694 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
696 if (is_vindex_indirect
|| is_aindex_indirect
) {
698 Value
*res
= unwrap(bld
->zero
);
699 struct lp_type type
= bld
->type
;
701 for (i
= 0; i
< type
.length
; i
++) {
702 Value
*vert_chan_index
= vert_index
;
703 Value
*attr_chan_index
= attr_index
;
705 if (is_vindex_indirect
) {
706 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
708 if (is_aindex_indirect
) {
709 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
713 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
715 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
716 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
718 Value
*pVector
= ADD(MUL(vert_chan_index
, pInputVertStride
), attrib
);
719 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
721 Value
*value
= VEXTRACT(pInput
, C(i
));
722 res
= VINSERT(res
, value
, C(i
));
727 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
729 Value
*pVertex
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pVerts
});
730 Value
*pInputVertStride
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_inputVertStride
});
732 Value
*pVector
= ADD(MUL(vert_index
, pInputVertStride
), attrib
);
734 Value
*pInput
= LOAD(GEP(pVertex
, {pVector
, unwrap(swizzle_index
)}));
// GS output stream layout (byte offsets from the start of each lane's stream):
//   [0, VERTEX_COUNT_SIZE)                 emitted-vertex count, stored in the
//                                          first DWORD by the GS epilogue
//   [VERTEX_COUNT_SIZE, + CONTROL_HEADER)  control bits (cut bits / stream ids)
//   after both                             vertex data
#define VERTEX_COUNT_SIZE   32
#define CONTROL_HEADER_SIZE (8 * 32)
745 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
746 struct lp_build_context
* bld
,
747 LLVMValueRef (*outputs
)[4],
748 LLVMValueRef emitted_vertices_vec
,
749 LLVMValueRef stream_id
)
751 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
753 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
754 const uint32_t headerSize
= VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
;
755 const uint32_t attribSize
= 4 * sizeof(float);
756 const uint32_t vertSize
= attribSize
* SWR_VTX_NUM_SLOTS
;
757 Value
*pVertexOffset
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(vertSize
));
759 Value
*vMask
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_mask
});
760 Value
*vMask1
= TRUNC(vMask
, getVectorType(mInt1Ty
, mVWidth
));
762 Value
*pStack
= STACKSAVE();
763 Value
*pTmpPtr
= ALLOCA(mFP32Ty
, C(4)); // used for dummy write for lane masking
765 for (uint32_t attrib
= 0; attrib
< iface
->num_outputs
; ++attrib
) {
766 uint32_t attribSlot
= attrib
;
767 uint32_t sgvChannel
= 0;
768 if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
769 attribSlot
= VERTEX_SGV_SLOT
;
770 sgvChannel
= VERTEX_SGV_POINT_SIZE_COMP
;
771 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_LAYER
) {
772 attribSlot
= VERTEX_SGV_SLOT
;
773 sgvChannel
= VERTEX_SGV_RTAI_COMP
;
774 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_VIEWPORT_INDEX
) {
775 attribSlot
= VERTEX_SGV_SLOT
;
776 sgvChannel
= VERTEX_SGV_VAI_COMP
;
777 } else if (iface
->info
->output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
778 attribSlot
= VERTEX_POSITION_SLOT
;
780 attribSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
781 if (iface
->info
->writes_position
) {
786 Value
*pOutputOffset
= ADD(pVertexOffset
, VIMMED1(headerSize
+ attribSize
* attribSlot
)); // + sgvChannel ?
788 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
789 Value
*pLaneOffset
= VEXTRACT(pOutputOffset
, C(lane
));
790 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
791 Value
*pStreamOffset
= GEP(pStream
, pLaneOffset
);
792 pStreamOffset
= BITCAST(pStreamOffset
, mFP32PtrTy
);
794 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
795 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
797 for (uint32_t channel
= 0; channel
< 4; ++channel
) {
800 if (attribSlot
== VERTEX_SGV_SLOT
)
801 vData
= LOAD(unwrap(outputs
[attrib
][0]));
803 vData
= LOAD(unwrap(outputs
[attrib
][channel
]));
805 if (attribSlot
!= VERTEX_SGV_SLOT
||
806 sgvChannel
== channel
) {
807 vData
= VEXTRACT(vData
, C(lane
));
808 STORE(vData
, pStreamOffset
);
810 pStreamOffset
= GEP(pStreamOffset
, C(1));
815 /* When the output type is not points, the geometry shader may not
816 * output data to multiple streams. So early exit here.
818 if(iface
->pGsState
->outputTopology
!= TOP_POINT_LIST
) {
819 STACKRESTORE(pStack
);
823 // Info about stream id for each vertex
824 // is coded in 2 bits (4 vert per byte "box"):
825 // ----------------- ----------------- ----
826 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
827 // ----------------- ----------------- ----
829 // Calculate where need to put stream id for current vert
831 Value
*pShiftControl
= MUL(unwrap(emitted_vertices_vec
), VIMMED1(2));
833 // Calculate in which box put stream id for current vert.
834 Value
*pOffsetControl
= LSHR(unwrap(emitted_vertices_vec
), VIMMED1(2));
837 Value
*pStreamIdOffset
= ADD(pOffsetControl
, VIMMED1(VERTEX_COUNT_SIZE
));
839 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
840 Value
*pShift
= TRUNC(VEXTRACT(pShiftControl
, C(lane
)), mInt8Ty
);
841 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
843 Value
*pStreamOffset
= GEP(pStream
, VEXTRACT(pStreamIdOffset
, C(lane
)));
845 // Just make sure that not overflow max - stream id = (0,1,2,3)
846 Value
*vVal
= TRUNC(AND(VEXTRACT(unwrap(stream_id
), C(0)), C(0x3)), mInt8Ty
);
848 // Shift it to correct position in byte "box"
849 vVal
= SHL(vVal
, pShift
);
851 // Info about other vertices can be already stored
852 // so we need to read and add bits from current vert info.
853 Value
*storedValue
= LOAD(pStreamOffset
);
854 vVal
= OR(storedValue
, vVal
);
855 STORE(vVal
, pStreamOffset
);
858 STACKRESTORE(pStack
);
862 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
863 struct lp_build_context
* bld
,
864 LLVMValueRef total_emitted_vertices_vec
,
865 LLVMValueRef verts_per_prim_vec
,
866 LLVMValueRef emitted_prims_vec
,
867 LLVMValueRef mask_vec
)
869 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
871 /* When the output type is points, the geometry shader may output data
872 * to multiple streams, and end_primitive has no effect. Info about
873 * stream id for vertices is stored into the same place in memory where
874 * end primitive info is stored so early exit in this case.
876 if (iface
->pGsState
->outputTopology
== TOP_POINT_LIST
) {
880 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
882 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
883 Value
*vMask1
= TRUNC(vMask
, getVectorType(mInt1Ty
, 8));
885 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
888 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
889 unwrap(verts_per_prim_vec
));
891 vCount
= unwrap(total_emitted_vertices_vec
);
893 Value
*mask
= unwrap(mask_vec
);
894 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
895 mask
= AND(mask
, cmpMask
);
896 vMask1
= TRUNC(mask
, getVectorType(mInt1Ty
, 8));
898 vCount
= SUB(vCount
, VIMMED1(1));
899 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
900 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
902 vValue
= TRUNC(vValue
, getVectorType(mInt8Ty
, 8));
904 Value
*pStack
= STACKSAVE();
905 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
907 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
908 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
909 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
910 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
912 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
913 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
915 Value
*vVal
= LOAD(pStreamOffset
);
916 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
917 STORE(vVal
, pStreamOffset
);
920 STACKRESTORE(pStack
);
924 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
925 LLVMValueRef total_emitted_vertices_vec
,
926 LLVMValueRef emitted_prims_vec
, unsigned stream
)
928 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
930 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
932 // Store emit count to each output stream in the first DWORD
933 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
935 Value
* pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
936 pStream
= BITCAST(pStream
, mInt32PtrTy
);
937 Value
* pLaneCount
= VEXTRACT(unwrap(total_emitted_vertices_vec
), C(lane
));
938 STORE(pLaneCount
, pStream
);
943 BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context
* bld
)
945 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
947 Value
* loop_var
= ALLOCA(mSimdInt32Ty
);
948 STORE(VBROADCAST(C(0)), loop_var
);
950 iface
->loop_var
= wrap(loop_var
);
952 lp_exec_bgnloop(&bld
->exec_mask
, true);
954 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
955 bld
->system_values
.invocation_id
= wrap((LOAD(unwrap(iface
->loop_var
))));
957 if (verbose_tcs_shader_loop
) {
958 lp_build_print_value(gallivm
, "Prologue LOOP Iteration BEGIN:", bld
->system_values
.invocation_id
);
964 BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context
* bld
)
966 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld
->tcs_iface
;
968 struct lp_build_context
*uint_bld
= &bld
->bld_base
.uint_bld
;
970 STORE(ADD(LOAD(unwrap(iface
->loop_var
)), VBROADCAST(C(1))), unwrap(iface
->loop_var
));
971 if (verbose_tcs_shader_loop
) {
972 lp_build_print_value(gallivm
, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface
->loop_var
))));
975 LLVMValueRef tmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_GEQUAL
, wrap(LOAD(unwrap(iface
->loop_var
))),
976 wrap(VBROADCAST(C(iface
->output_vertices
))));
977 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp
);
978 lp_exec_break(&bld
->exec_mask
, &bld
->bld_base
.pc
, false);
979 lp_exec_mask_cond_pop(&bld
->exec_mask
);
980 lp_exec_endloop(bld
->bld_base
.base
.gallivm
, &bld
->exec_mask
);
984 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
985 struct lp_build_tgsi_context
* bld_base
,
986 boolean is_vindex_indirect
,
987 LLVMValueRef vertex_index
,
988 boolean is_aindex_indirect
,
989 LLVMValueRef attrib_index
,
990 LLVMValueRef swizzle_index
)
992 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
994 Value
*vert_index
= unwrap(vertex_index
);
995 Value
*attr_index
= unwrap(attrib_index
);
997 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
999 if (verbose_tcs_shader_in
) {
1000 lp_build_printf(gallivm
, "[TCS IN][VTX] ======================================\n");
1001 lp_build_print_value(gallivm
, "[TCS IN][VTX] vertex_index: ", vertex_index
);
1002 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1003 lp_build_printf(gallivm
, "[TCS IN][VTX] --------------------------------------\n");
1006 Value
*res
= unwrap(bld_base
->base
.zero
);
1007 if (is_vindex_indirect
|| is_aindex_indirect
) {
1009 struct lp_type type
= bld_base
->base
.type
;
1011 for (i
= 0; i
< type
.length
; i
++) {
1012 Value
*vert_chan_index
= vert_index
;
1013 Value
*attr_chan_index
= attr_index
;
1015 if (is_vindex_indirect
) {
1016 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1018 if (is_aindex_indirect
) {
1019 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1023 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
1025 Value
*pBase
= GEP(iface
->pTcsCtx
,
1026 { C(0), C(SWR_HS_CONTEXT_vert
), vert_chan_index
,
1027 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
), C(i
) });
1029 Value
*val
= LOAD(pBase
);
1031 if (verbose_tcs_shader_in
) {
1032 lp_build_print_value(gallivm
, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index
));
1033 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1034 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1035 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1036 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1037 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(val
));
1039 res
= VINSERT(res
, val
, C(i
));
1042 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1044 Value
*pBase
= GEP(iface
->pTcsCtx
,
1045 { C(0), C(SWR_HS_CONTEXT_vert
), vert_index
,
1046 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
) });
1050 if (verbose_tcs_shader_in
) {
1051 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1052 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1053 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1054 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1055 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(res
));
1058 if (verbose_tcs_shader_in
) {
1059 lp_build_print_value(gallivm
, "[TCS IN][VTX] returning: ", wrap(res
));
1065 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
1066 struct lp_build_tgsi_context
* bld_base
,
1067 boolean is_vindex_indirect
,
1068 LLVMValueRef vertex_index
,
1069 boolean is_aindex_indirect
,
1070 LLVMValueRef attrib_index
,
1071 LLVMValueRef swizzle_index
,
1074 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1076 Value
*vert_index
= unwrap(vertex_index
);
1077 Value
*attr_index
= unwrap(attrib_index
);
1079 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1081 if (verbose_tcs_shader_in
) {
1082 lp_build_print_value(gallivm
, "[TCS INOUT] Vertex index: ", vertex_index
);
1083 lp_build_print_value(gallivm
, "[TCS INOUT] Attrib index: ", wrap(attr_index
));
1084 lp_build_print_value(gallivm
, "[TCS INOUT] Swizzle index: ", swizzle_index
);
1087 Value
* res
= unwrap(bld_base
->base
.zero
);
1089 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
1090 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1091 Value
* pCpOut
= GEP(p1
, {lane
});
1093 Value
*vert_chan_index
= vert_index
;
1094 Value
*attr_chan_index
= attr_index
;
1096 if (is_vindex_indirect
) {
1097 vert_chan_index
= VEXTRACT(vert_index
, C(lane
));
1098 if (verbose_tcs_shader_in
) {
1099 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index
));
1103 if (is_aindex_indirect
) {
1104 attr_chan_index
= VEXTRACT(attr_index
, C(lane
));
1105 if (verbose_tcs_shader_in
) {
1106 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index
));
1110 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1111 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1112 Value
* tessFactorArray
= nullptr;
1113 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1114 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1116 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1118 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1119 res
= VINSERT(res
, LOAD(tessFactor
), C(lane
));
1120 if (verbose_tcs_shader_in
) {
1121 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane
)));
1122 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] loaded value: ", wrap(res
));
1124 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1125 Value
* attr_index_from_map
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_chan_index
}));
1126 Value
* attr_value
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attr_index_from_map
, unwrap(swizzle_index
)});
1127 res
= VINSERT(res
, LOAD(attr_value
), C(lane
));
1128 if (verbose_tcs_shader_in
) {
1129 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map
));
1130 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1131 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] loaded value: ", wrap(res
));
1134 // Generic attribute
1136 LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_chan_index
}));
1137 if (verbose_tcs_shader_in
) {
1138 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib
));
1140 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
), vert_chan_index
,
1141 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1143 res
= VINSERT(res
, LOAD(attr_chan
), C(lane
));
1144 if (verbose_tcs_shader_in
) {
1145 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] loaded value: ", wrap(res
));
1154 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
1155 struct lp_build_tgsi_context
*bld_base
,
1157 boolean is_vindex_indirect
,
1158 LLVMValueRef vertex_index
,
1159 boolean is_aindex_indirect
,
1160 LLVMValueRef attrib_index
,
1161 LLVMValueRef swizzle_index
,
1163 LLVMValueRef mask_vec
)
1165 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1166 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1168 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1170 if (verbose_tcs_shader_out
) {
1171 lp_build_printf(gallivm
, "[TCS OUT] =============================================\n");
1174 if (verbose_tcs_shader_out
) {
1175 lp_build_print_value(gallivm
, "[TCS OUT] Store mask: ", bld
->exec_mask
.exec_mask
);
1176 lp_build_print_value(gallivm
, "[TCS OUT] Store value: ", value
);
1179 Value
*vert_index
= unwrap(vertex_index
);
1180 Value
*attr_index
= unwrap(attrib_index
);
1182 if (verbose_tcs_shader_out
) {
1183 lp_build_print_value(gallivm
, "[TCS OUT] Vertex index: ", vertex_index
);
1184 lp_build_print_value(gallivm
, "[TCS OUT] Attrib index: ", wrap(attr_index
));
1185 lp_build_print_value(gallivm
, "[TCS OUT] Swizzle index: ", swizzle_index
);
1188 if (is_vindex_indirect
) {
1189 vert_index
= VEXTRACT(vert_index
, C(0));
1190 if (verbose_tcs_shader_out
) {
1191 lp_build_print_value(gallivm
, "[TCS OUT] Extracted vertex index: ", vertex_index
);
1195 if (is_aindex_indirect
) {
1196 attr_index
= VEXTRACT(attr_index
, C(0));
1197 if (verbose_tcs_shader_out
) {
1198 lp_build_print_value(gallivm
, "[TCS OUT] Extracted attrib index: ", wrap(attr_index
));
1202 if (verbose_tcs_shader_out
) {
1203 if (bld
->exec_mask
.has_mask
) {
1204 lp_build_print_value(gallivm
, "[TCS OUT] Exec mask: ", bld
->exec_mask
.exec_mask
);
1207 lp_build_printf(gallivm
, "[TCS OUT] has no mask\n");
1210 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
1211 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1212 Value
* pCpOut
= GEP(p1
, {lane
});
1214 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1215 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1216 Value
* tessFactorArray
= nullptr;
1217 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1218 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1220 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1222 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1223 Value
* valueToStore
= VEXTRACT(unwrap(value
), C(lane
));
1224 valueToStore
= BITCAST(valueToStore
, mFP32Ty
);
1226 Value
*originalVal
= LOAD(tessFactor
);
1227 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1228 valueToStore
= SELECT(vMask
, valueToStore
, originalVal
);
1230 STORE(valueToStore
, tessFactor
);
1231 if (verbose_tcs_shader_out
)
1233 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec
);
1234 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore
));
1236 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1237 Value
* attrib
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_index
}));
1238 if (verbose_tcs_shader_out
) {
1239 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index
));
1240 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index
));
1241 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect
)));
1242 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect
)));
1243 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib
));
1245 Value
* attr
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attrib
});
1246 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1247 if (verbose_tcs_shader_out
) {
1248 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1249 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] value to store: ", value
);
1250 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store
));
1251 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] chan_index: ", swizzle_index
);
1253 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
1255 Value
*originalVal
= LOADV(attr
, {C(0), unwrap(swizzle_index
)});
1256 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1257 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1258 if (verbose_tcs_shader_out
) {
1259 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] store mask: ", mask_vec
);
1260 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal
));
1261 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vMask: ", wrap(vMask
));
1262 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store
));
1265 STOREV(value_to_store
, attr
, {C(0), unwrap(swizzle_index
)});
1266 if (verbose_tcs_shader_out
) {
1267 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store
));
1270 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1271 Value
* attrib
= LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_index
}));
1273 if (verbose_tcs_shader_out
) {
1274 lp_build_printf(gallivm
, "[TCS OUT] Writting attribute\n");
1275 lp_build_print_value(gallivm
, "[TCS OUT][VTX] invocation_id: ", bld
->system_values
.invocation_id
);
1276 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index
));
1277 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib
));
1278 lp_build_print_value(gallivm
, "[TCS OUT][VTX] chan_index: ", swizzle_index
);
1279 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value: ", value
);
1280 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store
));
1283 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
),
1284 VEXTRACT(unwrap(bld
->system_values
.invocation_id
), C(0)),
1285 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1287 // Mask output values if needed
1288 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
1290 Value
*originalVal
= LOAD(attr_chan
);
1291 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1292 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1294 STORE(value_to_store
, attr_chan
);
1295 if (verbose_tcs_shader_out
) {
1296 lp_build_print_value(gallivm
, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec
);
1297 lp_build_print_value(gallivm
, "[TCS OUT][VTX] stored: ", wrap(value_to_store
));
1304 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
1305 struct lp_build_tgsi_context
*bld_base
)
1307 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1308 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1310 if (verbose_tcs_shader_loop
) {
1311 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration %d END\n", iface
->loop_var
);
1314 struct lp_build_context
*uint_bld
= &bld
->bld_base
.uint_bld
;
1316 STORE(ADD(LOAD(unwrap(iface
->loop_var
)), VBROADCAST(C(1))), unwrap(iface
->loop_var
));
1318 LLVMValueRef tmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_GEQUAL
, wrap(LOAD(unwrap(iface
->loop_var
))),
1319 wrap(VBROADCAST(C(iface
->output_vertices
))));
1321 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp
);
1322 lp_exec_break(&bld
->exec_mask
, &bld
->bld_base
.pc
, false);
1323 lp_exec_mask_cond_pop(&bld
->exec_mask
);
1324 lp_exec_endloop(bld
->bld_base
.base
.gallivm
, &bld
->exec_mask
);
1326 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1328 STORE(VBROADCAST(C(0)), unwrap(iface
->loop_var
));
1329 lp_exec_bgnloop(&bld
->exec_mask
, true);
1331 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1333 bld
->system_values
.invocation_id
= wrap((LOAD(unwrap(iface
->loop_var
))));
1335 if (verbose_tcs_shader_loop
) {
1336 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration BEGIN: ", iface
->loop_var
);
1337 lp_build_print_value(gallivm
, "Barrier LOOP: InvocationId: \n", bld
->system_values
.invocation_id
);
1343 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
1344 struct lp_build_tgsi_context
* bld_base
,
1345 boolean is_aindex_indirect
,
1346 LLVMValueRef attrib_index
,
1347 LLVMValueRef swizzle_index
)
1349 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1350 Value
*attr_index
= unwrap(attrib_index
);
1351 Value
*res
= unwrap(bld_base
->base
.zero
);
1353 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1355 if (verbose_shader
) {
1356 lp_build_printf(gallivm
, "[TES IN][PATCH] --------------------------------------\n");
1359 if (is_aindex_indirect
) {
1361 struct lp_type type
= bld_base
->base
.type
;
1363 for (i
= 0; i
< type
.length
; i
++) {
1364 Value
*attr_chan_index
= attr_index
;
1366 if (is_aindex_indirect
) {
1367 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1371 LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_chan_index
}));
1373 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1374 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1375 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1376 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1377 if (verbose_shader
) {
1378 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1379 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index
));
1380 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1381 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1382 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1384 res
= VINSERT(res
, Val
, C(i
));
1387 Value
*attrib
= LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_index
}));
1389 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1390 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1391 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1392 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1393 if (verbose_shader
) {
1394 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1395 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index
));
1396 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1397 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1398 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1400 res
= VBROADCAST(Val
);
1402 if (verbose_shader
) {
1403 lp_build_print_value(gallivm
, "[TES IN][PATCH] returning: ", wrap(res
));
1411 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
1412 struct lp_build_tgsi_context
* bld_base
,
1413 boolean is_vindex_indirect
,
1414 LLVMValueRef vertex_index
,
1415 boolean is_aindex_indirect
,
1416 LLVMValueRef attrib_index
,
1417 LLVMValueRef swizzle_index
)
1419 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1420 Value
*vert_index
= unwrap(vertex_index
);
1421 Value
*attr_index
= unwrap(attrib_index
);
1423 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1425 if (verbose_shader
) {
1426 lp_build_printf(gallivm
, "[TES IN][VTX] --------------------------------------\n");
1429 Value
*res
= unwrap(bld_base
->base
.zero
);
1430 if (is_vindex_indirect
|| is_aindex_indirect
) {
1432 struct lp_type type
= bld_base
->base
.type
;
1434 for (i
= 0; i
< type
.length
; i
++) {
1435 Value
*vert_chan_index
= vert_index
;
1436 Value
*attr_chan_index
= attr_index
;
1438 if (is_vindex_indirect
) {
1439 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1441 if (is_aindex_indirect
) {
1442 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1446 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
1448 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1449 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1450 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_chan_index
});
1451 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1452 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1453 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1454 if (verbose_shader
) {
1455 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1456 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1457 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1458 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1459 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1461 res
= VINSERT(res
, Val
, C(i
));
1464 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1466 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1467 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1468 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_index
});
1469 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1470 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1471 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1472 if (verbose_shader
) {
1473 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1474 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1475 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1476 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1477 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1479 res
= VBROADCAST(Val
);
1481 if (verbose_shader
) {
1482 lp_build_print_value(gallivm
, "[TES IN][VTX] returning: ", wrap(res
));
1491 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
1493 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
1494 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
1496 memset(pGS
, 0, sizeof(*pGS
));
1498 pGS
->gsEnable
= true;
1500 pGS
->numInputAttribs
= (VERTEX_ATTRIB_START_SLOT
- VERTEX_POSITION_SLOT
) + info
->num_inputs
;
1501 pGS
->outputTopology
=
1502 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
], 0);
1504 /* It's +1 because emit_vertex in swr is always called exactly one time more
1505 * than max_vertices passed in Geometry Shader. We need to allocate more memory
1506 * to avoid crash/memory overwritten.
1508 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] + 1;
1509 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
1511 // If point primitive then assume to use multiple streams
1512 if(pGS
->outputTopology
== TOP_POINT_LIST
) {
1513 pGS
->isSingleStream
= false;
1515 pGS
->isSingleStream
= true;
1516 pGS
->singleStreamID
= 0;
1519 pGS
->vertexAttribOffset
= VERTEX_POSITION_SLOT
;
1520 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
1521 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
1522 pGS
->controlDataSize
= 8; // GS ouputs max of 8 32B units
1523 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
1524 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
1526 pGS
->allocationSize
=
1527 VERTEX_COUNT_SIZE
+ // vertex count
1528 CONTROL_HEADER_SIZE
+ // control header
1529 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
1530 pGS
->maxNumVerts
; // num verts
1532 struct swr_geometry_shader
*gs
= ctx
->gs
;
1534 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1535 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1537 memset(outputs
, 0, sizeof(outputs
));
1539 AttrBuilder attrBuilder
;
1540 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1542 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1543 PointerType::get(mInt8Ty
, 0),
1544 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
1545 FunctionType
*vsFuncType
=
1546 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
1548 // create new vertex shader function
1549 auto pFunction
= Function::Create(vsFuncType
,
1550 GlobalValue::ExternalLinkage
,
1552 JM()->mpCurrentModule
);
1553 #if LLVM_VERSION_MAJOR < 5
1554 AttributeSet attrSet
= AttributeSet::get(
1555 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1556 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1558 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1561 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1562 IRB()->SetInsertPoint(block
);
1563 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1565 auto argitr
= pFunction
->arg_begin();
1566 Value
*hPrivateData
= &*argitr
++;
1567 hPrivateData
->setName("hPrivateData");
1568 Value
*pWorkerData
= &*argitr
++;
1569 pWorkerData
->setName("pWorkerData");
1570 Value
*pGsCtx
= &*argitr
++;
1571 pGsCtx
->setName("gsCtx");
1574 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
1575 consts_ptr
->setName("gs_constants");
1576 Value
*const_sizes_ptr
=
1577 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
1578 const_sizes_ptr
->setName("num_gs_constants");
1580 struct lp_build_sampler_soa
*sampler
=
1581 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
1582 assert(sampler
!= nullptr);
1584 struct lp_bld_tgsi_system_values system_values
;
1585 memset(&system_values
, 0, sizeof(system_values
));
1586 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
1587 system_values
.invocation_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
1589 std::vector
<Constant
*> mapConstants
;
1590 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1591 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1592 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1593 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1595 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
1596 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1598 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
1600 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
1603 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
1604 vs_slot
= VERTEX_POSITION_SLOT
;
1606 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
1607 mapConstants
.push_back(C(vs_slot
));
1610 struct lp_build_mask_context mask
;
1611 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
1612 lp_build_mask_begin(&mask
, gallivm
,
1613 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1615 // zero out cut buffer so we can load/modify/store bits
1616 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
1618 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
1619 #if LLVM_VERSION_MAJOR >= 10
1620 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH
));
1622 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
1626 struct swr_gs_llvm_iface gs_iface
;
1627 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
1628 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
1629 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
1630 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
1631 gs_iface
.pBuilder
= this;
1632 gs_iface
.pGsCtx
= pGsCtx
;
1633 gs_iface
.pGsState
= pGS
;
1634 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
1635 gs_iface
.num_verts_per_prim
=
1636 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
1637 gs_iface
.info
= info
;
1638 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
1640 struct lp_build_tgsi_params params
;
1641 memset(¶ms
, 0, sizeof(params
));
1642 params
.type
= lp_type_float_vec(32, 32 * 8);
1643 params
.mask
= & mask
;
1644 params
.consts_ptr
= wrap(consts_ptr
);
1645 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1646 params
.system_values
= &system_values
;
1647 params
.inputs
= inputs
;
1648 params
.context_ptr
= wrap(hPrivateData
);
1649 params
.sampler
= sampler
;
1650 params
.info
= &gs
->info
.base
;
1651 params
.gs_iface
= &gs_iface
.base
;
1653 lp_build_tgsi_soa(gallivm
,
1658 lp_build_mask_end(&mask
);
1660 sampler
->destroy(sampler
);
1662 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1666 gallivm_verify_function(gallivm
, wrap(pFunction
));
1667 gallivm_compile_module(gallivm
);
1670 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1672 debug_printf("geom shader %p\n", pFunc
);
1673 assert(pFunc
&& "Error: GeomShader = NULL");
1675 JM()->mIsModuleFinalized
= true;
1681 BuilderSWR::CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
1683 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
1684 struct tgsi_shader_info
*info
= &ctx
->tes
->info
.base
;
1686 // tessellation is enabled if TES is present
1687 // clear tessellation state here then
1688 memset(pTS
, 0, sizeof(*pTS
));
1690 pTS
->tsEnable
= true;
1692 unsigned tes_prim_mode
= info
->properties
[TGSI_PROPERTY_TES_PRIM_MODE
];
1693 unsigned tes_spacing
= info
->properties
[TGSI_PROPERTY_TES_SPACING
];
1694 bool tes_vertex_order_cw
= info
->properties
[TGSI_PROPERTY_TES_VERTEX_ORDER_CW
];
1695 bool tes_point_mode
= info
->properties
[TGSI_PROPERTY_TES_POINT_MODE
];
1696 SWR_TS_DOMAIN type
= SWR_TS_ISOLINE
;
1697 SWR_TS_PARTITIONING partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1698 SWR_TS_OUTPUT_TOPOLOGY topology
= SWR_TS_OUTPUT_POINT
;
1699 PRIMITIVE_TOPOLOGY postDSTopology
= TOP_POINT_LIST
;
1701 // TESS_TODO: move this to helper functions to improve readability
1702 switch (tes_prim_mode
) {
1703 case PIPE_PRIM_LINES
:
1704 type
= SWR_TS_ISOLINE
;
1705 postDSTopology
= TOP_LINE_LIST
;
1707 case PIPE_PRIM_TRIANGLES
:
1709 postDSTopology
= TOP_TRIANGLE_LIST
;
1711 case PIPE_PRIM_QUADS
:
1713 // See OpenGL spec - quads are tessellated into triangles
1714 postDSTopology
= TOP_TRIANGLE_LIST
;
1720 switch (tes_spacing
) {
1721 case PIPE_TESS_SPACING_FRACTIONAL_ODD
:
1722 partitioning
= SWR_TS_ODD_FRACTIONAL
;
1724 case PIPE_TESS_SPACING_FRACTIONAL_EVEN
:
1725 partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1727 case PIPE_TESS_SPACING_EQUAL
:
1728 partitioning
= SWR_TS_INTEGER
;
1734 if (tes_point_mode
) {
1735 topology
= SWR_TS_OUTPUT_POINT
;
1736 postDSTopology
= TOP_POINT_LIST
;
1738 else if (tes_prim_mode
== PIPE_PRIM_LINES
) {
1739 topology
= SWR_TS_OUTPUT_LINE
;
1741 else if (tes_vertex_order_cw
) {
1742 topology
= SWR_TS_OUTPUT_TRI_CW
;
1745 topology
= SWR_TS_OUTPUT_TRI_CCW
;
1749 pTS
->tsOutputTopology
= topology
;
1750 pTS
->partitioning
= partitioning
;
1751 pTS
->numDsOutputAttribs
= info
->num_outputs
;
1752 pTS
->postDSTopology
= postDSTopology
;
1754 pTS
->dsAllocationSize
= SWR_VTX_NUM_SLOTS
* MAX_NUM_VERTS_PER_PRIM
;
1755 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1756 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1757 pTS
->dsOutVtxAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1759 struct swr_tess_evaluation_shader
*tes
= ctx
->tes
;
1761 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1762 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1764 memset(outputs
, 0, sizeof(outputs
));
1766 AttrBuilder attrBuilder
;
1767 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1769 std::vector
<Type
*> tesArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1770 PointerType::get(mInt8Ty
, 0),
1771 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1772 FunctionType
*tesFuncType
=
1773 FunctionType::get(Type::getVoidTy(JM()->mContext
), tesArgs
, false);
1775 // create new vertex shader function
1776 auto pFunction
= Function::Create(tesFuncType
,
1777 GlobalValue::ExternalLinkage
,
1779 JM()->mpCurrentModule
);
1781 #if LLVM_VERSION_MAJOR < 5
1782 AttributeSet attrSet
= AttributeSet::get(
1783 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1784 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1786 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1789 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1790 IRB()->SetInsertPoint(block
);
1791 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1793 auto argitr
= pFunction
->arg_begin();
1794 Value
*hPrivateData
= &*argitr
++;
1795 hPrivateData
->setName("hPrivateData");
1796 Value
*pWorkerData
= &*argitr
++;
1797 pWorkerData
->setName("pWorkerData");
1798 Value
*pTesCtx
= &*argitr
++;
1799 pTesCtx
->setName("tesCtx");
1802 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTES
)});
1803 consts_ptr
->setName("tes_constants");
1804 Value
*const_sizes_ptr
=
1805 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTES
});
1806 const_sizes_ptr
->setName("num_tes_constants");
1808 struct lp_build_sampler_soa
*sampler
=
1809 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_EVAL
);
1810 assert(sampler
!= nullptr);
1812 struct lp_bld_tgsi_system_values system_values
;
1813 memset(&system_values
, 0, sizeof(system_values
));
1815 // Load and calculate system values
1816 // Tessellation coordinates (gl_TessCoord)
1817 Value
*vecOffset
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
}, "vecOffset");
1818 Value
*vecStride
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorStride
}, "vecStride");
1819 Value
*vecIndex
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
});
1821 Value
* tess_coord
= ALLOCA(ArrayType::get(mSimdFP32Ty
, 3));
1823 Value
*tessCoordU
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainU
}), {vecIndex
}, "tessCoordU");
1824 STORE(tessCoordU
, tess_coord
, {0, 0});
1825 Value
*tessCoordV
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainV
}), {vecIndex
}, "tessCoordV");
1826 STORE(tessCoordV
, tess_coord
, {0, 1});
1827 Value
*tessCoordW
= FSUB(FSUB(VIMMED1(1.0f
), tessCoordU
), tessCoordV
, "tessCoordW");
1828 STORE(tessCoordW
, tess_coord
, {0, 2});
1829 system_values
.tess_coord
= wrap(tess_coord
);
1832 system_values
.prim_id
= wrap(VBROADCAST(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_PrimitiveID
}), "PrimitiveID"));
1834 // Tessellation factors
1835 Value
* pPatch
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
});
1836 Value
* pTessFactors
= GEP(pPatch
, {C(0), C(ScalarPatch_tessFactors
)});
1838 assert(SWR_NUM_OUTER_TESS_FACTORS
== 4);
1839 Value
* sys_value_outer_factors
= UndefValue::get(getVectorType(mFP32Ty
, 4));
1840 for (unsigned i
= 0; i
< SWR_NUM_OUTER_TESS_FACTORS
; i
++) {
1841 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors
, i
});
1842 sys_value_outer_factors
= VINSERT(sys_value_outer_factors
, v
, i
, "gl_TessLevelOuter");
1844 system_values
.tess_outer
= wrap(sys_value_outer_factors
);
1846 assert(SWR_NUM_INNER_TESS_FACTORS
== 2);
1847 Value
* sys_value_inner_factors
= UndefValue::get(getVectorType(mFP32Ty
, 4));
1848 for (unsigned i
= 0; i
< SWR_NUM_INNER_TESS_FACTORS
; i
++) {
1849 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors
, i
});
1850 sys_value_inner_factors
= VINSERT(sys_value_inner_factors
, v
, i
, "gl_TessLevelInner");
1852 system_values
.tess_inner
= wrap(sys_value_inner_factors
);
1856 lp_build_print_value(gallivm
, "tess_coord = ", system_values
.tess_coord
);
1859 struct tgsi_shader_info
*pPrevShader
= nullptr;
1862 pPrevShader
= &ctx
->tcs
->info
.base
;
1865 pPrevShader
= &ctx
->vs
->info
.base
;
1868 // Figure out how many per-patch attributes we have
1869 unsigned perPatchAttrs
= 0;
1870 unsigned genericAttrs
= 0;
1871 unsigned tessLevelAttrs
= 0;
1872 unsigned sgvAttrs
= 0;
1873 for (unsigned slot
= 0; slot
< pPrevShader
->num_outputs
; slot
++) {
1874 switch (pPrevShader
->output_semantic_name
[slot
]) {
1875 case TGSI_SEMANTIC_PATCH
:
1878 case TGSI_SEMANTIC_GENERIC
:
1881 case TGSI_SEMANTIC_TESSINNER
:
1882 case TGSI_SEMANTIC_TESSOUTER
:
1885 case TGSI_SEMANTIC_POSITION
:
1886 case TGSI_SEMANTIC_CLIPDIST
:
1887 case TGSI_SEMANTIC_PSIZE
:
1891 assert(!"Unknown semantic input in TES");
1895 std::vector
<Constant
*> mapConstants
;
1896 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1897 Value
*patchAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1898 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1899 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1900 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1902 // Where in TCS output is my attribute?
1903 // TESS_TODO: revisit after implement pass-through TCS
1904 unsigned tcs_slot
= locate_linkage(semantic_name
, semantic_idx
, pPrevShader
);
1905 assert(tcs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1907 // Skip tessellation levels - these go to the tessellator, not TES
1908 switch (semantic_name
) {
1909 case TGSI_SEMANTIC_GENERIC
:
1910 tcs_slot
= tcs_slot
+ VERTEX_ATTRIB_START_SLOT
- sgvAttrs
- tessLevelAttrs
;
1912 case TGSI_SEMANTIC_PATCH
:
1913 tcs_slot
= semantic_idx
;
1915 case TGSI_SEMANTIC_POSITION
:
1916 tcs_slot
= VERTEX_POSITION_SLOT
;
1918 case TGSI_SEMANTIC_CLIPDIST
:
1919 case TGSI_SEMANTIC_PSIZE
:
1922 assert(!"Unexpected semantic found while builiding TES input map");
1924 if (semantic_name
== TGSI_SEMANTIC_PATCH
) {
1925 STORE(C(tcs_slot
), patchAttribMap
, {0, slot
});
1927 STORE(C(tcs_slot
), vtxAttribMap
, {0, slot
});
1929 mapConstants
.push_back(C(tcs_slot
));
1932 // Build execution mask
1933 struct lp_build_mask_context mask
;
1934 Value
*mask_val
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_mask
}, "tesMask");
1937 lp_build_print_value(gallivm
, "TES execution mask: ", wrap(mask_val
));
1939 lp_build_mask_begin(&mask
, gallivm
,
1940 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1942 struct swr_tes_llvm_iface tes_iface
;
1944 tes_iface
.base
.fetch_vertex_input
= ::swr_tes_llvm_fetch_vtx_input
;
1945 tes_iface
.base
.fetch_patch_input
= ::swr_tes_llvm_fetch_patch_input
;
1947 tes_iface
.pBuilder
= this;
1948 tes_iface
.pTesCtx
= pTesCtx
;
1949 tes_iface
.pTsState
= pTS
;
1950 tes_iface
.num_outputs
= tes
->info
.base
.num_outputs
;
1951 tes_iface
.info
= info
;
1952 tes_iface
.pVtxAttribMap
= vtxAttribMap
;
1953 tes_iface
.pPatchAttribMap
= patchAttribMap
;
1955 struct lp_build_tgsi_params params
;
1956 memset(¶ms
, 0, sizeof(params
));
1957 params
.type
= lp_type_float_vec(32, 32 * 8);
1958 params
.mask
= & mask
;
1959 params
.consts_ptr
= wrap(consts_ptr
);
1960 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1961 params
.system_values
= &system_values
;
1962 params
.inputs
= inputs
;
1963 params
.context_ptr
= wrap(hPrivateData
);
1964 params
.sampler
= sampler
;
1965 params
.info
= &tes
->info
.base
;
1966 params
.tes_iface
= &tes_iface
.base
;
1969 lp_build_tgsi_soa(gallivm
,
1974 lp_build_mask_end(&mask
);
1976 sampler
->destroy(sampler
);
1978 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1980 // Write output attributes
1981 Value
*dclOut
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pOutputData
}, "dclOut");
1983 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
1984 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1985 if (!outputs
[attrib
][channel
])
1988 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));;
1989 Value
*attribOffset
=
1990 LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_outVertexAttribOffset
});
1992 // Assume we write position
1993 Value
* outputSlot
= C(VERTEX_POSITION_SLOT
);
1994 if (tes
->info
.base
.output_semantic_name
[attrib
] != TGSI_SEMANTIC_POSITION
) {
1995 // No, it's a generic attribute, not a position - let's calculate output slot
1996 uint32_t outSlot
= attrib
;
1997 if (tes
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
) {
1998 // this shader will write position, so in shader's term
1999 // output starts at attrib 1, but we will handle that separately,
2000 // so let's fix the outSlot
2003 outputSlot
= ADD(attribOffset
, C(outSlot
));
2006 Value
*attribVecIndex
=
2007 ADD(MUL(vecStride
, MUL(outputSlot
, C(4))), vecOffset
);
2009 uint32_t outputComponent
= 0;
2010 uint32_t curComp
= outputComponent
+ channel
;
2011 auto outValIndex
= ADD(attribVecIndex
, MUL(vecStride
, C(curComp
)));
2012 STOREV(val
, dclOut
, {outValIndex
});
2014 if (verbose_shader
) {
2015 lp_build_printf(gallivm
,
2016 "TES output [%d][%d]",
2019 lp_build_print_value(gallivm
, " = ", wrap(val
));
2026 JM()->DumpToFile(pFunction
, "src");
2027 gallivm_verify_function(gallivm
, wrap(pFunction
));
2029 gallivm_compile_module(gallivm
);
2030 JM()->DumpToFile(pFunction
, "optimized");
2032 PFN_TES_FUNC pFunc
=
2033 (PFN_TES_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2035 debug_printf("tess evaluation shader %p\n", pFunc
);
2036 assert(pFunc
&& "Error: TessEvaluationShader = NULL");
2038 JM()->DumpAsm(pFunction
, "asm");
2040 JM()->mIsModuleFinalized
= true;
2046 BuilderSWR::CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2048 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
2049 struct tgsi_shader_info
*info
= &ctx
->tcs
->info
.base
;
2051 pTS
->numHsInputAttribs
= info
->num_inputs
;
2052 pTS
->numHsOutputAttribs
= info
->num_outputs
;
2054 pTS
->hsAllocationSize
= sizeof(ScalarPatch
);
2056 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2057 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2059 struct swr_tess_control_shader
*tcs
= ctx
->tcs
;
2061 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2062 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2064 memset(outputs
, 0, sizeof(outputs
));
2066 AttrBuilder attrBuilder
;
2067 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2069 std::vector
<Type
*> tcsArgs
{
2070 PointerType::get(Gen_swr_draw_context(JM()), 0),
2071 PointerType::get(mInt8Ty
, 0),
2072 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2073 FunctionType
*tcsFuncType
=
2074 FunctionType::get(Type::getVoidTy(JM()->mContext
), tcsArgs
, false);
2076 // create new vertex shader function
2077 auto pFunction
= Function::Create(tcsFuncType
,
2078 GlobalValue::ExternalLinkage
,
2080 JM()->mpCurrentModule
);
2082 #if LLVM_VERSION_MAJOR < 5
2083 AttributeSet attrSet
= AttributeSet::get(
2084 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2085 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2087 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2090 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2091 IRB()->SetInsertPoint(block
);
2092 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2094 auto argitr
= pFunction
->arg_begin();
2095 Value
*hPrivateData
= &*argitr
++;
2096 hPrivateData
->setName("hPrivateData");
2097 Value
*pWorkerData
= &*argitr
++;
2098 pWorkerData
->setName("pWorkerData");
2099 Value
*pTcsCtx
= &*argitr
++;
2100 pTcsCtx
->setName("tcsCtx");
2103 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTCS
)});
2104 consts_ptr
->setName("tcs_constants");
2105 Value
*const_sizes_ptr
=
2106 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTCS
});
2107 const_sizes_ptr
->setName("num_tcs_constants");
2109 struct lp_build_sampler_soa
*sampler
=
2110 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_CTRL
);
2111 assert(sampler
!= nullptr);
2113 struct lp_bld_tgsi_system_values system_values
;
2114 memset(&system_values
, 0, sizeof(system_values
));
2116 system_values
.prim_id
=
2117 wrap(LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_PrimitiveID
}));
2119 system_values
.invocation_id
= wrap(VBROADCAST(C(0)));
2120 system_values
.vertices_in
= wrap(C(tcs
->vertices_per_patch
));
2122 if (verbose_shader
) {
2123 lp_build_print_value(gallivm
, "TCS::prim_id = ", system_values
.prim_id
);
2124 lp_build_print_value(gallivm
, "TCS::invocation_id = ", system_values
.invocation_id
);
2125 lp_build_print_value(gallivm
, "TCS::vertices_in = ", system_values
.vertices_in
);
2128 std::vector
<Constant
*> mapConstants
;
2129 Value
*vtxAttribMap
=
2130 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2132 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
2133 ubyte semantic_name
= info
->input_semantic_name
[slot
];
2134 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
2137 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
2138 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
2140 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
2142 if (ctx
->vs
->info
.base
.output_semantic_name
[0]
2143 == TGSI_SEMANTIC_POSITION
)
2146 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
2147 vs_slot
= VERTEX_POSITION_SLOT
;
2149 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
2150 mapConstants
.push_back(C(vs_slot
));
2153 // Prepare map of output attributes. Needed when shader instance wants
2154 // to read own output or output of other instance, which is allowed in TCS
2155 Value
*vtxOutputAttribMap
=
2156 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2157 // Map for per-patch attributes
2158 Value
*patchOutputAttribMap
=
2159 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2160 for (unsigned slot
= 0; slot
< info
->num_outputs
; slot
++) {
2161 ubyte name
= info
->output_semantic_name
[slot
];
2162 int32_t idx
= info
->output_semantic_index
[slot
];
2163 if (name
== TGSI_SEMANTIC_PATCH
) {
2164 STORE(C(idx
), patchOutputAttribMap
, {0, slot
});
2166 int32_t target_slot
= slot
;
2167 if (name
== TGSI_SEMANTIC_GENERIC
) {
2168 target_slot
+= VERTEX_ATTRIB_START_SLOT
;
2170 // Now normalize target slot
2171 for (ubyte as
= 0; as
< slot
; as
++) {
2172 ubyte name
= info
->output_semantic_name
[as
];
2174 case TGSI_SEMANTIC_TESSOUTER
:
2175 case TGSI_SEMANTIC_TESSINNER
:
2176 case TGSI_SEMANTIC_PATCH
:
2177 case TGSI_SEMANTIC_POSITION
:
2181 if (name
== TGSI_SEMANTIC_POSITION
) {
2182 target_slot
= VERTEX_POSITION_SLOT
;
2184 STORE(C(target_slot
), vtxOutputAttribMap
, {0, slot
});
2185 mapConstants
.push_back(C(target_slot
));
2189 struct lp_build_mask_context mask
;
2190 Value
*mask_val
= LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_mask
}, "tcsMask");
2191 lp_build_mask_begin(
2192 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
2194 struct swr_tcs_llvm_iface tcs_iface
;
2196 tcs_iface
.base
.emit_store_output
= ::swr_tcs_llvm_store_output
;
2197 tcs_iface
.base
.emit_fetch_input
= ::swr_tcs_llvm_fetch_input
;
2198 tcs_iface
.base
.emit_fetch_output
= ::swr_tcs_llvm_fetch_output
;
2199 tcs_iface
.base
.emit_barrier
= ::swr_tcs_llvm_emit_barrier
;
2200 tcs_iface
.base
.emit_prologue
= ::swr_tcs_llvm_emit_prologue
;
2201 tcs_iface
.base
.emit_epilogue
= ::swr_tcs_llvm_emit_epilogue
;
2203 tcs_iface
.pBuilder
= this;
2204 tcs_iface
.pTcsCtx
= pTcsCtx
;
2205 tcs_iface
.pTsState
= pTS
;
2206 tcs_iface
.output_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
2207 tcs_iface
.info
= info
;
2208 tcs_iface
.pVtxAttribMap
= vtxAttribMap
;
2209 tcs_iface
.pVtxOutputAttribMap
= vtxOutputAttribMap
;
2210 tcs_iface
.pPatchOutputAttribMap
= patchOutputAttribMap
;
2212 struct lp_build_tgsi_params params
;
2213 memset(¶ms
, 0, sizeof(params
));
2214 params
.type
= lp_type_float_vec(32, 32 * 8);
2215 params
.mask
= &mask
;
2216 params
.consts_ptr
= wrap(consts_ptr
);
2217 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2218 params
.system_values
= &system_values
;
2219 params
.inputs
= inputs
;
2220 params
.context_ptr
= wrap(hPrivateData
);
2221 params
.sampler
= sampler
;
2222 params
.info
= &tcs
->info
.base
;
2223 params
.tcs_iface
= &tcs_iface
.base
;
2225 lp_build_tgsi_soa(gallivm
, tcs
->pipe
.tokens
, ¶ms
, outputs
);
2227 lp_build_mask_end(&mask
);
2229 sampler
->destroy(sampler
);
2231 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2234 JM()->DumpToFile(pFunction
, "src");
2235 gallivm_verify_function(gallivm
, wrap(pFunction
));
2236 gallivm_compile_module(gallivm
);
2237 JM()->DumpToFile(pFunction
, "optimized");
2239 PFN_TCS_FUNC pFunc
=
2240 (PFN_TCS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2242 debug_printf("tess control shader %p\n", pFunc
);
2243 assert(pFunc
&& "Error: TessControlShader = NULL");
2244 JM()->DumpAsm(pFunction
, "asm");
2246 JM()->mIsModuleFinalized
= true;
2253 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
2256 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2258 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
2260 ctx
->gs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantGS
>(new VariantGS(builder
.gallivm
, func
))));
2265 swr_compile_tcs(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2268 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2270 PFN_TCS_FUNC func
= builder
.CompileTCS(ctx
, key
);
2272 ctx
->tcs
->map
.insert(
2273 std::make_pair(key
, std::unique_ptr
<VariantTCS
>(new VariantTCS(builder
.gallivm
, func
))));
2279 swr_compile_tes(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
2282 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2284 PFN_TES_FUNC func
= builder
.CompileTES(ctx
, key
);
2286 ctx
->tes
->map
.insert(
2287 std::make_pair(key
, std::unique_ptr
<VariantTES
>(new VariantTES(builder
.gallivm
, func
))));
2293 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
2295 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2296 // interleave the simdvertex components into the dest simd16vertex
2297 // slot16offset = slot8offset * 2
2298 // comp16offset = comp8offset * 2 + alternateOffset
2300 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
2301 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
2302 STORE(pVal
, pOut
, {channel
* 2});
2304 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
2305 STORE(pVal
, pOut
, {0, channel
});
2306 if (verbose_vs_shader
) {
2307 lp_build_printf(gallivm
, "VS: Storing on slot %d, channel %d: ", C(slot
), C(channel
));
2308 lp_build_print_value(gallivm
, "", wrap(pVal
));
2314 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
2316 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
2318 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2319 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2321 memset(outputs
, 0, sizeof(outputs
));
2323 AttrBuilder attrBuilder
;
2324 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2326 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2327 PointerType::get(mInt8Ty
, 0),
2328 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2329 FunctionType
*vsFuncType
=
2330 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
2332 // create new vertex shader function
2333 auto pFunction
= Function::Create(vsFuncType
,
2334 GlobalValue::ExternalLinkage
,
2336 JM()->mpCurrentModule
);
2337 #if LLVM_VERSION_MAJOR < 5
2338 AttributeSet attrSet
= AttributeSet::get(
2339 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2340 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2342 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2345 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2346 IRB()->SetInsertPoint(block
);
2347 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2349 auto argitr
= pFunction
->arg_begin();
2350 Value
*hPrivateData
= &*argitr
++;
2351 hPrivateData
->setName("hPrivateData");
2352 Value
*pWorkerData
= &*argitr
++;
2353 pWorkerData
->setName("pWorkerData");
2354 Value
*pVsCtx
= &*argitr
++;
2355 pVsCtx
->setName("vsCtx");
2357 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
2359 consts_ptr
->setName("vs_constants");
2360 Value
*const_sizes_ptr
=
2361 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
2362 const_sizes_ptr
->setName("num_vs_constants");
2364 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
2366 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
2369 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2370 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
2371 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2372 if (mask
& (1 << channel
)) {
2373 inputs
[attrib
][channel
] =
2374 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
2379 struct lp_build_sampler_soa
*sampler
=
2380 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
2381 assert(sampler
!= nullptr);
2383 struct lp_bld_tgsi_system_values system_values
;
2384 memset(&system_values
, 0, sizeof(system_values
));
2385 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
2388 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
2390 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
2394 uint32_t vectorWidth
= mVWidth16
;
2396 uint32_t vectorWidth
= mVWidth
;
2399 struct lp_build_tgsi_params params
;
2400 memset(¶ms
, 0, sizeof(params
));
2401 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
2402 params
.consts_ptr
= wrap(consts_ptr
);
2403 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2404 params
.system_values
= &system_values
;
2405 params
.inputs
= inputs
;
2406 params
.context_ptr
= wrap(hPrivateData
);
2407 params
.sampler
= sampler
;
2408 params
.info
= &swr_vs
->info
.base
;
2410 lp_build_tgsi_soa(gallivm
,
2411 swr_vs
->pipe
.tokens
,
2415 sampler
->destroy(sampler
);
2417 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2419 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
2421 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
2424 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2425 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
2426 if (!outputs
[attrib
][channel
])
2432 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
2433 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
2435 val
= LOAD(unwrap(outputs
[attrib
][0]));
2436 outSlot
= VERTEX_SGV_SLOT
;
2437 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
2438 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2439 outSlot
= VERTEX_POSITION_SLOT
;
2441 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2442 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
2443 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
2447 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
2451 if (ctx
->rasterizer
->clip_plane_enable
||
2452 swr_vs
->info
.base
.culldist_writemask
) {
2453 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
2456 if (swr_vs
->info
.base
.writes_clipvertex
) {
2457 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
2458 &swr_vs
->info
.base
);
2460 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2461 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
2462 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
2468 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2469 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
2470 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
2471 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
2472 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
2474 tgsi_shader_info
*pLastFE
= &ctx
->vs
->info
.base
;
2477 pLastFE
= &ctx
->gs
->info
.base
;
2479 else if (ctx
->tes
) {
2480 pLastFE
= &ctx
->tes
->info
.base
;
2482 else if (ctx
->tcs
) {
2483 pLastFE
= &ctx
->tcs
->info
.base
;
2486 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
2487 // clip distance overrides user clip planes
2488 if ((pLastFE
->clipdist_writemask
& clip_mask
& (1 << val
)) ||
2489 ((pLastFE
->culldist_writemask
<< pLastFE
->num_written_clipdistance
) & (1 << val
))) {
2490 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1, pLastFE
);
2491 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2493 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
2494 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2496 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
2497 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
2502 if (!(clip_mask
& (1 << val
)))
2505 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
2506 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
2507 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
2508 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
2510 Value
*bpx
= VBROADCAST_16(px
);
2511 Value
*bpy
= VBROADCAST_16(py
);
2512 Value
*bpz
= VBROADCAST_16(pz
);
2513 Value
*bpw
= VBROADCAST_16(pw
);
2515 Value
*bpx
= VBROADCAST(px
);
2516 Value
*bpy
= VBROADCAST(py
);
2517 Value
*bpz
= VBROADCAST(pz
);
2518 Value
*bpw
= VBROADCAST(pw
);
2520 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
2521 FADD(FMUL(unwrap(cy
), bpy
),
2522 FADD(FMUL(unwrap(cz
), bpz
),
2523 FMUL(unwrap(cw
), bpw
))));
2526 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2528 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
2534 JM()->DumpToFile(pFunction
, "vs_function1");
2535 gallivm_verify_function(gallivm
, wrap(pFunction
));
2536 gallivm_compile_module(gallivm
);
2537 JM()->DumpToFile(pFunction
, "vs_function2");
2539 // lp_debug_dump_value(func);
2541 PFN_VERTEX_FUNC pFunc
=
2542 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2544 JM()->DumpAsm(pFunction
, "vs_function_asm");
2545 debug_printf("vert shader %p\n", pFunc
);
2546 assert(pFunc
&& "Error: VertShader = NULL");
2548 JM()->mIsModuleFinalized
= true;
2554 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
2556 if (!ctx
->vs
->pipe
.tokens
)
2560 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2562 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
2564 ctx
->vs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantVS
>(new VariantVS(builder
.gallivm
, func
))));
2569 swr_so_adjust_attrib(unsigned in_attrib
,
2570 swr_vertex_shader
*swr_vs
)
2572 ubyte semantic_name
;
2575 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
2578 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
2579 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
2580 attrib
= VERTEX_POSITION_SLOT
;
2581 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
2582 attrib
= VERTEX_SGV_SLOT
;
2583 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
2584 attrib
= VERTEX_SGV_SLOT
;
2586 if (swr_vs
->info
.base
.writes_position
) {
2596 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
2598 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2599 if ((info
->output_semantic_name
[i
] == name
)
2600 && (info
->output_semantic_index
[i
] == index
)) {
2609 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
2611 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
2613 struct tgsi_shader_info
*pPrevShader
;
2615 pPrevShader
= &ctx
->gs
->info
.base
;
2617 pPrevShader
= &ctx
->tes
->info
.base
;
2619 pPrevShader
= &ctx
->vs
->info
.base
;
2621 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2622 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2624 memset(inputs
, 0, sizeof(inputs
));
2625 memset(outputs
, 0, sizeof(outputs
));
2627 struct lp_build_sampler_soa
*sampler
= NULL
;
2629 AttrBuilder attrBuilder
;
2630 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2632 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2633 PointerType::get(mInt8Ty
, 0),
2634 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2635 FunctionType
*funcType
=
2636 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
2638 auto pFunction
= Function::Create(funcType
,
2639 GlobalValue::ExternalLinkage
,
2641 JM()->mpCurrentModule
);
2642 #if LLVM_VERSION_MAJOR < 5
2643 AttributeSet attrSet
= AttributeSet::get(
2644 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2645 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2647 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2650 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2651 IRB()->SetInsertPoint(block
);
2652 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2654 auto args
= pFunction
->arg_begin();
2655 Value
*hPrivateData
= &*args
++;
2656 hPrivateData
->setName("hPrivateData");
2657 Value
*pWorkerData
= &*args
++;
2658 pWorkerData
->setName("pWorkerData");
2659 Value
*pPS
= &*args
++;
2660 pPS
->setName("psCtx");
2662 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
2663 consts_ptr
->setName("fs_constants");
2664 Value
*const_sizes_ptr
=
2665 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
2666 const_sizes_ptr
->setName("num_fs_constants");
2668 // load *pAttribs, *pPerspAttribs
2669 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
2670 Value
*pPerspAttribs
=
2671 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
2673 swr_fs
->constantMask
= 0;
2674 swr_fs
->flatConstantMask
= 0;
2675 swr_fs
->pointSpriteMask
= 0;
2677 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2678 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
2679 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
2680 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
2686 Value
*vi
= nullptr, *vj
= nullptr;
2687 switch (interpLoc
) {
2688 case TGSI_INTERPOLATE_LOC_CENTER
:
2689 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
2690 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
2692 case TGSI_INTERPOLATE_LOC_CENTROID
:
2693 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
2694 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
2696 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2697 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
2698 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
2703 Value
*vw
= nullptr, *pAttribs
;
2704 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2705 interpMode
== TGSI_INTERPOLATE_COLOR
) {
2706 pAttribs
= pPerspAttribs
;
2707 switch (interpLoc
) {
2708 case TGSI_INTERPOLATE_LOC_CENTER
:
2709 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
2711 case TGSI_INTERPOLATE_LOC_CENTROID
:
2712 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
2714 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2715 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
2719 pAttribs
= pRawAttribs
;
2725 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
2726 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
2728 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
2730 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
2731 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
2732 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
2734 inputs
[attrib
][0] = wrap(ff
);
2735 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2736 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2737 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2739 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
2740 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
2741 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
2742 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
2743 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
2745 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
2746 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
2748 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
2750 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
2752 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) { // gl_Layer
2753 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_renderTargetArrayIndex
});
2754 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vRenderTargetArrayIndex");
2755 inputs
[attrib
][0] = wrap(ff
);
2756 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2757 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2758 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2760 } else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) { // gl_ViewportIndex
2761 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_viewportIndex
});
2762 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vViewportIndex");
2763 inputs
[attrib
][0] = wrap(ff
);
2764 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2765 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2766 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2769 unsigned linkedAttrib
=
2770 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
2772 uint32_t extraAttribs
= 0;
2773 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
2774 /* non-gs generated primID - need to grab from swizzleMap override */
2775 linkedAttrib
= pPrevShader
->num_outputs
- 1;
2776 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2778 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
2779 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
2780 /* we add an extra attrib to the backendState in swr_update_derived. */
2781 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
2782 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
2784 } else if (linkedAttrib
+ 1 == 0xFFFFFFFF) {
2785 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
2786 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2787 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2788 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2789 /* If we're reading in color and 2-sided lighting is enabled, we have
2792 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
2795 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2796 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2797 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2798 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
2802 unsigned bcolorAttrib
= 0xFFFFFFFF;
2803 Value
*offset
= NULL
;
2804 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
2805 bcolorAttrib
= locate_linkage(
2806 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
);
2807 /* Neither front nor back colors were available. Nothing to load. */
2808 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
2810 /* If there is no front color, just always use the back color. */
2811 if (linkedAttrib
+ 1 == 0xFFFFFFFF)
2812 linkedAttrib
= bcolorAttrib
;
2814 if (bcolorAttrib
!= 0xFFFFFFFF) {
2816 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2817 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
2818 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2819 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
2822 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
2826 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
2828 offset
= MUL(back
, C(diff
));
2829 offset
->setName("offset");
2834 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2835 if (mask
& (1 << channel
)) {
2836 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
2837 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
2838 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
2841 indexA
= ADD(indexA
, offset
);
2842 indexB
= ADD(indexB
, offset
);
2843 indexC
= ADD(indexC
, offset
);
2846 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
2847 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
2848 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
2850 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2851 inputs
[attrib
][channel
] = wrap(va
);
2853 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
2857 Value
*interp
= FMUL(va
, vi
);
2858 Value
*interp1
= FMUL(vb
, vj
);
2859 interp
= FADD(interp
, interp1
);
2860 interp
= FADD(interp
, vc
);
2861 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2862 interpMode
== TGSI_INTERPOLATE_COLOR
)
2863 interp
= FMUL(interp
, vw
);
2864 inputs
[attrib
][channel
] = wrap(interp
);
2870 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
2871 assert(sampler
!= nullptr);
2873 struct lp_bld_tgsi_system_values system_values
;
2874 memset(&system_values
, 0, sizeof(system_values
));
2876 struct lp_build_mask_context mask
;
2877 bool uses_mask
= false;
2879 if (swr_fs
->info
.base
.uses_kill
||
2880 key
.poly_stipple_enable
) {
2881 Value
*vActiveMask
= NULL
;
2882 if (swr_fs
->info
.base
.uses_kill
) {
2883 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
2885 if (key
.poly_stipple_enable
) {
2886 // first get fragment xy coords and clip to stipple bounds
2887 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
2888 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
2889 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
2890 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
2892 // stipple pattern is 32x32, which means that one line of stipple
2893 // is stored in one word:
2894 // vXstipple is bit offset inside 32-bit stipple word
2895 // vYstipple is word index in stipple array
2896 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
2897 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
2899 // grab stipple pattern base address
2900 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
2901 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
2903 // perform a gather to grab stipple words for each lane
2904 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
2905 VIMMED1(0xffffffff), 4);
2907 // create a mask with one bit corresponding to the x stipple
2908 // and AND it with the pattern, to see if we have a bit
2909 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
2910 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
2911 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
2912 vStippleMask
= VMASK(vStippleMask
);
2914 if (swr_fs
->info
.base
.uses_kill
) {
2915 vActiveMask
= AND(vActiveMask
, vStippleMask
);
2917 vActiveMask
= vStippleMask
;
2920 lp_build_mask_begin(
2921 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
2925 struct lp_build_tgsi_params params
;
2926 memset(¶ms
, 0, sizeof(params
));
2927 params
.type
= lp_type_float_vec(32, 32 * 8);
2928 params
.mask
= uses_mask
? &mask
: NULL
;
2929 params
.consts_ptr
= wrap(consts_ptr
);
2930 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2931 params
.system_values
= &system_values
;
2932 params
.inputs
= inputs
;
2933 params
.context_ptr
= wrap(hPrivateData
);
2934 params
.sampler
= sampler
;
2935 params
.info
= &swr_fs
->info
.base
;
2937 lp_build_tgsi_soa(gallivm
,
2938 swr_fs
->pipe
.tokens
,
2942 sampler
->destroy(sampler
);
2944 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2946 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
2948 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
2949 case TGSI_SEMANTIC_POSITION
: {
2952 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
2953 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
2956 case TGSI_SEMANTIC_COLOR
: {
2957 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2958 if (!outputs
[attrib
][channel
])
2962 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
2963 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
2964 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
2965 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
2968 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
2974 SWR_PS_CONTEXT_shaded
,
2975 swr_fs
->info
.base
.output_semantic_index
[attrib
],
2983 "unknown output from FS %s[%d]\n",
2984 tgsi_semantic_names
[swr_fs
->info
.base
2985 .output_semantic_name
[attrib
]],
2986 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
2992 LLVMValueRef mask_result
= 0;
2994 mask_result
= lp_build_mask_end(&mask
);
2997 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
3000 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
3005 gallivm_verify_function(gallivm
, wrap(pFunction
));
3007 gallivm_compile_module(gallivm
);
3009 // after the gallivm passes, we have to lower the core's intrinsics
3010 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
3011 lowerPass
.add(createLowerX86Pass(this));
3012 lowerPass
.run(*pFunction
);
3014 PFN_PIXEL_KERNEL kernel
=
3015 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
3016 debug_printf("frag shader %p\n", kernel
);
3017 assert(kernel
&& "Error: FragShader = NULL");
3019 JM()->mIsModuleFinalized
= true;
3025 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
3027 if (!ctx
->fs
->pipe
.tokens
)
3031 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
3033 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
3035 ctx
->fs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantFS
>(new VariantFS(builder
.gallivm
, func
))));