1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 #include <llvm/Config/llvm-config.h>
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
42 #include "gen_state_llvm.h"
44 #include "functionpasses/passes.h"
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
55 #include "gallivm/lp_bld_logic.h"
57 #include "swr_context.h"
58 #include "gen_surf_state_llvm.h"
59 #include "gen_swr_context_llvm.h"
60 #include "swr_resource.h"
61 #include "swr_state.h"
62 #include "swr_screen.h"
65 /////////////////////////////////////////////////////////////////////////
70 #include "util/u_debug.h"
71 #include "util/u_memory.h"
72 #include "util/u_string.h"
74 #include "gallivm/lp_bld_type.h"
// Compile-time switches for shader debug output.  They are enabled only when
// both DEBUG and SWR_VERBOSE_SHADER are defined; otherwise every flag is
// false, so `if (verbose_*)` blocks are guarded by a compile-time constant.
#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
constexpr bool verbose_shader          = true;
constexpr bool verbose_tcs_shader_in   = true;
constexpr bool verbose_tcs_shader_out  = true;
constexpr bool verbose_tcs_shader_loop = true;
constexpr bool verbose_vs_shader       = true;
#else
constexpr bool verbose_shader          = false;
constexpr bool verbose_tcs_shader_in   = false;
constexpr bool verbose_tcs_shader_out  = false;
constexpr bool verbose_tcs_shader_loop = false;
constexpr bool verbose_vs_shader       = false;
#endif

using namespace SwrJit;

// Forward declaration of the linkage lookup helper.
// NOTE(review): presumably maps a (semantic name, semantic index) pair to a
// slot in `info`; confirm against the definition.
static unsigned
locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
95 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
97 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
100 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
102 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
105 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
107 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
110 bool operator==(const swr_jit_gs_key
&lhs
, const swr_jit_gs_key
&rhs
)
112 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
115 bool operator==(const swr_jit_tcs_key
&lhs
, const swr_jit_tcs_key
&rhs
)
117 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
120 bool operator==(const swr_jit_tes_key
&lhs
, const swr_jit_tes_key
&rhs
)
122 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
127 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
128 struct swr_context
*ctx
,
129 enum pipe_shader_type shader_type
,
130 struct swr_jit_sampler_key
&key
)
132 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
134 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
135 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
136 lp_sampler_static_sampler_state(
137 &key
.sampler
[i
].sampler_state
,
138 ctx
->samplers
[shader_type
][i
]);
143 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
144 * are dx10-style? Can't really have mixed opcodes, at least not
145 * if we want to skip the holes here (without rescanning tgsi).
147 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
148 key
.nr_sampler_views
=
149 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
150 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
151 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1u << (i
& 31))) {
152 const struct pipe_sampler_view
*view
=
153 ctx
->sampler_views
[shader_type
][i
];
154 lp_sampler_static_texture_state(
155 &key
.sampler
[i
].texture_state
, view
);
157 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
158 const struct util_format_description
*desc
=
159 util_format_description(view
->format
);
160 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
161 !util_format_has_depth(desc
))
162 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
167 key
.nr_sampler_views
= key
.nr_samplers
;
168 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
169 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
170 const struct pipe_sampler_view
*view
=
171 ctx
->sampler_views
[shader_type
][i
];
172 lp_sampler_static_texture_state(
173 &key
.sampler
[i
].texture_state
, view
);
175 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
176 const struct util_format_description
*desc
=
177 util_format_description(view
->format
);
178 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
179 !util_format_has_depth(desc
))
180 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
188 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
189 struct swr_context
*ctx
,
190 swr_fragment_shader
*swr_fs
)
192 memset((void*)&key
, 0, sizeof(key
));
194 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
195 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
196 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
198 struct tgsi_shader_info
*pPrevShader
;
200 pPrevShader
= &ctx
->gs
->info
.base
;
202 pPrevShader
= &ctx
->tes
->info
.base
;
204 pPrevShader
= &ctx
->vs
->info
.base
;
206 memcpy(&key
.vs_output_semantic_name
,
207 &pPrevShader
->output_semantic_name
,
208 sizeof(key
.vs_output_semantic_name
));
209 memcpy(&key
.vs_output_semantic_idx
,
210 &pPrevShader
->output_semantic_index
,
211 sizeof(key
.vs_output_semantic_idx
));
213 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
215 key
.poly_stipple_enable
= ctx
->rasterizer
->poly_stipple_enable
&&
216 ctx
->poly_stipple
.prim_is_poly
;
220 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
221 struct swr_context
*ctx
,
222 swr_vertex_shader
*swr_vs
)
224 memset((void*)&key
, 0, sizeof(key
));
226 key
.clip_plane_mask
=
227 swr_vs
->info
.base
.clipdist_writemask
?
228 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
229 ctx
->rasterizer
->clip_plane_enable
;
231 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
235 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
236 struct swr_vertex_element_state
*velems
)
238 memset((void*)&key
, 0, sizeof(key
));
240 key
.fsState
= velems
->fsState
;
244 swr_generate_gs_key(struct swr_jit_gs_key
&key
,
245 struct swr_context
*ctx
,
246 swr_geometry_shader
*swr_gs
)
248 memset((void*)&key
, 0, sizeof(key
));
250 struct tgsi_shader_info
*pPrevShader
= nullptr;
253 pPrevShader
= &ctx
->tes
->info
.base
;
255 pPrevShader
= &ctx
->vs
->info
.base
;
258 memcpy(&key
.vs_output_semantic_name
,
259 &pPrevShader
->output_semantic_name
,
260 sizeof(key
.vs_output_semantic_name
));
261 memcpy(&key
.vs_output_semantic_idx
,
262 &pPrevShader
->output_semantic_index
,
263 sizeof(key
.vs_output_semantic_idx
));
265 swr_generate_sampler_key(swr_gs
->info
, ctx
, PIPE_SHADER_GEOMETRY
, key
);
269 swr_generate_tcs_key(struct swr_jit_tcs_key
&key
,
270 struct swr_context
*ctx
,
271 swr_tess_control_shader
*swr_tcs
)
273 memset((void*)&key
, 0, sizeof(key
));
275 struct tgsi_shader_info
*pPrevShader
= &ctx
->vs
->info
.base
;
277 memcpy(&key
.vs_output_semantic_name
,
278 &pPrevShader
->output_semantic_name
,
279 sizeof(key
.vs_output_semantic_name
));
280 memcpy(&key
.vs_output_semantic_idx
,
281 &pPrevShader
->output_semantic_index
,
282 sizeof(key
.vs_output_semantic_idx
));
284 key
.clip_plane_mask
=
285 swr_tcs
->info
.base
.clipdist_writemask
?
286 swr_tcs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
287 ctx
->rasterizer
->clip_plane_enable
;
289 swr_generate_sampler_key(swr_tcs
->info
, ctx
, PIPE_SHADER_TESS_CTRL
, key
);
293 swr_generate_tes_key(struct swr_jit_tes_key
&key
,
294 struct swr_context
*ctx
,
295 swr_tess_evaluation_shader
*swr_tes
)
297 memset((void*)&key
, 0, sizeof(key
));
299 struct tgsi_shader_info
*pPrevShader
= nullptr;
302 pPrevShader
= &ctx
->tcs
->info
.base
;
305 pPrevShader
= &ctx
->vs
->info
.base
;
308 SWR_ASSERT(pPrevShader
!= nullptr, "TES: No TCS or VS defined");
310 memcpy(&key
.prev_output_semantic_name
,
311 &pPrevShader
->output_semantic_name
,
312 sizeof(key
.prev_output_semantic_name
));
313 memcpy(&key
.prev_output_semantic_idx
,
314 &pPrevShader
->output_semantic_index
,
315 sizeof(key
.prev_output_semantic_idx
));
317 key
.clip_plane_mask
=
318 swr_tes
->info
.base
.clipdist_writemask
?
319 swr_tes
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
320 ctx
->rasterizer
->clip_plane_enable
;
322 swr_generate_sampler_key(swr_tes
->info
, ctx
, PIPE_SHADER_TESS_EVAL
, key
);
/**
 * SWR JIT Builder extended with a gallivm state, so that gallivm's TGSI
 * translation and the SWR Builder emit into the same LLVM module.
 */
struct BuilderSWR : public Builder {
   /**
    * Start a new JIT module and create a gallivm state on the JIT manager's
    * LLVM context; the gallivm module becomes the manager's current module.
    */
   BuilderSWR(JitManager *pJitMgr, const char *pName)
      : Builder(pJitMgr)
   {
      pJitMgr->SetupNewModule();
      gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL);
      pJitMgr->mpCurrentModule = unwrap(gallivm->module);
   }

   /** Release the gallivm IR created in the constructor. */
   ~BuilderSWR() {
      gallivm_free_ir(gallivm);
   }

   void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
                unsigned slot, unsigned channel);

   // gallivm state created in the constructor.
   struct gallivm_state *gallivm;

   // Per-stage compile entry points; each takes the context and the JIT key
   // for that stage and returns the corresponding function pointer type.
   PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
   PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
   PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
   PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key);
   PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key);

   // GS-specific emit functions
   LLVMValueRef
   swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
                           struct lp_build_context * bld,
                           boolean is_vindex_indirect,
                           LLVMValueRef vertex_index,
                           boolean is_aindex_indirect,
                           LLVMValueRef attrib_index,
                           LLVMValueRef swizzle_index);
   void
   swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                           struct lp_build_context * bld,
                           LLVMValueRef (*outputs)[4],
                           LLVMValueRef emitted_vertices_vec,
                           LLVMValueRef stream_id);

   void
   swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
                             struct lp_build_context * bld,
                             LLVMValueRef total_emitted_vertices_vec_ptr,
                             LLVMValueRef verts_per_prim_vec,
                             LLVMValueRef emitted_prims_vec,
                             LLVMValueRef mask_vec);

   void
   swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
                        LLVMValueRef total_emitted_vertices_vec,
                        LLVMValueRef emitted_prims_vec, unsigned stream);

   // TCS-specific emit functions
   void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld);
   void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld);

   LLVMValueRef
   swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
                            struct lp_build_tgsi_context * bld_base,
                            boolean is_vindex_indirect,
                            LLVMValueRef vertex_index,
                            boolean is_aindex_indirect,
                            LLVMValueRef attrib_index,
                            LLVMValueRef swizzle_index);

   LLVMValueRef
   swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
                             struct lp_build_tgsi_context * bld_base,
                             boolean is_vindex_indirect,
                             LLVMValueRef vertex_index,
                             boolean is_aindex_indirect,
                             LLVMValueRef attrib_index,
                             LLVMValueRef swizzle_index,
                             uint32_t name);

   void
   swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
                            struct lp_build_tgsi_context * bld_base,
                            unsigned name,
                            boolean is_vindex_indirect,
                            LLVMValueRef vertex_index,
                            boolean is_aindex_indirect,
                            LLVMValueRef attrib_index,
                            LLVMValueRef swizzle_index,
                            LLVMValueRef value,
                            LLVMValueRef mask_vec);

   // Barrier implementation (available only in TCS)
   void
   swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
                             struct lp_build_tgsi_context *bld_base);

   // TES-specific emit functions
   LLVMValueRef
   swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
                                struct lp_build_tgsi_context * bld_base,
                                boolean is_vindex_indirect,
                                LLVMValueRef vertex_index,
                                boolean is_aindex_indirect,
                                LLVMValueRef attrib_index,
                                LLVMValueRef swizzle_index);

   LLVMValueRef
   swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
                                  struct lp_build_tgsi_context * bld_base,
                                  boolean is_aindex_indirect,
                                  LLVMValueRef attrib_index,
                                  LLVMValueRef swizzle_index);
};
/**
 * Geometry-shader interface handed to gallivm, extended with SWR state.
 *
 * `base` must remain the first member: the GS callbacks in this file cast
 * the lp_build_gs_iface pointer they receive back to swr_gs_llvm_iface.
 */
struct swr_gs_llvm_iface {
   struct lp_build_gs_iface base;
   struct tgsi_shader_info *info;

   // Builder the callbacks forward to.
   BuilderSWR *pBuilder;

   // IR value of the GS context; loaded from with SWR_GS_CONTEXT_* indices.
   Value *pGsCtx;
   SWR_GS_STATE *pGsState;
   uint32_t num_outputs;
   uint32_t num_verts_per_prim;

   // IR value of the attribute map indexed by attribute index on input fetch.
   Value *pVtxAttribMap;
};
/**
 * Tessellation-control-shader interface handed to gallivm, extended with
 * SWR state.
 *
 * `base` must remain the first member: the TCS callbacks in this file cast
 * the lp_build_tcs_iface pointer they receive back to swr_tcs_llvm_iface.
 */
struct swr_tcs_llvm_iface {
   struct lp_build_tcs_iface base;
   struct tgsi_shader_info *info;

   // Builder the callbacks forward to.
   BuilderSWR *pBuilder;

   // IR value of the TCS context; indexed with SWR_HS_CONTEXT_* constants.
   Value *pTcsCtx;
   SWR_TS_STATE *pTsState;

   // Loop bound used by the TCS epilogue (compared against loop_var).
   uint32_t output_vertices;

   // Per-invocation loop counter allocated in the TCS prologue.
   LLVMValueRef loop_var;

   Value *pVtxAttribMap;
   Value *pVtxOutputAttribMap;
   Value *pPatchOutputAttribMap;
};
/**
 * Tessellation-evaluation-shader interface handed to gallivm, extended with
 * SWR state.
 *
 * `base` must remain the first member: the TES callbacks in this file cast
 * the lp_build_tes_iface pointer they receive back to swr_tes_llvm_iface.
 */
struct swr_tes_llvm_iface {
   struct lp_build_tes_iface base;
   struct tgsi_shader_info *info;

   // Builder the callbacks forward to.
   BuilderSWR *pBuilder;

   // NOTE(review): presumably the IR value of the TES context, by analogy
   // with pGsCtx / pTcsCtx in the sibling interfaces -- confirm at use sites.
   Value *pTesCtx;
   SWR_TS_STATE *pTsState;

   uint32_t num_outputs;

   Value *pVtxAttribMap;
   Value *pPatchAttribMap;
};
482 // trampoline functions so we can use the builder llvm construction methods
484 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface
*gs_iface
,
485 struct lp_build_context
* bld
,
486 boolean is_vindex_indirect
,
487 LLVMValueRef vertex_index
,
488 boolean is_aindex_indirect
,
489 LLVMValueRef attrib_index
,
490 LLVMValueRef swizzle_index
)
492 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_iface
;
494 return iface
->pBuilder
->swr_gs_llvm_fetch_input(gs_iface
, bld
,
503 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface
*gs_base
,
504 struct lp_build_context
* bld
,
505 LLVMValueRef (*outputs
)[4],
506 LLVMValueRef emitted_vertices_vec
,
507 LLVMValueRef stream_id
)
509 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
511 iface
->pBuilder
->swr_gs_llvm_emit_vertex(gs_base
, bld
,
513 emitted_vertices_vec
,
518 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
519 struct lp_build_context
* bld
,
520 LLVMValueRef total_emitted_vertices_vec_ptr
,
521 LLVMValueRef verts_per_prim_vec
,
522 LLVMValueRef emitted_prims_vec
,
523 LLVMValueRef mask_vec
, unsigned stream_id
)
525 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
527 iface
->pBuilder
->swr_gs_llvm_end_primitive(gs_base
, bld
,
528 total_emitted_vertices_vec_ptr
,
535 swr_gs_llvm_epilogue(const struct lp_build_gs_iface
*gs_base
,
536 LLVMValueRef total_emitted_vertices_vec
,
537 LLVMValueRef emitted_prims_vec
, unsigned stream
)
539 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
541 iface
->pBuilder
->swr_gs_llvm_epilogue(gs_base
,
542 total_emitted_vertices_vec
,
543 emitted_prims_vec
, stream
);
547 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
548 struct lp_build_context
* bld
,
549 boolean is_vindex_indirect
,
550 LLVMValueRef vertex_index
,
551 boolean is_aindex_indirect
,
552 LLVMValueRef attrib_index
,
553 LLVMValueRef swizzle_index
)
555 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
556 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
558 return iface
->pBuilder
->swr_tcs_llvm_fetch_input(tcs_iface
, bld_base
,
567 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
568 struct lp_build_context
* bld
,
569 boolean is_vindex_indirect
,
570 LLVMValueRef vertex_index
,
571 boolean is_aindex_indirect
,
572 LLVMValueRef attrib_index
,
573 LLVMValueRef swizzle_index
,
576 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
577 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
579 return iface
->pBuilder
->swr_tcs_llvm_fetch_output(tcs_iface
, bld_base
,
590 swr_tcs_llvm_emit_prologue(struct lp_build_context
* bld
)
592 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
593 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
594 iface
->pBuilder
->swr_tcs_llvm_emit_prologue(bld_base
);
598 swr_tcs_llvm_emit_epilogue(struct lp_build_context
* bld
)
600 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
601 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
602 iface
->pBuilder
->swr_tcs_llvm_emit_epilogue(bld_base
);
606 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
607 struct lp_build_context
* bld
,
609 boolean is_vindex_indirect
,
610 LLVMValueRef vertex_index
,
611 boolean is_aindex_indirect
,
612 LLVMValueRef attrib_index
,
613 LLVMValueRef swizzle_index
,
615 LLVMValueRef mask_vec
)
617 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
618 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
620 iface
->pBuilder
->swr_tcs_llvm_store_output(tcs_iface
,
634 void swr_tcs_llvm_emit_barrier(struct lp_build_context
*bld
)
636 lp_build_tgsi_soa_context
* bld_base
= (lp_build_tgsi_soa_context
*)bld
;
637 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)bld_base
->tcs_iface
;
639 iface
->pBuilder
->swr_tcs_llvm_emit_barrier(bld_base
->tcs_iface
, &bld_base
->bld_base
);
644 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
645 struct lp_build_context
* bld
,
646 boolean is_vindex_indirect
,
647 LLVMValueRef vertex_index
,
648 boolean is_aindex_indirect
,
649 LLVMValueRef attrib_index
,
650 LLVMValueRef swizzle_index
)
652 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
653 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
655 return iface
->pBuilder
->swr_tes_llvm_fetch_vtx_input(tes_iface
, bld_base
,
664 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
665 struct lp_build_context
* bld
,
666 boolean is_aindex_indirect
,
667 LLVMValueRef attrib_index
,
668 LLVMValueRef swizzle_index
)
670 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
671 struct lp_build_tgsi_context
*bld_base
= (struct lp_build_tgsi_context
*)bld
;
673 return iface
->pBuilder
->swr_tes_llvm_fetch_patch_input(tes_iface
, bld_base
,
/**
 * Emit IR that fetches one geometry-shader input.
 *
 * The attribute index is translated through iface->pVtxAttribMap, the
 * vertex index is scaled by the input vertex stride read from the GS
 * context, and the value is loaded from the GS context's pVerts array at
 * {vertex * stride + attrib, swizzle}.
 *
 * When either index is indirect (a per-lane vector), the fetch is done one
 * lane at a time and the result is assembled with VINSERT; otherwise a
 * single load is emitted.
 *
 * @return the fetched value wrapped as an LLVMValueRef.
 */
LLVMValueRef
BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
                           struct lp_build_context * bld,
                           boolean is_vindex_indirect,
                           LLVMValueRef vertex_index,
                           boolean is_aindex_indirect,
                           LLVMValueRef attrib_index,
                           LLVMValueRef swizzle_index)
{
    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
    Value *vert_index = unwrap(vertex_index);
    Value *attr_index = unwrap(attrib_index);

    // Continue emitting where gallivm's builder currently is.
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));

    if (is_vindex_indirect || is_aindex_indirect) {
       int i;
       Value *res = unwrap(bld->zero);
       struct lp_type type = bld->type;

       for (i = 0; i < type.length; i++) {
          // Default to the shared index; replaced by this lane's element
          // when the corresponding index is indirect.
          Value *vert_chan_index = vert_index;
          Value *attr_chan_index = attr_index;

          if (is_vindex_indirect) {
             vert_chan_index = VEXTRACT(vert_index, C(i));
          }
          if (is_aindex_indirect) {
             attr_chan_index = VEXTRACT(attr_index, C(i));
          }

          Value *attrib =
             LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));

          Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
          Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});

          Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
          Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));

          // Keep only this lane's element of the loaded vector.
          Value *value = VEXTRACT(pInput, C(i));
          res = VINSERT(res, value, C(i));
       }

       return wrap(res);
    } else {
       Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));

       Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
       Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});

       Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);

       Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));

       return wrap(pInput);
    }
}
// GS output stream layout
// The emit functions below offset into each lane's output stream with these:
// the control data starts VERTEX_COUNT_SIZE past the start of the stream,
// and vertex data starts after VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE.
// NOTE(review): the units look like bytes (the offsets index a stream
// pointer before it is cast to float*) -- confirm.
#define VERTEX_COUNT_SIZE 32
#define CONTROL_HEADER_SIZE (8*32)
/**
 * Emit IR that writes one emitted GS vertex into every lane's output stream.
 *
 * For each shader output the destination slot is chosen from its semantic
 * (point size, layer and viewport index go to a channel of the SGV slot,
 * position to the position slot, everything else to the generic attribute
 * slots).  Each lane's four channels are then stored at
 * header + vertex * vertSize + slot * attribSize in that lane's stream.
 * Lanes whose bit in the GS context mask is clear are redirected to a
 * scratch stack slot so that the store has no visible effect.
 *
 * When the output topology is a point list, a 2-bit stream id per vertex is
 * additionally OR-ed into the control data that follows the vertex count.
 */
void
BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                           struct lp_build_context * bld,
                           LLVMValueRef (*outputs)[4],
                           LLVMValueRef emitted_vertices_vec,
                           LLVMValueRef stream_id)
{
    swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;

    // Continue emitting where gallivm's builder currently is.
    IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
    const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
    const uint32_t attribSize = 4 * sizeof(float);
    const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
    Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));

    Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
    Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));

    Value *pStack = STACKSAVE();
    Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking

    for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
       uint32_t attribSlot = attrib;
       uint32_t sgvChannel = 0;
       if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
          attribSlot = VERTEX_SGV_SLOT;
          sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
          attribSlot = VERTEX_SGV_SLOT;
          sgvChannel = VERTEX_SGV_RTAI_COMP;
       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
          attribSlot = VERTEX_SGV_SLOT;
          sgvChannel = VERTEX_SGV_VAI_COMP;
       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
          attribSlot = VERTEX_POSITION_SLOT;
       } else {
          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
          // NOTE(review): presumably compensates for the position output
          // occupying an output index but not a generic slot -- confirm.
          if (iface->info->writes_position) {
             attribSlot--;
          }
       }

       Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?

       for (uint32_t lane = 0; lane < mVWidth; ++lane) {
          Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
          Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
          Value *pStreamOffset = GEP(pStream, pLaneOffset);
          pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);

          // Inactive lanes write to the scratch slot instead of the stream.
          Value *pLaneMask = VEXTRACT(vMask1, C(lane));
          pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);

          for (uint32_t channel = 0; channel < 4; ++channel) {
             Value *vData;

             // SGV outputs always take their value from component 0.
             if (attribSlot == VERTEX_SGV_SLOT)
                vData = LOAD(unwrap(outputs[attrib][0]));
             else
                vData = LOAD(unwrap(outputs[attrib][channel]));

             // For the SGV slot only the channel owned by this semantic is
             // written; the other channels are skipped.
             if (attribSlot != VERTEX_SGV_SLOT ||
                 sgvChannel == channel) {
                vData = VEXTRACT(vData, C(lane));
                STORE(vData, pStreamOffset);
             }
             pStreamOffset = GEP(pStreamOffset, C(1));
          }
       }
    }

    /* When the output type is not points, the geometry shader may not
     * output data to multiple streams. So early exit here.
     */
    if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
        STACKRESTORE(pStack);
        return;
    }

    // Info about stream id for each vertex
    // is coded in 2 bits (4 vert per byte "box"):
    // ----------------- ----------------- ----
    // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
    // ----------------- ----------------- ----

    // Calculate where need to put stream id for current vert
    // in 1 byte "box".
    Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));

    // Calculate in which box put stream id for current vert.
    Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));

    // Skip count header
    Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));

    for (uint32_t lane = 0; lane < mVWidth; ++lane) {
       Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
       Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});

       Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));

       // Just make sure that not overflow max - stream id = (0,1,2,3)
       Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);

       // Shift it to correct position in byte "box"
       vVal = SHL(vVal, pShift);

       // Info about other vertices can be already stored
       // so we need to read and add bits from current vert info.
       Value *storedValue = LOAD(pStreamOffset);
       vVal = OR(storedValue, vVal);
       STORE(vVal, pStreamOffset);
    }

    STACKRESTORE(pStack);
}
860 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface
*gs_base
,
861 struct lp_build_context
* bld
,
862 LLVMValueRef total_emitted_vertices_vec
,
863 LLVMValueRef verts_per_prim_vec
,
864 LLVMValueRef emitted_prims_vec
,
865 LLVMValueRef mask_vec
)
867 swr_gs_llvm_iface
*iface
= (swr_gs_llvm_iface
*)gs_base
;
869 /* When the output type is points, the geometry shader may output data
870 * to multiple streams, and end_primitive has no effect. Info about
871 * stream id for vertices is stored into the same place in memory where
872 * end primitive info is stored so early exit in this case.
874 if (iface
->pGsState
->outputTopology
== TOP_POINT_LIST
) {
878 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
880 Value
*vMask
= LOAD(iface
->pGsCtx
, { 0, SWR_GS_CONTEXT_mask
});
881 Value
*vMask1
= TRUNC(vMask
, VectorType::get(mInt1Ty
, 8));
883 uint32_t vertsPerPrim
= iface
->num_verts_per_prim
;
886 ADD(MUL(unwrap(emitted_prims_vec
), VIMMED1(vertsPerPrim
)),
887 unwrap(verts_per_prim_vec
));
889 vCount
= unwrap(total_emitted_vertices_vec
);
891 Value
*mask
= unwrap(mask_vec
);
892 Value
*cmpMask
= VMASK(ICMP_NE(unwrap(verts_per_prim_vec
), VIMMED1(0)));
893 mask
= AND(mask
, cmpMask
);
894 vMask1
= TRUNC(mask
, VectorType::get(mInt1Ty
, 8));
896 vCount
= SUB(vCount
, VIMMED1(1));
897 Value
*vOffset
= ADD(UDIV(vCount
, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE
));
898 Value
*vValue
= SHL(VIMMED1(1), UREM(vCount
, VIMMED1(8)));
900 vValue
= TRUNC(vValue
, VectorType::get(mInt8Ty
, 8));
902 Value
*pStack
= STACKSAVE();
903 Value
*pTmpPtr
= ALLOCA(mInt8Ty
, C(4)); // used for dummy read/write for lane masking
905 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
) {
906 Value
*vLaneOffset
= VEXTRACT(vOffset
, C(lane
));
907 Value
*pStream
= LOAD(iface
->pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
908 Value
*pStreamOffset
= GEP(pStream
, vLaneOffset
);
910 Value
*pLaneMask
= VEXTRACT(vMask1
, C(lane
));
911 pStreamOffset
= SELECT(pLaneMask
, pStreamOffset
, pTmpPtr
);
913 Value
*vVal
= LOAD(pStreamOffset
);
914 vVal
= OR(vVal
, VEXTRACT(vValue
, C(lane
)));
915 STORE(vVal
, pStreamOffset
);
918 STACKRESTORE(pStack
);
/**
 * Emit IR for the end of the geometry shader: for every SIMD lane, that
 * lane's element of total_emitted_vertices_vec is stored at the start of
 * the lane's output stream (through an int32 pointer).
 *
 * emitted_prims_vec and stream are not used here.
 */
void
BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
                        LLVMValueRef total_emitted_vertices_vec,
                        LLVMValueRef emitted_prims_vec, unsigned stream)
{
   swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;

   // Continue emitting where gallivm's builder currently is.
   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));

   // Store emit count to each output stream in the first DWORD
   for (uint32_t lane = 0; lane < mVWidth; ++lane)
   {
      Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
      pStream = BITCAST(pStream, mInt32PtrTy);
      Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
      STORE(pLaneCount, pStream);
   }
}
/**
 * Emit the start of the loop wrapped around the TCS body.
 *
 * A SIMD-wide int32 counter is allocated, initialised to 0 and remembered
 * in iface->loop_var; a gallivm loop is opened on the execution mask; and
 * the counter's current value is published as the invocation_id system
 * value for the shader body.
 */
void
BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld)
{
   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;

   Value* loop_var = ALLOCA(mSimdInt32Ty);
   STORE(VBROADCAST(C(0)), loop_var);

   iface->loop_var = wrap(loop_var);

   lp_exec_bgnloop(&bld->exec_mask, true);

   // lp_exec_bgnloop() moved gallivm's builder; re-sync ours before emitting.
   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
   bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));

   if (verbose_tcs_shader_loop) {
      lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id);
   }

}
/**
 * Emit the end of the loop opened by swr_tcs_llvm_emit_prologue().
 *
 * The counter in iface->loop_var is incremented, then compared (unsigned,
 * >=) against iface->output_vertices; lanes for which the comparison holds
 * break out of the loop, and the gallivm loop is closed.
 */
void
BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld)
{
   swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;

   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;

   // loop_var += 1
   STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
   if (verbose_tcs_shader_loop) {
      lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var))));
   }

   LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
                                   wrap(VBROADCAST(C(iface->output_vertices))));
   // Conditional break: push the condition, break, then restore the mask.
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
   lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
   lp_exec_mask_cond_pop(&bld->exec_mask);
   lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
}
982 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface
*tcs_iface
,
983 struct lp_build_tgsi_context
* bld_base
,
984 boolean is_vindex_indirect
,
985 LLVMValueRef vertex_index
,
986 boolean is_aindex_indirect
,
987 LLVMValueRef attrib_index
,
988 LLVMValueRef swizzle_index
)
990 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
992 Value
*vert_index
= unwrap(vertex_index
);
993 Value
*attr_index
= unwrap(attrib_index
);
995 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
997 if (verbose_tcs_shader_in
) {
998 lp_build_printf(gallivm
, "[TCS IN][VTX] ======================================\n");
999 lp_build_print_value(gallivm
, "[TCS IN][VTX] vertex_index: ", vertex_index
);
1000 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1001 lp_build_printf(gallivm
, "[TCS IN][VTX] --------------------------------------\n");
1004 Value
*res
= unwrap(bld_base
->base
.zero
);
1005 if (is_vindex_indirect
|| is_aindex_indirect
) {
1007 struct lp_type type
= bld_base
->base
.type
;
1009 for (i
= 0; i
< type
.length
; i
++) {
1010 Value
*vert_chan_index
= vert_index
;
1011 Value
*attr_chan_index
= attr_index
;
1013 if (is_vindex_indirect
) {
1014 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1016 if (is_aindex_indirect
) {
1017 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1021 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
1023 Value
*pBase
= GEP(iface
->pTcsCtx
,
1024 { C(0), C(SWR_HS_CONTEXT_vert
), vert_chan_index
,
1025 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
), C(i
) });
1027 Value
*val
= LOAD(pBase
);
1029 if (verbose_tcs_shader_in
) {
1030 lp_build_print_value(gallivm
, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index
));
1031 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1032 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1033 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1034 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1035 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(val
));
1037 res
= VINSERT(res
, val
, C(i
));
1040 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1042 Value
*pBase
= GEP(iface
->pTcsCtx
,
1043 { C(0), C(SWR_HS_CONTEXT_vert
), vert_index
,
1044 C(simdvertex_attrib
), attrib
, unwrap(swizzle_index
) });
1048 if (verbose_tcs_shader_in
) {
1049 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib_index: ", attrib_index
);
1050 lp_build_print_value(gallivm
, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index
));
1051 lp_build_print_value(gallivm
, "[TCS IN][VTX] attrib read from map: ", wrap(attrib
));
1052 lp_build_print_value(gallivm
, "[TCS IN][VTX] swizzle_index: ", swizzle_index
);
1053 lp_build_print_value(gallivm
, "[TCS IN][VTX] Loaded: ", wrap(res
));
1056 if (verbose_tcs_shader_in
) {
1057 lp_build_print_value(gallivm
, "[TCS IN][VTX] returning: ", wrap(res
));
1063 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface
*tcs_iface
,
1064 struct lp_build_tgsi_context
* bld_base
,
1065 boolean is_vindex_indirect
,
1066 LLVMValueRef vertex_index
,
1067 boolean is_aindex_indirect
,
1068 LLVMValueRef attrib_index
,
1069 LLVMValueRef swizzle_index
,
1072 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1074 Value
*vert_index
= unwrap(vertex_index
);
1075 Value
*attr_index
= unwrap(attrib_index
);
1077 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1079 if (verbose_tcs_shader_in
) {
1080 lp_build_print_value(gallivm
, "[TCS INOUT] Vertex index: ", vertex_index
);
1081 lp_build_print_value(gallivm
, "[TCS INOUT] Attrib index: ", wrap(attr_index
));
1082 lp_build_print_value(gallivm
, "[TCS INOUT] Swizzle index: ", swizzle_index
);
1085 Value
* res
= unwrap(bld_base
->base
.zero
);
1087 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
1088 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1089 Value
* pCpOut
= GEP(p1
, {lane
});
1091 Value
*vert_chan_index
= vert_index
;
1092 Value
*attr_chan_index
= attr_index
;
1094 if (is_vindex_indirect
) {
1095 vert_chan_index
= VEXTRACT(vert_index
, C(lane
));
1096 if (verbose_tcs_shader_in
) {
1097 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index
));
1101 if (is_aindex_indirect
) {
1102 attr_chan_index
= VEXTRACT(attr_index
, C(lane
));
1103 if (verbose_tcs_shader_in
) {
1104 lp_build_print_value(gallivm
, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index
));
1108 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1109 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1110 Value
* tessFactorArray
= nullptr;
1111 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1112 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1114 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1116 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1117 res
= VINSERT(res
, LOAD(tessFactor
), C(lane
));
1118 if (verbose_tcs_shader_in
) {
1119 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane
)));
1120 lp_build_print_value(gallivm
, "[TCS INOUT][FACTOR] loaded value: ", wrap(res
));
1122 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1123 Value
* attr_index_from_map
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_chan_index
}));
1124 Value
* attr_value
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attr_index_from_map
, unwrap(swizzle_index
)});
1125 res
= VINSERT(res
, LOAD(attr_value
), C(lane
));
1126 if (verbose_tcs_shader_in
) {
1127 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map
));
1128 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1129 lp_build_print_value(gallivm
, "[TCS INOUT][PATCH] loaded value: ", wrap(res
));
1132 // Generic attribute
1134 LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_chan_index
}));
1135 if (verbose_tcs_shader_in
) {
1136 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib
));
1138 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
), vert_chan_index
,
1139 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1141 res
= VINSERT(res
, LOAD(attr_chan
), C(lane
));
1142 if (verbose_tcs_shader_in
) {
1143 lp_build_print_value(gallivm
, "[TCS INOUT][VTX] loaded value: ", wrap(res
));
1152 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface
*tcs_iface
,
1153 struct lp_build_tgsi_context
*bld_base
,
1155 boolean is_vindex_indirect
,
1156 LLVMValueRef vertex_index
,
1157 boolean is_aindex_indirect
,
1158 LLVMValueRef attrib_index
,
1159 LLVMValueRef swizzle_index
,
1161 LLVMValueRef mask_vec
)
1163 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1164 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1166 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1168 if (verbose_tcs_shader_out
) {
1169 lp_build_printf(gallivm
, "[TCS OUT] =============================================\n");
1172 if (verbose_tcs_shader_out
) {
1173 lp_build_print_value(gallivm
, "[TCS OUT] Store mask: ", bld
->exec_mask
.exec_mask
);
1174 lp_build_print_value(gallivm
, "[TCS OUT] Store value: ", value
);
1177 Value
*vert_index
= unwrap(vertex_index
);
1178 Value
*attr_index
= unwrap(attrib_index
);
1180 if (verbose_tcs_shader_out
) {
1181 lp_build_print_value(gallivm
, "[TCS OUT] Vertex index: ", vertex_index
);
1182 lp_build_print_value(gallivm
, "[TCS OUT] Attrib index: ", wrap(attr_index
));
1183 lp_build_print_value(gallivm
, "[TCS OUT] Swizzle index: ", swizzle_index
);
1186 if (is_vindex_indirect
) {
1187 vert_index
= VEXTRACT(vert_index
, C(0));
1188 if (verbose_tcs_shader_out
) {
1189 lp_build_print_value(gallivm
, "[TCS OUT] Extracted vertex index: ", vertex_index
);
1193 if (is_aindex_indirect
) {
1194 attr_index
= VEXTRACT(attr_index
, C(0));
1195 if (verbose_tcs_shader_out
) {
1196 lp_build_print_value(gallivm
, "[TCS OUT] Extracted attrib index: ", wrap(attr_index
));
1200 if (verbose_tcs_shader_out
) {
1201 if (bld
->exec_mask
.has_mask
) {
1202 lp_build_print_value(gallivm
, "[TCS OUT] Exec mask: ", bld
->exec_mask
.exec_mask
);
1205 lp_build_printf(gallivm
, "[TCS OUT] has no mask\n");
1208 for (uint32_t lane
= 0; lane
< mVWidth
; lane
++) {
1209 Value
* p1
= LOAD(iface
->pTcsCtx
, {0, SWR_HS_CONTEXT_pCPout
});
1210 Value
* pCpOut
= GEP(p1
, {lane
});
1212 if (name
== TGSI_SEMANTIC_TESSOUTER
|| name
== TGSI_SEMANTIC_TESSINNER
) {
1213 Value
* tessFactors
= GEP(pCpOut
, {(uint32_t)0, ScalarPatch_tessFactors
});
1214 Value
* tessFactorArray
= nullptr;
1215 if (name
== TGSI_SEMANTIC_TESSOUTER
) {
1216 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors
});
1218 tessFactorArray
= GEP(tessFactors
, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors
});
1220 Value
* tessFactor
= GEP(tessFactorArray
, {C(0), unwrap(swizzle_index
)});
1221 Value
* valueToStore
= VEXTRACT(unwrap(value
), C(lane
));
1222 valueToStore
= BITCAST(valueToStore
, mFP32Ty
);
1224 Value
*originalVal
= LOAD(tessFactor
);
1225 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1226 valueToStore
= SELECT(vMask
, valueToStore
, originalVal
);
1228 STORE(valueToStore
, tessFactor
);
1229 if (verbose_tcs_shader_out
)
1231 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec
);
1232 lp_build_print_value(gallivm
, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore
));
1234 } else if (name
== TGSI_SEMANTIC_PATCH
) {
1235 Value
* attrib
= LOAD(GEP(iface
->pPatchOutputAttribMap
, {C(0), attr_index
}));
1236 if (verbose_tcs_shader_out
) {
1237 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index
));
1238 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index
));
1239 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect
)));
1240 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect
)));
1241 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib
));
1243 Value
* attr
= GEP(pCpOut
, {C(0), C(ScalarPatch_patchData
), C(ScalarCPoint_attrib
), attrib
});
1244 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1245 if (verbose_tcs_shader_out
) {
1246 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane
)));
1247 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] value to store: ", value
);
1248 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store
));
1249 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] chan_index: ", swizzle_index
);
1251 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
1253 Value
*originalVal
= LOADV(attr
, {C(0), unwrap(swizzle_index
)});
1254 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1255 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1256 if (verbose_tcs_shader_out
) {
1257 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] store mask: ", mask_vec
);
1258 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal
));
1259 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] vMask: ", wrap(vMask
));
1260 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store
));
1263 STOREV(value_to_store
, attr
, {C(0), unwrap(swizzle_index
)});
1264 if (verbose_tcs_shader_out
) {
1265 lp_build_print_value(gallivm
, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store
));
1268 Value
* value_to_store
= VEXTRACT(unwrap(value
), C(lane
));
1269 Value
* attrib
= LOAD(GEP(iface
->pVtxOutputAttribMap
, {C(0), attr_index
}));
1271 if (verbose_tcs_shader_out
) {
1272 lp_build_printf(gallivm
, "[TCS OUT] Writting attribute\n");
1273 lp_build_print_value(gallivm
, "[TCS OUT][VTX] invocation_id: ", bld
->system_values
.invocation_id
);
1274 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index
));
1275 lp_build_print_value(gallivm
, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib
));
1276 lp_build_print_value(gallivm
, "[TCS OUT][VTX] chan_index: ", swizzle_index
);
1277 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value: ", value
);
1278 lp_build_print_value(gallivm
, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store
));
1281 Value
* attr_chan
= GEP(pCpOut
, {C(0), C(ScalarPatch_cp
),
1282 VEXTRACT(unwrap(bld
->system_values
.invocation_id
), C(0)),
1283 C(ScalarCPoint_attrib
), attrib
, unwrap(swizzle_index
)});
1285 // Mask output values if needed
1286 value_to_store
= BITCAST(value_to_store
, mFP32Ty
);
1288 Value
*originalVal
= LOAD(attr_chan
);
1289 Value
*vMask
= TRUNC(VEXTRACT(unwrap(mask_vec
), C(lane
)), mInt1Ty
);
1290 value_to_store
= SELECT(vMask
, value_to_store
, originalVal
);
1292 STORE(value_to_store
, attr_chan
);
1293 if (verbose_tcs_shader_out
) {
1294 lp_build_print_value(gallivm
, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec
);
1295 lp_build_print_value(gallivm
, "[TCS OUT][VTX] stored: ", wrap(value_to_store
));
1302 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface
*tcs_iface
,
1303 struct lp_build_tgsi_context
*bld_base
)
1305 swr_tcs_llvm_iface
*iface
= (swr_tcs_llvm_iface
*)tcs_iface
;
1306 struct lp_build_tgsi_soa_context
* bld
= (struct lp_build_tgsi_soa_context
*)bld_base
;
1308 if (verbose_tcs_shader_loop
) {
1309 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration %d END\n", iface
->loop_var
);
1312 struct lp_build_context
*uint_bld
= &bld
->bld_base
.uint_bld
;
1314 STORE(ADD(LOAD(unwrap(iface
->loop_var
)), VBROADCAST(C(1))), unwrap(iface
->loop_var
));
1316 LLVMValueRef tmp
= lp_build_cmp(uint_bld
, PIPE_FUNC_GEQUAL
, wrap(LOAD(unwrap(iface
->loop_var
))),
1317 wrap(VBROADCAST(C(iface
->output_vertices
))));
1319 lp_exec_mask_cond_push(&bld
->exec_mask
, tmp
);
1320 lp_exec_break(&bld
->exec_mask
, &bld
->bld_base
.pc
, false);
1321 lp_exec_mask_cond_pop(&bld
->exec_mask
);
1322 lp_exec_endloop(bld
->bld_base
.base
.gallivm
, &bld
->exec_mask
);
1324 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1326 STORE(VBROADCAST(C(0)), unwrap(iface
->loop_var
));
1327 lp_exec_bgnloop(&bld
->exec_mask
, true);
1329 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1331 bld
->system_values
.invocation_id
= wrap((LOAD(unwrap(iface
->loop_var
))));
1333 if (verbose_tcs_shader_loop
) {
1334 lp_build_print_value(gallivm
, "Barrier LOOP: Iteration BEGIN: ", iface
->loop_var
);
1335 lp_build_print_value(gallivm
, "Barrier LOOP: InvocationId: \n", bld
->system_values
.invocation_id
);
1341 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface
*tes_iface
,
1342 struct lp_build_tgsi_context
* bld_base
,
1343 boolean is_aindex_indirect
,
1344 LLVMValueRef attrib_index
,
1345 LLVMValueRef swizzle_index
)
1347 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1348 Value
*attr_index
= unwrap(attrib_index
);
1349 Value
*res
= unwrap(bld_base
->base
.zero
);
1351 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1353 if (verbose_shader
) {
1354 lp_build_printf(gallivm
, "[TES IN][PATCH] --------------------------------------\n");
1357 if (is_aindex_indirect
) {
1359 struct lp_type type
= bld_base
->base
.type
;
1361 for (i
= 0; i
< type
.length
; i
++) {
1362 Value
*attr_chan_index
= attr_index
;
1364 if (is_aindex_indirect
) {
1365 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1369 LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_chan_index
}));
1371 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1372 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1373 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1374 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1375 if (verbose_shader
) {
1376 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1377 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index
));
1378 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1379 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1380 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1382 res
= VINSERT(res
, Val
, C(i
));
1385 Value
*attrib
= LOAD(GEP(iface
->pPatchAttribMap
, {C(0), attr_index
}));
1387 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1388 Value
*pPatchData
= GEP(pCpIn
, {(uint32_t)0, ScalarPatch_patchData
});
1389 Value
*pAttr
= GEP(pPatchData
, {(uint32_t)0, ScalarCPoint_attrib
});
1390 Value
*Val
= LOADV(pAttr
, {C(0), attrib
, unwrap(swizzle_index
)});
1391 if (verbose_shader
) {
1392 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib_index: ", attrib_index
);
1393 lp_build_print_value(gallivm
, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index
));
1394 lp_build_print_value(gallivm
, "[TES IN][PATCH] attrib read from map: ", wrap(attrib
));
1395 lp_build_print_value(gallivm
, "[TES IN][PATCH] swizzle_index: ", swizzle_index
);
1396 lp_build_print_value(gallivm
, "[TES IN][PATCH] Loaded: ", wrap(Val
));
1398 res
= VBROADCAST(Val
);
1400 if (verbose_shader
) {
1401 lp_build_print_value(gallivm
, "[TES IN][PATCH] returning: ", wrap(res
));
1409 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface
*tes_iface
,
1410 struct lp_build_tgsi_context
* bld_base
,
1411 boolean is_vindex_indirect
,
1412 LLVMValueRef vertex_index
,
1413 boolean is_aindex_indirect
,
1414 LLVMValueRef attrib_index
,
1415 LLVMValueRef swizzle_index
)
1417 swr_tes_llvm_iface
*iface
= (swr_tes_llvm_iface
*)tes_iface
;
1418 Value
*vert_index
= unwrap(vertex_index
);
1419 Value
*attr_index
= unwrap(attrib_index
);
1421 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1423 if (verbose_shader
) {
1424 lp_build_printf(gallivm
, "[TES IN][VTX] --------------------------------------\n");
1427 Value
*res
= unwrap(bld_base
->base
.zero
);
1428 if (is_vindex_indirect
|| is_aindex_indirect
) {
1430 struct lp_type type
= bld_base
->base
.type
;
1432 for (i
= 0; i
< type
.length
; i
++) {
1433 Value
*vert_chan_index
= vert_index
;
1434 Value
*attr_chan_index
= attr_index
;
1436 if (is_vindex_indirect
) {
1437 vert_chan_index
= VEXTRACT(vert_index
, C(i
));
1439 if (is_aindex_indirect
) {
1440 attr_chan_index
= VEXTRACT(attr_index
, C(i
));
1444 LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_chan_index
}));
1446 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1447 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1448 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_chan_index
});
1449 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1450 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1451 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1452 if (verbose_shader
) {
1453 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1454 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1455 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1456 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1457 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1459 res
= VINSERT(res
, Val
, C(i
));
1462 Value
*attrib
= LOAD(GEP(iface
->pVtxAttribMap
, {C(0), attr_index
}));
1464 Value
*pCpIn
= LOAD(iface
->pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
}, "pCpIn");
1465 Value
*pCp
= GEP(pCpIn
, {0, ScalarPatch_cp
});
1466 Value
*pVertex
= GEP(pCp
, {(Value
*)C(0), vert_index
});
1467 Value
*pAttrTab
= GEP(pVertex
, {uint32_t(0), uint32_t(0)});
1468 Value
*pAttr
= GEP(pAttrTab
, {(Value
*)C(0), attrib
});
1469 Value
*Val
= LOADV(pAttr
, {C(0), unwrap(swizzle_index
)});
1470 if (verbose_shader
) {
1471 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib_index: ", attrib_index
);
1472 lp_build_print_value(gallivm
, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index
));
1473 lp_build_print_value(gallivm
, "[TES IN][VTX] attrib read from map: ", wrap(attrib
));
1474 lp_build_print_value(gallivm
, "[TES IN][VTX] swizzle_index: ", swizzle_index
);
1475 lp_build_print_value(gallivm
, "[TES IN][VTX] Loaded: ", wrap(Val
));
1477 res
= VBROADCAST(Val
);
1479 if (verbose_shader
) {
1480 lp_build_print_value(gallivm
, "[TES IN][VTX] returning: ", wrap(res
));
1489 BuilderSWR::CompileGS(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
1491 SWR_GS_STATE
*pGS
= &ctx
->gs
->gsState
;
1492 struct tgsi_shader_info
*info
= &ctx
->gs
->info
.base
;
1494 memset(pGS
, 0, sizeof(*pGS
));
1496 pGS
->gsEnable
= true;
1498 pGS
->numInputAttribs
= (VERTEX_ATTRIB_START_SLOT
- VERTEX_POSITION_SLOT
) + info
->num_inputs
;
1499 pGS
->outputTopology
=
1500 swr_convert_prim_topology(info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
], 0);
1502 /* The +1 is needed because emit_vertex in swr is always called exactly one more time
1503 * than the max_vertices declared by the geometry shader. Allocate room for the extra
1504 * vertex to avoid a crash or memory being overwritten.
1506 pGS
->maxNumVerts
= info
->properties
[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
] + 1;
1507 pGS
->instanceCount
= info
->properties
[TGSI_PROPERTY_GS_INVOCATIONS
];
1509 // If the output topology is a point list, assume multiple streams are used
1510 if(pGS
->outputTopology
== TOP_POINT_LIST
) {
1511 pGS
->isSingleStream
= false;
1513 pGS
->isSingleStream
= true;
1514 pGS
->singleStreamID
= 0;
1517 pGS
->vertexAttribOffset
= VERTEX_POSITION_SLOT
;
1518 pGS
->inputVertStride
= pGS
->numInputAttribs
+ pGS
->vertexAttribOffset
;
1519 pGS
->outputVertexSize
= SWR_VTX_NUM_SLOTS
;
1520 pGS
->controlDataSize
= 8; // GS outputs max of 8 32B units
1521 pGS
->controlDataOffset
= VERTEX_COUNT_SIZE
;
1522 pGS
->outputVertexOffset
= pGS
->controlDataOffset
+ CONTROL_HEADER_SIZE
;
1524 pGS
->allocationSize
=
1525 VERTEX_COUNT_SIZE
+ // vertex count
1526 CONTROL_HEADER_SIZE
+ // control header
1527 (SWR_VTX_NUM_SLOTS
* 16) * // sizeof vertex
1528 pGS
->maxNumVerts
; // num verts
1530 struct swr_geometry_shader
*gs
= ctx
->gs
;
1532 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1533 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1535 memset(outputs
, 0, sizeof(outputs
));
1537 AttrBuilder attrBuilder
;
1538 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1540 std::vector
<Type
*> gsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1541 PointerType::get(mInt8Ty
, 0),
1542 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
1543 FunctionType
*vsFuncType
=
1544 FunctionType::get(Type::getVoidTy(JM()->mContext
), gsArgs
, false);
1546 // create new geometry shader function
1547 auto pFunction
= Function::Create(vsFuncType
,
1548 GlobalValue::ExternalLinkage
,
1550 JM()->mpCurrentModule
);
1551 #if LLVM_VERSION_MAJOR < 5
1552 AttributeSet attrSet
= AttributeSet::get(
1553 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1554 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1556 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1559 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1560 IRB()->SetInsertPoint(block
);
1561 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1563 auto argitr
= pFunction
->arg_begin();
1564 Value
*hPrivateData
= &*argitr
++;
1565 hPrivateData
->setName("hPrivateData");
1566 Value
*pWorkerData
= &*argitr
++;
1567 pWorkerData
->setName("pWorkerData");
1568 Value
*pGsCtx
= &*argitr
++;
1569 pGsCtx
->setName("gsCtx");
1572 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantGS
)});
1573 consts_ptr
->setName("gs_constants");
1574 Value
*const_sizes_ptr
=
1575 GEP(hPrivateData
, {0, swr_draw_context_num_constantsGS
});
1576 const_sizes_ptr
->setName("num_gs_constants");
1578 struct lp_build_sampler_soa
*sampler
=
1579 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_GEOMETRY
);
1580 assert(sampler
!= nullptr);
1582 struct lp_bld_tgsi_system_values system_values
;
1583 memset(&system_values
, 0, sizeof(system_values
));
1584 system_values
.prim_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_PrimitiveID
}));
1585 system_values
.invocation_id
= wrap(LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_InstanceID
}));
1587 std::vector
<Constant
*> mapConstants
;
1588 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1589 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1590 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1591 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1593 unsigned vs_slot
= locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
1594 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1596 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
1598 if (ctx
->vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
1601 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
1602 vs_slot
= VERTEX_POSITION_SLOT
;
1604 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
1605 mapConstants
.push_back(C(vs_slot
));
1608 struct lp_build_mask_context mask
;
1609 Value
*mask_val
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_mask
}, "gsMask");
1610 lp_build_mask_begin(&mask
, gallivm
,
1611 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1613 // zero out cut buffer so we can load/modify/store bits
1614 for (uint32_t lane
= 0; lane
< mVWidth
; ++lane
)
1616 Value
* pStream
= LOAD(pGsCtx
, {0, SWR_GS_CONTEXT_pStreams
, lane
});
1617 #if LLVM_VERSION_MAJOR >= 10
1618 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH
));
1620 MEMSET(pStream
, C((char)0), VERTEX_COUNT_SIZE
+ CONTROL_HEADER_SIZE
, sizeof(float) * KNOB_SIMD_WIDTH
);
1624 struct swr_gs_llvm_iface gs_iface
;
1625 gs_iface
.base
.fetch_input
= ::swr_gs_llvm_fetch_input
;
1626 gs_iface
.base
.emit_vertex
= ::swr_gs_llvm_emit_vertex
;
1627 gs_iface
.base
.end_primitive
= ::swr_gs_llvm_end_primitive
;
1628 gs_iface
.base
.gs_epilogue
= ::swr_gs_llvm_epilogue
;
1629 gs_iface
.pBuilder
= this;
1630 gs_iface
.pGsCtx
= pGsCtx
;
1631 gs_iface
.pGsState
= pGS
;
1632 gs_iface
.num_outputs
= gs
->info
.base
.num_outputs
;
1633 gs_iface
.num_verts_per_prim
=
1634 u_vertices_per_prim((pipe_prim_type
)info
->properties
[TGSI_PROPERTY_GS_OUTPUT_PRIM
]);
1635 gs_iface
.info
= info
;
1636 gs_iface
.pVtxAttribMap
= vtxAttribMap
;
1638 struct lp_build_tgsi_params params
;
1639 memset(&params
, 0, sizeof(params
));
1640 params
.type
= lp_type_float_vec(32, 32 * 8);
1641 params
.mask
= & mask
;
1642 params
.consts_ptr
= wrap(consts_ptr
);
1643 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1644 params
.system_values
= &system_values
;
1645 params
.inputs
= inputs
;
1646 params
.context_ptr
= wrap(hPrivateData
);
1647 params
.sampler
= sampler
;
1648 params
.info
= &gs
->info
.base
;
1649 params
.gs_iface
= &gs_iface
.base
;
1651 lp_build_tgsi_soa(gallivm
,
1656 lp_build_mask_end(&mask
);
1658 sampler
->destroy(sampler
);
1660 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1664 gallivm_verify_function(gallivm
, wrap(pFunction
));
1665 gallivm_compile_module(gallivm
);
1668 (PFN_GS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
1670 debug_printf("geom shader %p\n", pFunc
);
1671 assert(pFunc
&& "Error: GeomShader = NULL");
1673 JM()->mIsModuleFinalized
= true;
1679 BuilderSWR::CompileTES(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
1681 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
1682 struct tgsi_shader_info
*info
= &ctx
->tes
->info
.base
;
1684 // tessellation is enabled if TES is present
1685 // clear tessellation state here then
1686 memset(pTS
, 0, sizeof(*pTS
));
1688 pTS
->tsEnable
= true;
1690 unsigned tes_prim_mode
= info
->properties
[TGSI_PROPERTY_TES_PRIM_MODE
];
1691 unsigned tes_spacing
= info
->properties
[TGSI_PROPERTY_TES_SPACING
];
1692 bool tes_vertex_order_cw
= info
->properties
[TGSI_PROPERTY_TES_VERTEX_ORDER_CW
];
1693 bool tes_point_mode
= info
->properties
[TGSI_PROPERTY_TES_POINT_MODE
];
1694 SWR_TS_DOMAIN type
= SWR_TS_ISOLINE
;
1695 SWR_TS_PARTITIONING partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1696 SWR_TS_OUTPUT_TOPOLOGY topology
= SWR_TS_OUTPUT_POINT
;
1697 PRIMITIVE_TOPOLOGY postDSTopology
= TOP_POINT_LIST
;
1699 // TESS_TODO: move this to helper functions to improve readability
1700 switch (tes_prim_mode
) {
1701 case PIPE_PRIM_LINES
:
1702 type
= SWR_TS_ISOLINE
;
1703 postDSTopology
= TOP_LINE_LIST
;
1705 case PIPE_PRIM_TRIANGLES
:
1707 postDSTopology
= TOP_TRIANGLE_LIST
;
1709 case PIPE_PRIM_QUADS
:
1711 // See OpenGL spec - quads are tessellated into triangles
1712 postDSTopology
= TOP_TRIANGLE_LIST
;
1718 switch (tes_spacing
) {
1719 case PIPE_TESS_SPACING_FRACTIONAL_ODD
:
1720 partitioning
= SWR_TS_ODD_FRACTIONAL
;
1722 case PIPE_TESS_SPACING_FRACTIONAL_EVEN
:
1723 partitioning
= SWR_TS_EVEN_FRACTIONAL
;
1725 case PIPE_TESS_SPACING_EQUAL
:
1726 partitioning
= SWR_TS_INTEGER
;
1732 if (tes_point_mode
) {
1733 topology
= SWR_TS_OUTPUT_POINT
;
1734 postDSTopology
= TOP_POINT_LIST
;
1736 else if (tes_prim_mode
== PIPE_PRIM_LINES
) {
1737 topology
= SWR_TS_OUTPUT_LINE
;
1739 else if (tes_vertex_order_cw
) {
1740 topology
= SWR_TS_OUTPUT_TRI_CW
;
1743 topology
= SWR_TS_OUTPUT_TRI_CCW
;
1747 pTS
->tsOutputTopology
= topology
;
1748 pTS
->partitioning
= partitioning
;
1749 pTS
->numDsOutputAttribs
= info
->num_outputs
;
1750 pTS
->postDSTopology
= postDSTopology
;
1752 pTS
->dsAllocationSize
= SWR_VTX_NUM_SLOTS
* MAX_NUM_VERTS_PER_PRIM
;
1753 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1754 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1755 pTS
->dsOutVtxAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
1757 struct swr_tess_evaluation_shader
*tes
= ctx
->tes
;
1759 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
1760 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
1762 memset(outputs
, 0, sizeof(outputs
));
1764 AttrBuilder attrBuilder
;
1765 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
1767 std::vector
<Type
*> tesArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
1768 PointerType::get(mInt8Ty
, 0),
1769 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1770 FunctionType
*tesFuncType
=
1771 FunctionType::get(Type::getVoidTy(JM()->mContext
), tesArgs
, false);
1773 // create new tessellation evaluation shader function
1774 auto pFunction
= Function::Create(tesFuncType
,
1775 GlobalValue::ExternalLinkage
,
1777 JM()->mpCurrentModule
);
1779 #if LLVM_VERSION_MAJOR < 5
1780 AttributeSet attrSet
= AttributeSet::get(
1781 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
1782 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
1784 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
1787 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
1788 IRB()->SetInsertPoint(block
);
1789 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
1791 auto argitr
= pFunction
->arg_begin();
1792 Value
*hPrivateData
= &*argitr
++;
1793 hPrivateData
->setName("hPrivateData");
1794 Value
*pWorkerData
= &*argitr
++;
1795 pWorkerData
->setName("pWorkerData");
1796 Value
*pTesCtx
= &*argitr
++;
1797 pTesCtx
->setName("tesCtx");
1800 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTES
)});
1801 consts_ptr
->setName("tes_constants");
1802 Value
*const_sizes_ptr
=
1803 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTES
});
1804 const_sizes_ptr
->setName("num_tes_constants");
1806 struct lp_build_sampler_soa
*sampler
=
1807 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_EVAL
);
1808 assert(sampler
!= nullptr);
1810 struct lp_bld_tgsi_system_values system_values
;
1811 memset(&system_values
, 0, sizeof(system_values
));
1813 // Load and calculate system values
1814 // Tessellation coordinates (gl_TessCoord)
1815 Value
*vecOffset
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
}, "vecOffset");
1816 Value
*vecStride
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorStride
}, "vecStride");
1817 Value
*vecIndex
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_vectorOffset
});
1819 Value
* tess_coord
= ALLOCA(ArrayType::get(mSimdFP32Ty
, 3));
1821 Value
*tessCoordU
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainU
}), {vecIndex
}, "tessCoordU");
1822 STORE(tessCoordU
, tess_coord
, {0, 0});
1823 Value
*tessCoordV
= LOADV(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pDomainV
}), {vecIndex
}, "tessCoordV");
1824 STORE(tessCoordV
, tess_coord
, {0, 1});
1825 Value
*tessCoordW
= FSUB(FSUB(VIMMED1(1.0f
), tessCoordU
), tessCoordV
, "tessCoordW");
1826 STORE(tessCoordW
, tess_coord
, {0, 2});
1827 system_values
.tess_coord
= wrap(tess_coord
);
1830 system_values
.prim_id
= wrap(VBROADCAST(LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_PrimitiveID
}), "PrimitiveID"));
1832 // Tessellation factors
1833 Value
* pPatch
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pCpIn
});
1834 Value
* pTessFactors
= GEP(pPatch
, {C(0), C(ScalarPatch_tessFactors
)});
1836 assert(SWR_NUM_OUTER_TESS_FACTORS
== 4);
1837 Value
* sys_value_outer_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1838 for (unsigned i
= 0; i
< SWR_NUM_OUTER_TESS_FACTORS
; i
++) {
1839 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors
, i
});
1840 sys_value_outer_factors
= VINSERT(sys_value_outer_factors
, v
, i
, "gl_TessLevelOuter");
1842 system_values
.tess_outer
= wrap(sys_value_outer_factors
);
1844 assert(SWR_NUM_INNER_TESS_FACTORS
== 2);
1845 Value
* sys_value_inner_factors
= UndefValue::get(VectorType::get(mFP32Ty
, 4));
1846 for (unsigned i
= 0; i
< SWR_NUM_INNER_TESS_FACTORS
; i
++) {
1847 Value
* v
= LOAD(pTessFactors
, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors
, i
});
1848 sys_value_inner_factors
= VINSERT(sys_value_inner_factors
, v
, i
, "gl_TessLevelInner");
1850 system_values
.tess_inner
= wrap(sys_value_inner_factors
);
1854 lp_build_print_value(gallivm
, "tess_coord = ", system_values
.tess_coord
);
1857 struct tgsi_shader_info
*pPrevShader
= nullptr;
1860 pPrevShader
= &ctx
->tcs
->info
.base
;
1863 pPrevShader
= &ctx
->vs
->info
.base
;
1866 // Figure out how many per-patch attributes we have
1867 unsigned perPatchAttrs
= 0;
1868 unsigned genericAttrs
= 0;
1869 unsigned tessLevelAttrs
= 0;
1870 unsigned sgvAttrs
= 0;
1871 for (unsigned slot
= 0; slot
< pPrevShader
->num_outputs
; slot
++) {
1872 switch (pPrevShader
->output_semantic_name
[slot
]) {
1873 case TGSI_SEMANTIC_PATCH
:
1876 case TGSI_SEMANTIC_GENERIC
:
1879 case TGSI_SEMANTIC_TESSINNER
:
1880 case TGSI_SEMANTIC_TESSOUTER
:
1883 case TGSI_SEMANTIC_POSITION
:
1884 case TGSI_SEMANTIC_CLIPDIST
:
1885 case TGSI_SEMANTIC_PSIZE
:
1889 assert(!"Unknown semantic input in TES");
1893 std::vector
<Constant
*> mapConstants
;
1894 Value
*vtxAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1895 Value
*patchAttribMap
= ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
1896 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
1897 ubyte semantic_name
= info
->input_semantic_name
[slot
];
1898 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
1900 // Where in TCS output is my attribute?
1901 // TESS_TODO: revisit after implement pass-through TCS
1902 unsigned tcs_slot
= locate_linkage(semantic_name
, semantic_idx
, pPrevShader
);
1903 assert(tcs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
1905 // Skip tessellation levels - these go to the tessellator, not TES
1906 switch (semantic_name
) {
1907 case TGSI_SEMANTIC_GENERIC
:
1908 tcs_slot
= tcs_slot
+ VERTEX_ATTRIB_START_SLOT
- sgvAttrs
- tessLevelAttrs
;
1910 case TGSI_SEMANTIC_PATCH
:
1911 tcs_slot
= semantic_idx
;
1913 case TGSI_SEMANTIC_POSITION
:
1914 tcs_slot
= VERTEX_POSITION_SLOT
;
1916 case TGSI_SEMANTIC_CLIPDIST
:
1917 case TGSI_SEMANTIC_PSIZE
:
1920 assert(!"Unexpected semantic found while builiding TES input map");
1922 if (semantic_name
== TGSI_SEMANTIC_PATCH
) {
1923 STORE(C(tcs_slot
), patchAttribMap
, {0, slot
});
1925 STORE(C(tcs_slot
), vtxAttribMap
, {0, slot
});
1927 mapConstants
.push_back(C(tcs_slot
));
1930 // Build execution mask
1931 struct lp_build_mask_context mask
;
1932 Value
*mask_val
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_mask
}, "tesMask");
1935 lp_build_print_value(gallivm
, "TES execution mask: ", wrap(mask_val
));
1937 lp_build_mask_begin(&mask
, gallivm
,
1938 lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
1940 struct swr_tes_llvm_iface tes_iface
;
1942 tes_iface
.base
.fetch_vertex_input
= ::swr_tes_llvm_fetch_vtx_input
;
1943 tes_iface
.base
.fetch_patch_input
= ::swr_tes_llvm_fetch_patch_input
;
1945 tes_iface
.pBuilder
= this;
1946 tes_iface
.pTesCtx
= pTesCtx
;
1947 tes_iface
.pTsState
= pTS
;
1948 tes_iface
.num_outputs
= tes
->info
.base
.num_outputs
;
1949 tes_iface
.info
= info
;
1950 tes_iface
.pVtxAttribMap
= vtxAttribMap
;
1951 tes_iface
.pPatchAttribMap
= patchAttribMap
;
1953 struct lp_build_tgsi_params params
;
1954 memset(¶ms
, 0, sizeof(params
));
1955 params
.type
= lp_type_float_vec(32, 32 * 8);
1956 params
.mask
= & mask
;
1957 params
.consts_ptr
= wrap(consts_ptr
);
1958 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
1959 params
.system_values
= &system_values
;
1960 params
.inputs
= inputs
;
1961 params
.context_ptr
= wrap(hPrivateData
);
1962 params
.sampler
= sampler
;
1963 params
.info
= &tes
->info
.base
;
1964 params
.tes_iface
= &tes_iface
.base
;
1967 lp_build_tgsi_soa(gallivm
,
1972 lp_build_mask_end(&mask
);
1974 sampler
->destroy(sampler
);
1976 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
1978 // Write output attributes
1979 Value
*dclOut
= LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_pOutputData
}, "dclOut");
1981 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
1982 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
1983 if (!outputs
[attrib
][channel
])
1986 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));;
1987 Value
*attribOffset
=
1988 LOAD(pTesCtx
, {0, SWR_DS_CONTEXT_outVertexAttribOffset
});
1990 // Assume we write position
1991 Value
* outputSlot
= C(VERTEX_POSITION_SLOT
);
1992 if (tes
->info
.base
.output_semantic_name
[attrib
] != TGSI_SEMANTIC_POSITION
) {
1993 // No, it's a generic attribute, not a position - let's calculate output slot
1994 uint32_t outSlot
= attrib
;
1995 if (tes
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
) {
1996 // this shader will write position, so in shader's term
1997 // output starts at attrib 1, but we will handle that separately,
1998 // so let's fix the outSlot
2001 outputSlot
= ADD(attribOffset
, C(outSlot
));
2004 Value
*attribVecIndex
=
2005 ADD(MUL(vecStride
, MUL(outputSlot
, C(4))), vecOffset
);
2007 uint32_t outputComponent
= 0;
2008 uint32_t curComp
= outputComponent
+ channel
;
2009 auto outValIndex
= ADD(attribVecIndex
, MUL(vecStride
, C(curComp
)));
2010 STOREV(val
, dclOut
, {outValIndex
});
2012 if (verbose_shader
) {
2013 lp_build_printf(gallivm
,
2014 "TES output [%d][%d]",
2017 lp_build_print_value(gallivm
, " = ", wrap(val
));
2024 JM()->DumpToFile(pFunction
, "src");
2025 gallivm_verify_function(gallivm
, wrap(pFunction
));
2027 gallivm_compile_module(gallivm
);
2028 JM()->DumpToFile(pFunction
, "optimized");
2030 PFN_TES_FUNC pFunc
=
2031 (PFN_TES_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2033 debug_printf("tess evaluation shader %p\n", pFunc
);
2034 assert(pFunc
&& "Error: TessEvaluationShader = NULL");
2036 JM()->DumpAsm(pFunction
, "asm");
2038 JM()->mIsModuleFinalized
= true;
// Generates the tessellation control shader (TCS) for ctx->tcs.
// Fills in the tessellation state in ctx->tsState, creates an LLVM function
// taking three pointers (draw context, an i8 pointer named pWorkerData, and
// the SWR_HS_CONTEXT), translates the TGSI tokens with lp_build_tgsi_soa,
// then verifies, compiles and JITs the result into a PFN_TCS_FUNC.
// key.sampler is used to create the sampler code generator.
2044 BuilderSWR::CompileTCS(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
// Record attribute counts and slot offsets for this TCS in the context's
// tessellation state.
2046 SWR_TS_STATE
*pTS
= &ctx
->tsState
;
2047 struct tgsi_shader_info
*info
= &ctx
->tcs
->info
.base
;
2049 pTS
->numHsInputAttribs
= info
->num_inputs
;
2050 pTS
->numHsOutputAttribs
= info
->num_outputs
;
2052 pTS
->hsAllocationSize
= sizeof(ScalarPatch
);
2054 pTS
->vertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2055 pTS
->srcVertexAttribOffset
= VERTEX_ATTRIB_START_SLOT
;
2057 struct swr_tess_control_shader
*tcs
= ctx
->tcs
;
// Only outputs is zeroed here; inputs is passed to lp_build_tgsi_soa as-is.
2059 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2060 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2062 memset(outputs
, 0, sizeof(outputs
));
// Stack alignment attribute sized to one SIMD vector of floats.
2064 AttrBuilder attrBuilder
;
2065 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: void(swr_draw_context*, i8*, SWR_HS_CONTEXT*).
2067 std::vector
<Type
*> tcsArgs
{
2068 PointerType::get(Gen_swr_draw_context(JM()), 0),
2069 PointerType::get(mInt8Ty
, 0),
2070 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2071 FunctionType
*tcsFuncType
=
2072 FunctionType::get(Type::getVoidTy(JM()->mContext
), tcsArgs
, false);
2074 // create new tessellation control shader function
2075 auto pFunction
= Function::Create(tcsFuncType
,
2076 GlobalValue::ExternalLinkage
,
2078 JM()->mpCurrentModule
);
// Attach the stack alignment attribute; LLVM older than 5 uses the
// AttributeSet API, the other call uses AttributeList.
2080 #if LLVM_VERSION_MAJOR < 5
2081 AttributeSet attrSet
= AttributeSet::get(
2082 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2083 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2085 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Create the entry block and point both the SWR IRBuilder and the gallivm
// builder at it.
2088 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2089 IRB()->SetInsertPoint(block
);
2090 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the three function arguments in declaration order.
2092 auto argitr
= pFunction
->arg_begin();
2093 Value
*hPrivateData
= &*argitr
++;
2094 hPrivateData
->setName("hPrivateData");
2095 Value
*pWorkerData
= &*argitr
++;
2096 pWorkerData
->setName("pWorkerData");
2097 Value
*pTcsCtx
= &*argitr
++;
2098 pTcsCtx
->setName("tcsCtx");
// Pointers to the TCS constant buffers and their sizes inside the draw
// context.
2101 GEP(hPrivateData
, {C(0), C(swr_draw_context_constantTCS
)});
2102 consts_ptr
->setName("tcs_constants");
2103 Value
*const_sizes_ptr
=
2104 GEP(hPrivateData
, {0, swr_draw_context_num_constantsTCS
});
2105 const_sizes_ptr
->setName("num_tcs_constants");
2107 struct lp_build_sampler_soa
*sampler
=
2108 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_TESS_CTRL
);
2109 assert(sampler
!= nullptr);
// System values: prim_id comes from the HS context, invocation_id is a
// broadcast constant 0, vertices_in is the constant vertices_per_patch.
2111 struct lp_bld_tgsi_system_values system_values
;
2112 memset(&system_values
, 0, sizeof(system_values
));
2114 system_values
.prim_id
=
2115 wrap(LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_PrimitiveID
}));
2117 system_values
.invocation_id
= wrap(VBROADCAST(C(0)));
2118 system_values
.vertices_in
= wrap(C(tcs
->vertices_per_patch
));
2120 if (verbose_shader
) {
2121 lp_build_print_value(gallivm
, "TCS::prim_id = ", system_values
.prim_id
);
2122 lp_build_print_value(gallivm
, "TCS::invocation_id = ", system_values
.invocation_id
);
2123 lp_build_print_value(gallivm
, "TCS::vertices_in = ", system_values
.vertices_in
);
// Build the input attribute map: for every TCS input slot, find the matching
// vertex shader output with locate_linkage and store the resulting vertex
// slot both in the IR-side array and in mapConstants.
2126 std::vector
<Constant
*> mapConstants
;
2127 Value
*vtxAttribMap
=
2128 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2130 for (unsigned slot
= 0; slot
< info
->num_inputs
; slot
++) {
2131 ubyte semantic_name
= info
->input_semantic_name
[slot
];
2132 ubyte semantic_idx
= info
->input_semantic_index
[slot
];
2135 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
2136 assert(vs_slot
< PIPE_MAX_SHADER_OUTPUTS
);
2138 vs_slot
+= VERTEX_ATTRIB_START_SLOT
;
// NOTE(review): the statement guarded by this test is not visible in this
// view.
2140 if (ctx
->vs
->info
.base
.output_semantic_name
[0]
2141 == TGSI_SEMANTIC_POSITION
)
// Position always maps to the dedicated position slot.
2144 if (semantic_name
== TGSI_SEMANTIC_POSITION
)
2145 vs_slot
= VERTEX_POSITION_SLOT
;
2147 STORE(C(vs_slot
), vtxAttribMap
, {0, slot
});
2148 mapConstants
.push_back(C(vs_slot
));
2151 // Prepare map of output attributes. Needed when shader instance wants
2152 // to read own output or output of other instance, which is allowed in TCS
2153 Value
*vtxOutputAttribMap
=
2154 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2155 // Map for per-patch attributes
2156 Value
*patchOutputAttribMap
=
2157 ALLOCA(ArrayType::get(mInt32Ty
, PIPE_MAX_SHADER_INPUTS
));
2158 for (unsigned slot
= 0; slot
< info
->num_outputs
; slot
++) {
2159 ubyte name
= info
->output_semantic_name
[slot
];
2160 int32_t idx
= info
->output_semantic_index
[slot
];
// Per-patch outputs are mapped by their semantic index.
2161 if (name
== TGSI_SEMANTIC_PATCH
) {
2162 STORE(C(idx
), patchOutputAttribMap
, {0, slot
});
// Other outputs start from their own slot number; generic attributes are
// shifted by VERTEX_ATTRIB_START_SLOT.
2164 int32_t target_slot
= slot
;
2165 if (name
== TGSI_SEMANTIC_GENERIC
) {
2166 target_slot
+= VERTEX_ATTRIB_START_SLOT
;
2168 // Now normalize target slot
// NOTE(review): the action taken for the listed semantics is not visible in
// this view. This inner 'name' shadows the outer one.
2169 for (ubyte as
= 0; as
< slot
; as
++) {
2170 ubyte name
= info
->output_semantic_name
[as
];
2172 case TGSI_SEMANTIC_TESSOUTER
:
2173 case TGSI_SEMANTIC_TESSINNER
:
2174 case TGSI_SEMANTIC_PATCH
:
2175 case TGSI_SEMANTIC_POSITION
:
2179 if (name
== TGSI_SEMANTIC_POSITION
) {
2180 target_slot
= VERTEX_POSITION_SLOT
;
2182 STORE(C(target_slot
), vtxOutputAttribMap
, {0, slot
});
2183 mapConstants
.push_back(C(target_slot
));
// Execution mask: loaded from the HS context and handed to gallivm with the
// same lp_type_float_vec(32, 32 * 8) type used for params.type below.
2187 struct lp_build_mask_context mask
;
2188 Value
*mask_val
= LOAD(pTcsCtx
, {0, SWR_HS_CONTEXT_mask
}, "tcsMask");
2189 lp_build_mask_begin(
2190 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
// TCS interface: callbacks gallivm invokes for input/output access, barriers
// and prologue/epilogue, plus the state those callbacks need.
2192 struct swr_tcs_llvm_iface tcs_iface
;
2194 tcs_iface
.base
.emit_store_output
= ::swr_tcs_llvm_store_output
;
2195 tcs_iface
.base
.emit_fetch_input
= ::swr_tcs_llvm_fetch_input
;
2196 tcs_iface
.base
.emit_fetch_output
= ::swr_tcs_llvm_fetch_output
;
2197 tcs_iface
.base
.emit_barrier
= ::swr_tcs_llvm_emit_barrier
;
2198 tcs_iface
.base
.emit_prologue
= ::swr_tcs_llvm_emit_prologue
;
2199 tcs_iface
.base
.emit_epilogue
= ::swr_tcs_llvm_emit_epilogue
;
2201 tcs_iface
.pBuilder
= this;
2202 tcs_iface
.pTcsCtx
= pTcsCtx
;
2203 tcs_iface
.pTsState
= pTS
;
2204 tcs_iface
.output_vertices
= info
->properties
[TGSI_PROPERTY_TCS_VERTICES_OUT
];
2205 tcs_iface
.info
= info
;
2206 tcs_iface
.pVtxAttribMap
= vtxAttribMap
;
2207 tcs_iface
.pVtxOutputAttribMap
= vtxOutputAttribMap
;
2208 tcs_iface
.pPatchOutputAttribMap
= patchOutputAttribMap
;
// Parameters for the TGSI -> LLVM translation.
2210 struct lp_build_tgsi_params params
;
2211 memset(¶ms
, 0, sizeof(params
));
2212 params
.type
= lp_type_float_vec(32, 32 * 8);
2213 params
.mask
= &mask
;
2214 params
.consts_ptr
= wrap(consts_ptr
);
2215 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2216 params
.system_values
= &system_values
;
2217 params
.inputs
= inputs
;
2218 params
.context_ptr
= wrap(hPrivateData
);
2219 params
.sampler
= sampler
;
2220 params
.info
= &tcs
->info
.base
;
2221 params
.tcs_iface
= &tcs_iface
.base
;
// Translate the shader tokens into IR, then close the mask and release the
// sampler generator.
2223 lp_build_tgsi_soa(gallivm
, tcs
->pipe
.tokens
, ¶ms
, outputs
);
2225 lp_build_mask_end(&mask
);
2227 sampler
->destroy(sampler
);
// Re-sync the SWR IRBuilder with the block the gallivm builder ended in.
2229 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Dump IR before ("src") and after ("optimized") module compilation.
2232 JM()->DumpToFile(pFunction
, "src");
2233 gallivm_verify_function(gallivm
, wrap(pFunction
));
2234 gallivm_compile_module(gallivm
);
2235 JM()->DumpToFile(pFunction
, "optimized");
// Fetch the JIT-compiled entry point.
2237 PFN_TCS_FUNC pFunc
=
2238 (PFN_TCS_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2240 debug_printf("tess control shader %p\n", pFunc
);
2241 assert(pFunc
&& "Error: TessControlShader = NULL");
2242 JM()->DumpAsm(pFunction
, "asm");
// Flag the JitManager's module as finalized.
2244 JM()->mIsModuleFinalized
= true;
// Compiles a geometry shader variant for the given key and caches it.
// Calls builder.CompileGS(ctx, key) and inserts a new VariantGS (holding the
// builder's gallivm state and the compiled function) into ctx->gs->map under
// that key.
// NOTE(review): the builder declaration is only partially visible here; one
// of its constructor arguments is the screen's hJitMgr cast to JitManager*.
2251 swr_compile_gs(struct swr_context
*ctx
, swr_jit_gs_key
&key
)
2254 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2256 PFN_GS_FUNC func
= builder
.CompileGS(ctx
, key
);
// The map owns the variant through a unique_ptr.
2258 ctx
->gs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantGS
>(new VariantGS(builder
.gallivm
, func
))));
// Compiles a tessellation control shader variant for the given key and
// caches it.
// Calls builder.CompileTCS(ctx, key) and inserts a new VariantTCS (holding
// the builder's gallivm state and the compiled function) into ctx->tcs->map
// under that key.
// NOTE(review): the builder declaration is only partially visible here; one
// of its constructor arguments is the screen's hJitMgr cast to JitManager*.
2263 swr_compile_tcs(struct swr_context
*ctx
, swr_jit_tcs_key
&key
)
2266 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2268 PFN_TCS_FUNC func
= builder
.CompileTCS(ctx
, key
);
// The map owns the variant through a unique_ptr.
2270 ctx
->tcs
->map
.insert(
2271 std::make_pair(key
, std::unique_ptr
<VariantTCS
>(new VariantTCS(builder
.gallivm
, func
))));
// Compiles a tessellation evaluation shader variant for the given key and
// caches it.
// Calls builder.CompileTES(ctx, key) and inserts a new VariantTES (holding
// the builder's gallivm state and the compiled function) into ctx->tes->map
// under that key.
// NOTE(review): the builder declaration is only partially visible here; one
// of its constructor arguments is the screen's hJitMgr cast to JitManager*.
2277 swr_compile_tes(struct swr_context
*ctx
, swr_jit_tes_key
&key
)
2280 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2282 PFN_TES_FUNC func
= builder
.CompileTES(ctx
, key
);
// The map owns the variant through a unique_ptr.
2284 ctx
->tes
->map
.insert(
2285 std::make_pair(key
, std::unique_ptr
<VariantTES
>(new VariantTES(builder
.gallivm
, func
))));
// Emits IR that stores one channel of a vertex shader output.
//   pVal       - value to store
//   pVsContext - VS context pointer; read only in the SIMD16-frontend path
//   pVtxOutput - pointer to the output vertex structure
//   slot       - destination attribute slot
//   channel    - destination component within the slot
// Two layouts are handled, selected at preprocessing time by
// USE_SIMD16_FRONTEND && !USE_SIMD16_VS.
2291 BuilderSWR::WriteVS(Value
*pVal
, Value
*pVsContext
, Value
*pVtxOutput
, unsigned slot
, unsigned channel
)
2293 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2294 // interleave the simdvertex components into the dest simd16vertex
2295 // slot16offset = slot8offset * 2
2296 // comp16offset = comp8offset * 2 + alternateOffset
2298 Value
*offset
= LOAD(pVsContext
, { 0, SWR_VS_CONTEXT_AlternateOffset
});
2299 Value
*pOut
= GEP(pVtxOutput
, { C(0), C(0), C(slot
* 2), offset
} );
2300 STORE(pVal
, pOut
, {channel
* 2});
// Other configuration: address the slot directly and store at the channel
// index. The preprocessor line separating the two paths is not visible in
// this view.
2302 Value
*pOut
= GEP(pVtxOutput
, {0, 0, slot
});
2303 STORE(pVal
, pOut
, {0, channel
});
// Optional runtime trace of the stored value.
2304 if (verbose_vs_shader
) {
2305 lp_build_printf(gallivm
, "VS: Storing on slot %d, channel %d: ", C(slot
), C(channel
));
2306 lp_build_print_value(gallivm
, "", wrap(pVal
));
// Generates the vertex shader for ctx->vs.
// Creates an LLVM function taking three pointers (draw context, an i8
// pointer named pWorkerData, and the SWR_VS_CONTEXT), loads the used input
// channels, translates the TGSI tokens with lp_build_tgsi_soa, writes the
// outputs and clip/cull distances through WriteVS, then verifies, compiles
// and JITs the result into a PFN_VERTEX_FUNC.
// key.sampler is used to create the sampler code generator.
2312 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
2314 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
// Only outputs is zeroed; inputs entries are filled below for the channels
// the shader actually uses.
2316 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2317 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2319 memset(outputs
, 0, sizeof(outputs
));
// Stack alignment attribute sized to one SIMD vector of floats.
2321 AttrBuilder attrBuilder
;
2322 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
// Function signature: void(swr_draw_context*, i8*, SWR_VS_CONTEXT*).
2324 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2325 PointerType::get(mInt8Ty
, 0),
2326 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2327 FunctionType
*vsFuncType
=
2328 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
2330 // create new vertex shader function
2331 auto pFunction
= Function::Create(vsFuncType
,
2332 GlobalValue::ExternalLinkage
,
2334 JM()->mpCurrentModule
);
// Attach the stack alignment attribute; LLVM older than 5 uses the
// AttributeSet API, the other call uses AttributeList.
2335 #if LLVM_VERSION_MAJOR < 5
2336 AttributeSet attrSet
= AttributeSet::get(
2337 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2338 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2340 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
// Create the entry block and point both the SWR IRBuilder and the gallivm
// builder at it.
2343 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2344 IRB()->SetInsertPoint(block
);
2345 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the three function arguments in declaration order.
2347 auto argitr
= pFunction
->arg_begin();
2348 Value
*hPrivateData
= &*argitr
++;
2349 hPrivateData
->setName("hPrivateData");
2350 Value
*pWorkerData
= &*argitr
++;
2351 pWorkerData
->setName("pWorkerData");
2352 Value
*pVsCtx
= &*argitr
++;
2353 pVsCtx
->setName("vsCtx");
// Pointers to the VS constant buffers and their sizes inside the draw
// context.
2355 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
2357 consts_ptr
->setName("vs_constants");
2358 Value
*const_sizes_ptr
=
2359 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
2360 const_sizes_ptr
->setName("num_vs_constants");
// Input vertex pointer from the VS context.
// NOTE(review): the condition guarding the simd16vertex bitcast is not
// visible in this view.
2362 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
2364 vtxInput
= BITCAST(vtxInput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Load only the input channels flagged in the shader's input_usage_mask.
2367 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2368 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
2369 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2370 if (mask
& (1 << channel
)) {
2371 inputs
[attrib
][channel
] =
2372 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
2377 struct lp_build_sampler_soa
*sampler
=
2378 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
2379 assert(sampler
!= nullptr);
// System values: instance_id and vertex_id come from the VS context.
2381 struct lp_bld_tgsi_system_values system_values
;
2382 memset(&system_values
, 0, sizeof(system_values
));
2383 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
// Two alternative vertex_id loads follow (VertexID16 / VertexID); the
// conditional that selects between them is not visible in this view.
2386 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID16
}));
2388 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
// Likewise two alternative vector widths (mVWidth16 / mVWidth).
2392 uint32_t vectorWidth
= mVWidth16
;
2394 uint32_t vectorWidth
= mVWidth
;
// Parameters for the TGSI -> LLVM translation; no mask is set for the VS.
2397 struct lp_build_tgsi_params params
;
2398 memset(¶ms
, 0, sizeof(params
));
2399 params
.type
= lp_type_float_vec(32, 32 * vectorWidth
);
2400 params
.consts_ptr
= wrap(consts_ptr
);
2401 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2402 params
.system_values
= &system_values
;
2403 params
.inputs
= inputs
;
2404 params
.context_ptr
= wrap(hPrivateData
);
2405 params
.sampler
= sampler
;
2406 params
.info
= &swr_vs
->info
.base
;
// Translate the shader tokens into IR, then release the sampler generator.
2408 lp_build_tgsi_soa(gallivm
,
2409 swr_vs
->pipe
.tokens
,
2413 sampler
->destroy(sampler
);
// Re-sync the SWR IRBuilder with the block the gallivm builder ended in.
2415 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Output vertex pointer from the VS context.
// NOTE(review): the condition guarding the simd16vertex bitcast is not
// visible in this view.
2417 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
2419 vtxOutput
= BITCAST(vtxOutput
, PointerType::get(Gen_simd16vertex(JM()), 0));
// Write every output channel the shader produced (non-null entry in outputs).
2422 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2423 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
2424 if (!outputs
[attrib
][channel
])
// Point size: only the VERTEX_SGV_POINT_SIZE_COMP channel is considered, the
// value is taken from channel 0 of the output and goes to the SGV slot.
2430 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
) {
2431 if (channel
!= VERTEX_SGV_POINT_SIZE_COMP
)
2433 val
= LOAD(unwrap(outputs
[attrib
][0]));
2434 outSlot
= VERTEX_SGV_SLOT
;
// Position goes to the dedicated position slot.
2435 } else if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_POSITION
) {
2436 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2437 outSlot
= VERTEX_POSITION_SLOT
;
// Everything else is placed relative to VERTEX_ATTRIB_START_SLOT.
// NOTE(review): the statement guarded by the position test below is not
// visible in this view.
2439 val
= LOAD(unwrap(outputs
[attrib
][channel
]));
2440 outSlot
= VERTEX_ATTRIB_START_SLOT
+ attrib
;
2441 if (swr_vs
->info
.base
.output_semantic_name
[0] == TGSI_SEMANTIC_POSITION
)
2445 WriteVS(val
, pVsCtx
, vtxOutput
, outSlot
, channel
);
// Clip/cull distances are emitted when user clip planes are enabled or the
// shader writes cull distances.
2449 if (ctx
->rasterizer
->clip_plane_enable
||
2450 swr_vs
->info
.base
.culldist_writemask
) {
2451 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
// Find the output holding the clip vertex: the CLIPVERTEX output if the
// shader writes one, otherwise the loop below looks for POSITION index 0.
2454 if (swr_vs
->info
.base
.writes_clipvertex
) {
2455 cv
= locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
2456 &swr_vs
->info
.base
);
2458 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2459 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
2460 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
// Load the four components of the clip vertex.
2466 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2467 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
2468 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
2469 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
2470 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
// Shader info whose clip/cull write masks are consulted below. Starts as the
// VS info and is replaced by gs/tes/tcs info in the branches that follow.
2472 tgsi_shader_info
*pLastFE
= &ctx
->vs
->info
.base
;
2475 pLastFE
= &ctx
->gs
->info
.base
;
2477 else if (ctx
->tes
) {
2478 pLastFE
= &ctx
->tes
->info
.base
;
2480 else if (ctx
->tcs
) {
2481 pLastFE
= &ctx
->tcs
->info
.base
;
2484 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
2485 // clip distance overrides user clip planes
2486 if ((pLastFE
->clipdist_writemask
& clip_mask
& (1 << val
)) ||
2487 ((pLastFE
->culldist_writemask
<< pLastFE
->num_written_clipdistance
) & (1 << val
))) {
// Distances 0-3 live in CLIPDIST index 0 and go to the LO slot; 4 and above
// live in CLIPDIST index 1 and go to the HI slot at component val - 4.
2488 unsigned cv
= locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1, pLastFE
);
2489 assert(cv
< PIPE_MAX_SHADER_OUTPUTS
);
2491 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
2492 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2494 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
2495 WriteVS(unwrap(dist
), pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// Test whether this user clip plane is disabled.
2500 if (!(clip_mask
& (1 << val
)))
// Load the four coefficients of user clip plane 'val' from the draw context.
2503 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
2504 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
2505 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
2506 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
// Broadcast the coefficients to vectors; two alternative widths
// (VBROADCAST_16 / VBROADCAST) appear, the selecting conditional is not
// visible in this view.
2508 Value
*bpx
= VBROADCAST_16(px
);
2509 Value
*bpy
= VBROADCAST_16(py
);
2510 Value
*bpz
= VBROADCAST_16(pz
);
2511 Value
*bpw
= VBROADCAST_16(pw
);
2513 Value
*bpx
= VBROADCAST(px
);
2514 Value
*bpy
= VBROADCAST(py
);
2515 Value
*bpz
= VBROADCAST(pz
);
2516 Value
*bpw
= VBROADCAST(pw
);
// Distance = dot product of the clip vertex with the plane coefficients.
2518 Value
*dist
= FADD(FMUL(unwrap(cx
), bpx
),
2519 FADD(FMUL(unwrap(cy
), bpy
),
2520 FADD(FMUL(unwrap(cz
), bpz
),
2521 FMUL(unwrap(cw
), bpw
))));
// Stored in the LO slot, or in the HI slot at component val - 4.
2524 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
);
2526 WriteVS(dist
, pVsCtx
, vtxOutput
, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4);
// Dump IR before ("vs_function1") and after ("vs_function2") compilation.
2532 JM()->DumpToFile(pFunction
, "vs_function1");
2533 gallivm_verify_function(gallivm
, wrap(pFunction
));
2534 gallivm_compile_module(gallivm
);
2535 JM()->DumpToFile(pFunction
, "vs_function2");
2537 // lp_debug_dump_value(func);
// Fetch the JIT-compiled entry point.
2539 PFN_VERTEX_FUNC pFunc
=
2540 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
2542 JM()->DumpAsm(pFunction
, "vs_function_asm");
2543 debug_printf("vert shader %p\n", pFunc
);
2544 assert(pFunc
&& "Error: VertShader = NULL");
// Flag the JitManager's module as finalized.
2546 JM()->mIsModuleFinalized
= true;
// Compiles a vertex shader variant for the given key and caches it.
// Calls builder.CompileVS(ctx, key) and inserts a new VariantVS (holding the
// builder's gallivm state and the compiled function) into ctx->vs->map under
// that key.
// NOTE(review): the builder declaration is only partially visible here; one
// of its constructor arguments is the screen's hJitMgr cast to JitManager*.
2552 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
// Check for a vertex shader without TGSI tokens; the guarded statement is
// not visible in this view.
2554 if (!ctx
->vs
->pipe
.tokens
)
2558 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
2560 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
// The map owns the variant through a unique_ptr.
2562 ctx
->vs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantVS
>(new VariantVS(builder
.gallivm
, func
))));
// Translates a vertex shader output index (in_attrib) into a vertex slot
// number, using the output's semantic name from swr_vs->info.base.
// NOTE(review): presumably used for stream output, judging by the "so" in
// the name - confirm against callers. The return statement is not visible in
// this view.
2567 swr_so_adjust_attrib(unsigned in_attrib
,
2568 swr_vertex_shader
*swr_vs
)
2570 ubyte semantic_name
;
// Starting value: the index offset by VERTEX_ATTRIB_START_SLOT.
2573 attrib
= in_attrib
+ VERTEX_ATTRIB_START_SLOT
;
// Position, point size and layer outputs have fixed slots.
2576 semantic_name
= swr_vs
->info
.base
.output_semantic_name
[in_attrib
];
2577 if (semantic_name
== TGSI_SEMANTIC_POSITION
) {
2578 attrib
= VERTEX_POSITION_SLOT
;
2579 } else if (semantic_name
== TGSI_SEMANTIC_PSIZE
) {
2580 attrib
= VERTEX_SGV_SLOT
;
2581 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) {
2582 attrib
= VERTEX_SGV_SLOT
;
// NOTE(review): the adjustment applied when the shader writes position is
// not visible in this view.
2584 if (swr_vs
->info
.base
.writes_position
) {
// Searches the outputs of the shader described by 'info' for one whose
// semantic name and semantic index both match (name, index).
// The scan covers all PIPE_MAX_SHADER_OUTPUTS entries rather than only
// info->num_outputs.
// NOTE(review): the return statements are not visible in this view. Callers
// in this file assert that the result is < PIPE_MAX_SHADER_OUTPUTS or
// compare it against 0xFFFFFFFF.
2594 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
2596 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
2597 if ((info
->output_semantic_name
[i
] == name
)
2598 && (info
->output_semantic_index
[i
] == index
)) {
2607 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
2609 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
2611 struct tgsi_shader_info
*pPrevShader
;
2613 pPrevShader
= &ctx
->gs
->info
.base
;
2615 pPrevShader
= &ctx
->tes
->info
.base
;
2617 pPrevShader
= &ctx
->vs
->info
.base
;
2619 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
2620 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
2622 memset(inputs
, 0, sizeof(inputs
));
2623 memset(outputs
, 0, sizeof(outputs
));
2625 struct lp_build_sampler_soa
*sampler
= NULL
;
2627 AttrBuilder attrBuilder
;
2628 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
2630 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
2631 PointerType::get(mInt8Ty
, 0),
2632 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2633 FunctionType
*funcType
=
2634 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
2636 auto pFunction
= Function::Create(funcType
,
2637 GlobalValue::ExternalLinkage
,
2639 JM()->mpCurrentModule
);
2640 #if LLVM_VERSION_MAJOR < 5
2641 AttributeSet attrSet
= AttributeSet::get(
2642 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
2643 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
2645 pFunction
->addAttributes(AttributeList::FunctionIndex
, attrBuilder
);
2648 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
2649 IRB()->SetInsertPoint(block
);
2650 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
2652 auto args
= pFunction
->arg_begin();
2653 Value
*hPrivateData
= &*args
++;
2654 hPrivateData
->setName("hPrivateData");
2655 Value
*pWorkerData
= &*args
++;
2656 pWorkerData
->setName("pWorkerData");
2657 Value
*pPS
= &*args
++;
2658 pPS
->setName("psCtx");
2660 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
2661 consts_ptr
->setName("fs_constants");
2662 Value
*const_sizes_ptr
=
2663 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
2664 const_sizes_ptr
->setName("num_fs_constants");
2666 // load *pAttribs, *pPerspAttribs
2667 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
2668 Value
*pPerspAttribs
=
2669 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
2671 swr_fs
->constantMask
= 0;
2672 swr_fs
->flatConstantMask
= 0;
2673 swr_fs
->pointSpriteMask
= 0;
2675 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
2676 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
2677 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
2678 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
2684 Value
*vi
= nullptr, *vj
= nullptr;
2685 switch (interpLoc
) {
2686 case TGSI_INTERPOLATE_LOC_CENTER
:
2687 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
2688 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
2690 case TGSI_INTERPOLATE_LOC_CENTROID
:
2691 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
2692 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
2694 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2695 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
2696 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
2701 Value
*vw
= nullptr, *pAttribs
;
2702 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2703 interpMode
== TGSI_INTERPOLATE_COLOR
) {
2704 pAttribs
= pPerspAttribs
;
2705 switch (interpLoc
) {
2706 case TGSI_INTERPOLATE_LOC_CENTER
:
2707 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
2709 case TGSI_INTERPOLATE_LOC_CENTROID
:
2710 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
2712 case TGSI_INTERPOLATE_LOC_SAMPLE
:
2713 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
2717 pAttribs
= pRawAttribs
;
2723 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
2724 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
2726 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
2728 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
2729 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
2730 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
2732 inputs
[attrib
][0] = wrap(ff
);
2733 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2734 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2735 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2737 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
2738 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
2739 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
2740 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
2741 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
2743 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
2744 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
2746 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
2748 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
2750 } else if (semantic_name
== TGSI_SEMANTIC_LAYER
) { // gl_Layer
2751 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_renderTargetArrayIndex
});
2752 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vRenderTargetArrayIndex");
2753 inputs
[attrib
][0] = wrap(ff
);
2754 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2755 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2756 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2758 } else if (semantic_name
== TGSI_SEMANTIC_VIEWPORT_INDEX
) { // gl_ViewportIndex
2759 Value
*ff
= LOAD(pPS
, {0, SWR_PS_CONTEXT_viewportIndex
});
2760 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vViewportIndex");
2761 inputs
[attrib
][0] = wrap(ff
);
2762 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2763 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2764 inputs
[attrib
][3] = wrap(VIMMED1(0.0f
));
2767 unsigned linkedAttrib
=
2768 locate_linkage(semantic_name
, semantic_idx
, pPrevShader
) - 1;
2770 uint32_t extraAttribs
= 0;
2771 if (semantic_name
== TGSI_SEMANTIC_PRIMID
&& !ctx
->gs
) {
2772 /* non-gs generated primID - need to grab from swizzleMap override */
2773 linkedAttrib
= pPrevShader
->num_outputs
- 1;
2774 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2776 } else if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
2777 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
2778 /* we add an extra attrib to the backendState in swr_update_derived. */
2779 linkedAttrib
= pPrevShader
->num_outputs
+ extraAttribs
- 1;
2780 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
2782 } else if (linkedAttrib
+ 1 == 0xFFFFFFFF) {
2783 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
2784 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
2785 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
2786 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
2787 /* If we're reading in color and 2-sided lighting is enabled, we have
2790 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
2793 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2794 swr_fs
->constantMask
|= 1 << linkedAttrib
;
2795 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2796 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
2800 unsigned bcolorAttrib
= 0xFFFFFFFF;
2801 Value
*offset
= NULL
;
2802 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
2803 bcolorAttrib
= locate_linkage(
2804 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, pPrevShader
);
2805 /* Neither front nor back colors were available. Nothing to load. */
2806 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
2808 /* If there is no front color, just always use the back color. */
2809 if (linkedAttrib
+ 1 == 0xFFFFFFFF)
2810 linkedAttrib
= bcolorAttrib
;
2812 if (bcolorAttrib
!= 0xFFFFFFFF) {
2814 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2815 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
2816 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
2817 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
2820 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
2824 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
2826 offset
= MUL(back
, C(diff
));
2827 offset
->setName("offset");
2832 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2833 if (mask
& (1 << channel
)) {
2834 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
2835 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
2836 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
2839 indexA
= ADD(indexA
, offset
);
2840 indexB
= ADD(indexB
, offset
);
2841 indexC
= ADD(indexC
, offset
);
2844 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
2845 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
2846 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
2848 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
2849 inputs
[attrib
][channel
] = wrap(va
);
2851 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
2855 Value
*interp
= FMUL(va
, vi
);
2856 Value
*interp1
= FMUL(vb
, vj
);
2857 interp
= FADD(interp
, interp1
);
2858 interp
= FADD(interp
, vc
);
2859 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
2860 interpMode
== TGSI_INTERPOLATE_COLOR
)
2861 interp
= FMUL(interp
, vw
);
2862 inputs
[attrib
][channel
] = wrap(interp
);
2868 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
2869 assert(sampler
!= nullptr);
2871 struct lp_bld_tgsi_system_values system_values
;
2872 memset(&system_values
, 0, sizeof(system_values
));
2874 struct lp_build_mask_context mask
;
2875 bool uses_mask
= false;
2877 if (swr_fs
->info
.base
.uses_kill
||
2878 key
.poly_stipple_enable
) {
2879 Value
*vActiveMask
= NULL
;
2880 if (swr_fs
->info
.base
.uses_kill
) {
2881 vActiveMask
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
2883 if (key
.poly_stipple_enable
) {
2884 // first get fragment xy coords and clip to stipple bounds
2885 Value
*vXf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
});
2886 Value
*vYf
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
});
2887 Value
*vXu
= FP_TO_UI(vXf
, mSimdInt32Ty
);
2888 Value
*vYu
= FP_TO_UI(vYf
, mSimdInt32Ty
);
2890 // stipple pattern is 32x32, which means that one line of stipple
2891 // is stored in one word:
2892 // vXstipple is bit offset inside 32-bit stipple word
2893 // vYstipple is word index in stipple array
2894 Value
*vXstipple
= AND(vXu
, VIMMED1(0x1f)); // & (32-1)
2895 Value
*vYstipple
= AND(vYu
, VIMMED1(0x1f)); // & (32-1)
2897 // grab stipple pattern base address
2898 Value
*stipplePtr
= GEP(hPrivateData
, {0, swr_draw_context_polyStipple
, 0});
2899 stipplePtr
= BITCAST(stipplePtr
, mInt8PtrTy
);
2901 // perform a gather to grab stipple words for each lane
2902 Value
*vStipple
= GATHERDD(VUNDEF_I(), stipplePtr
, vYstipple
,
2903 VIMMED1(0xffffffff), 4);
2905 // create a mask with one bit corresponding to the x stipple
2906 // and AND it with the pattern, to see if we have a bit
2907 Value
*vBitMask
= LSHR(VIMMED1(0x80000000), vXstipple
);
2908 Value
*vStippleMask
= AND(vStipple
, vBitMask
);
2909 vStippleMask
= ICMP_NE(vStippleMask
, VIMMED1(0));
2910 vStippleMask
= VMASK(vStippleMask
);
2912 if (swr_fs
->info
.base
.uses_kill
) {
2913 vActiveMask
= AND(vActiveMask
, vStippleMask
);
2915 vActiveMask
= vStippleMask
;
2918 lp_build_mask_begin(
2919 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask
));
2923 struct lp_build_tgsi_params params
;
2924 memset(¶ms
, 0, sizeof(params
));
2925 params
.type
= lp_type_float_vec(32, 32 * 8);
2926 params
.mask
= uses_mask
? &mask
: NULL
;
2927 params
.consts_ptr
= wrap(consts_ptr
);
2928 params
.const_sizes_ptr
= wrap(const_sizes_ptr
);
2929 params
.system_values
= &system_values
;
2930 params
.inputs
= inputs
;
2931 params
.context_ptr
= wrap(hPrivateData
);
2932 params
.sampler
= sampler
;
2933 params
.info
= &swr_fs
->info
.base
;
2935 lp_build_tgsi_soa(gallivm
,
2936 swr_fs
->pipe
.tokens
,
2940 sampler
->destroy(sampler
);
2942 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2944 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
2946 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
2947 case TGSI_SEMANTIC_POSITION
: {
2950 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
2951 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
2954 case TGSI_SEMANTIC_COLOR
: {
2955 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
2956 if (!outputs
[attrib
][channel
])
2960 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
2961 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
2962 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
2963 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
2966 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
2972 SWR_PS_CONTEXT_shaded
,
2973 swr_fs
->info
.base
.output_semantic_index
[attrib
],
2981 "unknown output from FS %s[%d]\n",
2982 tgsi_semantic_names
[swr_fs
->info
.base
2983 .output_semantic_name
[attrib
]],
2984 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
2990 LLVMValueRef mask_result
= 0;
2992 mask_result
= lp_build_mask_end(&mask
);
2995 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
2998 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
3003 gallivm_verify_function(gallivm
, wrap(pFunction
));
3005 gallivm_compile_module(gallivm
);
3007 // after the gallivm passes, we have to lower the core's intrinsics
3008 llvm::legacy::FunctionPassManager
lowerPass(JM()->mpCurrentModule
);
3009 lowerPass
.add(createLowerX86Pass(this));
3010 lowerPass
.run(*pFunction
);
3012 PFN_PIXEL_KERNEL kernel
=
3013 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
3014 debug_printf("frag shader %p\n", kernel
);
3015 assert(kernel
&& "Error: FragShader = NULL");
3017 JM()->mIsModuleFinalized
= true;
3023 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
3025 if (!ctx
->fs
->pipe
.tokens
)
3029 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
3031 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
3033 ctx
->fs
->map
.insert(std::make_pair(key
, std::unique_ptr
<VariantFS
>(new VariantFS(builder
.gallivm
, func
))));