15347aaf9450488a7d6ec3689d82b921a9a20796
[mesa.git] / src / gallium / drivers / swr / swr_shader.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include <llvm/Config/llvm-config.h>
25
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
29 #undef DEBUG
30 #endif
31
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
36
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
39 #endif
40
41 #include "state.h"
42 #include "gen_state_llvm.h"
43 #include "builder.h"
44 #include "functionpasses/passes.h"
45
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
55 #include "gallivm/lp_bld_logic.h"
56
57 #include "swr_context.h"
58 #include "gen_surf_state_llvm.h"
59 #include "gen_swr_context_llvm.h"
60 #include "swr_resource.h"
61 #include "swr_state.h"
62 #include "swr_screen.h"
63
64
65 /////////////////////////////////////////////////////////////////////////
66
67 #include <stdio.h>
68 #include <inttypes.h>
69
70 #include "util/u_debug.h"
71 #include "util/u_memory.h"
72 #include "util/u_string.h"
73
74 #include "gallivm/lp_bld_type.h"
75
// Compile-time switches for shader tracing output.  All of them are true
// only when both DEBUG and SWR_VERBOSE_SHADER are defined; otherwise every
// `if (verbose_*)` guard in this file tests a constant false.
#if defined(DEBUG) && defined(SWR_VERBOSE_SHADER)
constexpr bool verbose_shader = true;
constexpr bool verbose_tcs_shader_in = true;
constexpr bool verbose_tcs_shader_out = true;
constexpr bool verbose_tcs_shader_loop = true;
constexpr bool verbose_vs_shader = true;
#else
constexpr bool verbose_shader = false;
constexpr bool verbose_tcs_shader_in = false;
constexpr bool verbose_tcs_shader_out = false;
constexpr bool verbose_tcs_shader_loop = false;
constexpr bool verbose_vs_shader = false;
#endif
89
90 using namespace SwrJit;
91
92 static unsigned
93 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
94
95 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
96 {
97 return !memcmp(&lhs, &rhs, sizeof(lhs));
98 }
99
100 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
101 {
102 return !memcmp(&lhs, &rhs, sizeof(lhs));
103 }
104
105 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
106 {
107 return !memcmp(&lhs, &rhs, sizeof(lhs));
108 }
109
110 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
111 {
112 return !memcmp(&lhs, &rhs, sizeof(lhs));
113 }
114
115 bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs)
116 {
117 return !memcmp(&lhs, &rhs, sizeof(lhs));
118 }
119
120 bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs)
121 {
122 return !memcmp(&lhs, &rhs, sizeof(lhs));
123 }
124
125
126 static void
127 swr_generate_sampler_key(const struct lp_tgsi_info &info,
128 struct swr_context *ctx,
129 enum pipe_shader_type shader_type,
130 struct swr_jit_sampler_key &key)
131 {
132 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
133
134 for (unsigned i = 0; i < key.nr_samplers; i++) {
135 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
136 lp_sampler_static_sampler_state(
137 &key.sampler[i].sampler_state,
138 ctx->samplers[shader_type][i]);
139 }
140 }
141
142 /*
143 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
144 * are dx10-style? Can't really have mixed opcodes, at least not
145 * if we want to skip the holes here (without rescanning tgsi).
146 */
147 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
148 key.nr_sampler_views =
149 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
150 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
151 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
152 const struct pipe_sampler_view *view =
153 ctx->sampler_views[shader_type][i];
154 lp_sampler_static_texture_state(
155 &key.sampler[i].texture_state, view);
156 if (view) {
157 struct swr_resource *swr_res = swr_resource(view->texture);
158 const struct util_format_description *desc =
159 util_format_description(view->format);
160 if (swr_res->has_depth && swr_res->has_stencil &&
161 !util_format_has_depth(desc))
162 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
163 }
164 }
165 }
166 } else {
167 key.nr_sampler_views = key.nr_samplers;
168 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
169 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
170 const struct pipe_sampler_view *view =
171 ctx->sampler_views[shader_type][i];
172 lp_sampler_static_texture_state(
173 &key.sampler[i].texture_state, view);
174 if (view) {
175 struct swr_resource *swr_res = swr_resource(view->texture);
176 const struct util_format_description *desc =
177 util_format_description(view->format);
178 if (swr_res->has_depth && swr_res->has_stencil &&
179 !util_format_has_depth(desc))
180 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
181 }
182 }
183 }
184 }
185 }
186
187 void
188 swr_generate_fs_key(struct swr_jit_fs_key &key,
189 struct swr_context *ctx,
190 swr_fragment_shader *swr_fs)
191 {
192 memset((void*)&key, 0, sizeof(key));
193
194 key.nr_cbufs = ctx->framebuffer.nr_cbufs;
195 key.light_twoside = ctx->rasterizer->light_twoside;
196 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
197
198 struct tgsi_shader_info *pPrevShader;
199 if (ctx->gs)
200 pPrevShader = &ctx->gs->info.base;
201 else if (ctx->tes)
202 pPrevShader = &ctx->tes->info.base;
203 else
204 pPrevShader = &ctx->vs->info.base;
205
206 memcpy(&key.vs_output_semantic_name,
207 &pPrevShader->output_semantic_name,
208 sizeof(key.vs_output_semantic_name));
209 memcpy(&key.vs_output_semantic_idx,
210 &pPrevShader->output_semantic_index,
211 sizeof(key.vs_output_semantic_idx));
212
213 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
214
215 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
216 ctx->poly_stipple.prim_is_poly;
217 }
218
219 void
220 swr_generate_vs_key(struct swr_jit_vs_key &key,
221 struct swr_context *ctx,
222 swr_vertex_shader *swr_vs)
223 {
224 memset((void*)&key, 0, sizeof(key));
225
226 key.clip_plane_mask =
227 swr_vs->info.base.clipdist_writemask ?
228 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
229 ctx->rasterizer->clip_plane_enable;
230
231 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
232 }
233
234 void
235 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
236 struct swr_vertex_element_state *velems)
237 {
238 memset((void*)&key, 0, sizeof(key));
239
240 key.fsState = velems->fsState;
241 }
242
243 void
244 swr_generate_gs_key(struct swr_jit_gs_key &key,
245 struct swr_context *ctx,
246 swr_geometry_shader *swr_gs)
247 {
248 memset((void*)&key, 0, sizeof(key));
249
250 struct tgsi_shader_info *pPrevShader = nullptr;
251
252 if (ctx->tes) {
253 pPrevShader = &ctx->tes->info.base;
254 } else {
255 pPrevShader = &ctx->vs->info.base;
256 }
257
258 memcpy(&key.vs_output_semantic_name,
259 &pPrevShader->output_semantic_name,
260 sizeof(key.vs_output_semantic_name));
261 memcpy(&key.vs_output_semantic_idx,
262 &pPrevShader->output_semantic_index,
263 sizeof(key.vs_output_semantic_idx));
264
265 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
266 }
267
268 void
269 swr_generate_tcs_key(struct swr_jit_tcs_key &key,
270 struct swr_context *ctx,
271 swr_tess_control_shader *swr_tcs)
272 {
273 memset((void*)&key, 0, sizeof(key));
274
275 struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
276
277 memcpy(&key.vs_output_semantic_name,
278 &pPrevShader->output_semantic_name,
279 sizeof(key.vs_output_semantic_name));
280 memcpy(&key.vs_output_semantic_idx,
281 &pPrevShader->output_semantic_index,
282 sizeof(key.vs_output_semantic_idx));
283
284 key.clip_plane_mask =
285 swr_tcs->info.base.clipdist_writemask ?
286 swr_tcs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
287 ctx->rasterizer->clip_plane_enable;
288
289 swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key);
290 }
291
292 void
293 swr_generate_tes_key(struct swr_jit_tes_key &key,
294 struct swr_context *ctx,
295 swr_tess_evaluation_shader *swr_tes)
296 {
297 memset((void*)&key, 0, sizeof(key));
298
299 struct tgsi_shader_info *pPrevShader = nullptr;
300
301 if (ctx->tcs) {
302 pPrevShader = &ctx->tcs->info.base;
303 }
304 else {
305 pPrevShader = &ctx->vs->info.base;
306 }
307
308 SWR_ASSERT(pPrevShader != nullptr, "TES: No TCS or VS defined");
309
310 memcpy(&key.prev_output_semantic_name,
311 &pPrevShader->output_semantic_name,
312 sizeof(key.prev_output_semantic_name));
313 memcpy(&key.prev_output_semantic_idx,
314 &pPrevShader->output_semantic_index,
315 sizeof(key.prev_output_semantic_idx));
316
317 key.clip_plane_mask =
318 swr_tes->info.base.clipdist_writemask ?
319 swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
320 ctx->rasterizer->clip_plane_enable;
321
322 swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key);
323 }
324
325 struct BuilderSWR : public Builder {
326 BuilderSWR(JitManager *pJitMgr, const char *pName)
327 : Builder(pJitMgr)
328 {
329 pJitMgr->SetupNewModule();
330 gallivm = gallivm_create(pName, wrap(&JM()->mContext), NULL);
331 pJitMgr->mpCurrentModule = unwrap(gallivm->module);
332 }
333
334 ~BuilderSWR() {
335 gallivm_free_ir(gallivm);
336 }
337
338 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
339 unsigned slot, unsigned channel);
340
341 struct gallivm_state *gallivm;
342 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
343 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
344 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
345 PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key);
346 PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key);
347
348 // GS-specific emit functions
349 LLVMValueRef
350 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
351 struct lp_build_context * bld,
352 boolean is_vindex_indirect,
353 LLVMValueRef vertex_index,
354 boolean is_aindex_indirect,
355 LLVMValueRef attrib_index,
356 LLVMValueRef swizzle_index);
357 void
358 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
359 struct lp_build_context * bld,
360 LLVMValueRef (*outputs)[4],
361 LLVMValueRef emitted_vertices_vec,
362 LLVMValueRef stream_id);
363
364 void
365 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
366 struct lp_build_context * bld,
367 LLVMValueRef total_emitted_vertices_vec_ptr,
368 LLVMValueRef verts_per_prim_vec,
369 LLVMValueRef emitted_prims_vec,
370 LLVMValueRef mask_vec);
371
372 void
373 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
374 LLVMValueRef total_emitted_vertices_vec,
375 LLVMValueRef emitted_prims_vec, unsigned stream);
376
377 // TCS-specific emit functions
378 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld);
379 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld);
380
381 LLVMValueRef
382 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
383 struct lp_build_tgsi_context * bld_base,
384 boolean is_vindex_indirect,
385 LLVMValueRef vertex_index,
386 boolean is_aindex_indirect,
387 LLVMValueRef attrib_index,
388 LLVMValueRef swizzle_index);
389
390 LLVMValueRef
391 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
392 struct lp_build_tgsi_context * bld_base,
393 boolean is_vindex_indirect,
394 LLVMValueRef vertex_index,
395 boolean is_aindex_indirect,
396 LLVMValueRef attrib_index,
397 LLVMValueRef swizzle_index,
398 uint32_t name);
399
400 void
401 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
402 struct lp_build_tgsi_context * bld_base,
403 unsigned name,
404 boolean is_vindex_indirect,
405 LLVMValueRef vertex_index,
406 boolean is_aindex_indirect,
407 LLVMValueRef attrib_index,
408 LLVMValueRef swizzle_index,
409 LLVMValueRef value,
410 LLVMValueRef mask_vec);
411
412 // Barrier implementation (available only in TCS)
413 void
414 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
415 struct lp_build_tgsi_context *bld_base);
416
417 // TES-specific emit functions
418 LLVMValueRef
419 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
420 struct lp_build_tgsi_context * bld_base,
421 boolean is_vindex_indirect,
422 LLVMValueRef vertex_index,
423 boolean is_aindex_indirect,
424 LLVMValueRef attrib_index,
425 LLVMValueRef swizzle_index);
426
427 LLVMValueRef
428 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
429 struct lp_build_tgsi_context * bld_base,
430 boolean is_aindex_indirect,
431 LLVMValueRef attrib_index,
432 LLVMValueRef swizzle_index);
433 };
434
// Geometry-shader interface record handed to gallivm.  The GS callbacks in
// this file receive a pointer to `base` and cast it back to this struct, so
// `base` has to remain the first member.
struct swr_gs_llvm_iface {
   struct lp_build_gs_iface base;   // gallivm-facing interface (must be first)
   struct tgsi_shader_info *info;   // TGSI info; emit_vertex reads the output semantics

   BuilderSWR *pBuilder;            // builder the static trampolines forward to

   Value *pGsCtx;                   // IR value of the GS context the callbacks load from
   SWR_GS_STATE *pGsState;          // GS state; outputTopology selects the multi-stream path
   uint32_t num_outputs;            // number of output attributes written per emitted vertex
   uint32_t num_verts_per_prim;     // read by end_primitive

   Value *pVtxAttribMap;            // IR table mapping an input attribute index to its slot
};
448
// Tessellation-control interface record handed to gallivm.  The TCS callbacks
// in this file receive a pointer to `base` and cast it back to this struct,
// so `base` has to remain the first member.
struct swr_tcs_llvm_iface {
   struct lp_build_tcs_iface base;  // gallivm-facing interface (must be first)
   struct tgsi_shader_info *info;   // TGSI info of the shader being compiled

   BuilderSWR *pBuilder;            // builder the static trampolines forward to

   Value *pTcsCtx;                  // IR value of the TCS context used for input fetches
   SWR_TS_STATE *pTsState;          // tessellation state (not used in the visible callbacks)

   uint32_t output_vertices;        // loop bound: the epilogue breaks once loop_var >= this

   LLVMValueRef loop_var;           // stack slot of the per-lane invocation counter (set in the prologue)

   Value *pVtxAttribMap;            // IR table mapping an input attribute index to its slot
   Value *pVtxOutputAttribMap;      // presumably the same mapping for per-vertex outputs - TODO confirm
   Value *pPatchOutputAttribMap;    // presumably the same mapping for per-patch outputs - TODO confirm
};
466
// Tessellation-evaluation interface record handed to gallivm.  The TES
// callbacks in this file receive a pointer to `base` and cast it back to this
// struct, so `base` has to remain the first member.
struct swr_tes_llvm_iface {
   struct lp_build_tes_iface base;  // gallivm-facing interface (must be first)
   struct tgsi_shader_info *info;   // TGSI info of the shader being compiled

   BuilderSWR *pBuilder;            // builder the static trampolines forward to

   Value *pTesCtx;                  // presumably the IR value of the TES context - TODO confirm
   SWR_TS_STATE *pTsState;          // tessellation state

   uint32_t num_outputs;            // presumably the number of TES output attributes - TODO confirm

   Value *pVtxAttribMap;            // presumably maps per-vertex input attributes to slots - TODO confirm
   Value *pPatchAttribMap;          // presumably maps per-patch input attributes to slots - TODO confirm
};
481
482 // trampoline functions so we can use the builder llvm construction methods
483 static LLVMValueRef
484 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
485 struct lp_build_context * bld,
486 boolean is_vindex_indirect,
487 LLVMValueRef vertex_index,
488 boolean is_aindex_indirect,
489 LLVMValueRef attrib_index,
490 LLVMValueRef swizzle_index)
491 {
492 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
493
494 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld,
495 is_vindex_indirect,
496 vertex_index,
497 is_aindex_indirect,
498 attrib_index,
499 swizzle_index);
500 }
501
502 static void
503 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
504 struct lp_build_context * bld,
505 LLVMValueRef (*outputs)[4],
506 LLVMValueRef emitted_vertices_vec,
507 LLVMValueRef mask_vec,
508 LLVMValueRef stream_id)
509 {
510 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
511
512 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld,
513 outputs,
514 emitted_vertices_vec,
515 stream_id);
516 }
517
518 static void
519 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
520 struct lp_build_context * bld,
521 LLVMValueRef total_emitted_vertices_vec_ptr,
522 LLVMValueRef verts_per_prim_vec,
523 LLVMValueRef emitted_prims_vec,
524 LLVMValueRef mask_vec, unsigned stream_id)
525 {
526 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
527
528 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld,
529 total_emitted_vertices_vec_ptr,
530 verts_per_prim_vec,
531 emitted_prims_vec,
532 mask_vec);
533 }
534
535 static void
536 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
537 LLVMValueRef total_emitted_vertices_vec,
538 LLVMValueRef emitted_prims_vec, unsigned stream)
539 {
540 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
541
542 iface->pBuilder->swr_gs_llvm_epilogue(gs_base,
543 total_emitted_vertices_vec,
544 emitted_prims_vec, stream);
545 }
546
547 static LLVMValueRef
548 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
549 struct lp_build_context * bld,
550 boolean is_vindex_indirect,
551 LLVMValueRef vertex_index,
552 boolean is_aindex_indirect,
553 LLVMValueRef attrib_index,
554 LLVMValueRef swizzle_index)
555 {
556 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
557 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
558
559 return iface->pBuilder->swr_tcs_llvm_fetch_input(tcs_iface, bld_base,
560 is_vindex_indirect,
561 vertex_index,
562 is_aindex_indirect,
563 attrib_index,
564 swizzle_index);
565 }
566
567 static LLVMValueRef
568 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
569 struct lp_build_context * bld,
570 boolean is_vindex_indirect,
571 LLVMValueRef vertex_index,
572 boolean is_aindex_indirect,
573 LLVMValueRef attrib_index,
574 LLVMValueRef swizzle_index,
575 uint32_t name)
576 {
577 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
578 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
579
580 return iface->pBuilder->swr_tcs_llvm_fetch_output(tcs_iface, bld_base,
581 is_vindex_indirect,
582 vertex_index,
583 is_aindex_indirect,
584 attrib_index,
585 swizzle_index,
586 name);
587 }
588
589
590 static void
591 swr_tcs_llvm_emit_prologue(struct lp_build_context* bld)
592 {
593 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
594 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
595 iface->pBuilder->swr_tcs_llvm_emit_prologue(bld_base);
596 }
597
598 static void
599 swr_tcs_llvm_emit_epilogue(struct lp_build_context* bld)
600 {
601 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
602 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
603 iface->pBuilder->swr_tcs_llvm_emit_epilogue(bld_base);
604 }
605
606 static
607 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
608 struct lp_build_context * bld,
609 unsigned name,
610 boolean is_vindex_indirect,
611 LLVMValueRef vertex_index,
612 boolean is_aindex_indirect,
613 LLVMValueRef attrib_index,
614 boolean is_sindex_indirect,
615 LLVMValueRef swizzle_index,
616 LLVMValueRef value,
617 LLVMValueRef mask_vec)
618 {
619 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
620 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
621
622 iface->pBuilder->swr_tcs_llvm_store_output(tcs_iface,
623 bld_base,
624 name,
625 is_vindex_indirect,
626 vertex_index,
627 is_aindex_indirect,
628 attrib_index,
629 swizzle_index,
630 value,
631 mask_vec);
632 }
633
634
635 static
636 void swr_tcs_llvm_emit_barrier(struct lp_build_context *bld)
637 {
638 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
639 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
640
641 iface->pBuilder->swr_tcs_llvm_emit_barrier(bld_base->tcs_iface, &bld_base->bld_base);
642 }
643
644
645 static LLVMValueRef
646 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
647 struct lp_build_context * bld,
648 boolean is_vindex_indirect,
649 LLVMValueRef vertex_index,
650 boolean is_aindex_indirect,
651 LLVMValueRef attrib_index,
652 LLVMValueRef swizzle_index)
653 {
654 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
655 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
656
657 return iface->pBuilder->swr_tes_llvm_fetch_vtx_input(tes_iface, bld_base,
658 is_vindex_indirect,
659 vertex_index,
660 is_aindex_indirect,
661 attrib_index,
662 swizzle_index);
663 }
664
665 static LLVMValueRef
666 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
667 struct lp_build_context * bld,
668 boolean is_aindex_indirect,
669 LLVMValueRef attrib_index,
670 LLVMValueRef swizzle_index)
671 {
672 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
673 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
674
675 return iface->pBuilder->swr_tes_llvm_fetch_patch_input(tes_iface, bld_base,
676 is_aindex_indirect,
677 attrib_index,
678 swizzle_index);
679 }
680
681 LLVMValueRef
682 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
683 struct lp_build_context * bld,
684 boolean is_vindex_indirect,
685 LLVMValueRef vertex_index,
686 boolean is_aindex_indirect,
687 LLVMValueRef attrib_index,
688 LLVMValueRef swizzle_index)
689 {
690 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
691 Value *vert_index = unwrap(vertex_index);
692 Value *attr_index = unwrap(attrib_index);
693
694 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
695
696 if (is_vindex_indirect || is_aindex_indirect) {
697 int i;
698 Value *res = unwrap(bld->zero);
699 struct lp_type type = bld->type;
700
701 for (i = 0; i < type.length; i++) {
702 Value *vert_chan_index = vert_index;
703 Value *attr_chan_index = attr_index;
704
705 if (is_vindex_indirect) {
706 vert_chan_index = VEXTRACT(vert_index, C(i));
707 }
708 if (is_aindex_indirect) {
709 attr_chan_index = VEXTRACT(attr_index, C(i));
710 }
711
712 Value *attrib =
713 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
714
715 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
716 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
717
718 Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
719 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
720
721 Value *value = VEXTRACT(pInput, C(i));
722 res = VINSERT(res, value, C(i));
723 }
724
725 return wrap(res);
726 } else {
727 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
728
729 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
730 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
731
732 Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);
733
734 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
735
736 return wrap(pInput);
737 }
738 }
739
// GS output stream layout.
// Each per-lane stream starts with a vertex-count header followed by a
// control header; vertex data starts after both.  The values are used as
// offsets from the stream base pointer (presumably in bytes - TODO confirm).
#define VERTEX_COUNT_SIZE 32
#define CONTROL_HEADER_SIZE (8*32)
743
/**
 * Emit IR that writes one emitted GS vertex into each lane's output stream.
 *
 * For every output attribute the destination slot is chosen from its
 * semantic (point size, layer and viewport index go to one channel of the SGV
 * slot, position to the position slot, everything else to the generic
 * attribute slots).  The channels are then stored per lane at
 *    header + emitted_vertices * vertSize + slot * attribSize.
 * Lanes whose mask bit is clear are redirected to a stack scratch buffer.
 *
 * When the output topology is a point list, the stream id of the vertex is
 * additionally OR-ed into the control header, two bits per vertex.
 *
 * \param outputs               per attribute and channel, pointers the values are loaded from
 * \param emitted_vertices_vec  per-lane count of vertices emitted so far
 * \param stream_id             stream id vector (only element 0 is read)
 */
void
BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
                                    struct lp_build_context * bld,
                                    LLVMValueRef (*outputs)[4],
                                    LLVMValueRef emitted_vertices_vec,
                                    LLVMValueRef stream_id)
{
   swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;

   IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
   const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
   const uint32_t attribSize = 4 * sizeof(float);
   const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
   // Per-lane offset of the vertex being emitted, relative to the end of the header.
   Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));

   // Execution mask from the GS context, reduced to one bit per lane.
   Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
   Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, mVWidth));

   // The scratch buffer lives only for this emit; the stack is restored on exit.
   Value *pStack = STACKSAVE();
   Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking

   for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
      // Pick the destination slot (and SGV channel) from the output semantic.
      uint32_t attribSlot = attrib;
      uint32_t sgvChannel = 0;
      if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
         attribSlot = VERTEX_SGV_SLOT;
         sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
      } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
         attribSlot = VERTEX_SGV_SLOT;
         sgvChannel = VERTEX_SGV_RTAI_COMP;
      } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
         attribSlot = VERTEX_SGV_SLOT;
         sgvChannel = VERTEX_SGV_VAI_COMP;
      } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
         attribSlot = VERTEX_POSITION_SLOT;
      } else {
         attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
         // Position goes to its own slot, so generic attributes shift down by one.
         if (iface->info->writes_position) {
            attribSlot--;
         }
      }

      Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?

      for (uint32_t lane = 0; lane < mVWidth; ++lane) {
         Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
         Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
         Value *pStreamOffset = GEP(pStream, pLaneOffset);
         pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);

         // Inactive lanes write to the scratch buffer instead of the stream.
         Value *pLaneMask = VEXTRACT(vMask1, C(lane));
         pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);

         for (uint32_t channel = 0; channel < 4; ++channel) {
            Value *vData;

            // SGV values are always taken from channel 0 of the shader output.
            if (attribSlot == VERTEX_SGV_SLOT)
               vData = LOAD(unwrap(outputs[attrib][0]));
            else
               vData = LOAD(unwrap(outputs[attrib][channel]));

            // For the SGV slot only the channel owned by this semantic is
            // stored; the pointer still advances over the other channels.
            if (attribSlot != VERTEX_SGV_SLOT ||
                sgvChannel == channel) {
               vData = VEXTRACT(vData, C(lane));
               STORE(vData, pStreamOffset);
            }
            pStreamOffset = GEP(pStreamOffset, C(1));
         }
      }
   }

   /* When the output type is not points, the geometry shader may not
    * output data to multiple streams. So early exit here.
    */
   if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
      STACKRESTORE(pStack);
      return;
   }

   // Info about stream id for each vertex
   // is coded in 2 bits (4 vert per byte "box"):
   // ----------------- ----------------- ----
   // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
   // ----------------- ----------------- ----

   // Calculate where need to put stream id for current vert
   // in 1 byte "box".
   Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));

   // Calculate in which box put stream id for current vert.
   Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));

   // Skip count header
   Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));

   // NOTE(review): unlike the attribute stores above, this read-modify-write
   // is not redirected for masked-off lanes - confirm that is intended.
   for (uint32_t lane = 0; lane < mVWidth; ++lane) {
      // The shift is truncated to 8 bits before use; vertex counts of 4 or
      // more presumably rely on the shifted-out bits being dropped - TODO confirm.
      Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
      Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});

      Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));

      // Just make sure that not overflow max - stream id = (0,1,2,3)
      // NOTE(review): element 0 of stream_id is used for every lane;
      // presumably the stream id is uniform across lanes - confirm.
      Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);

      // Shift it to correct position in byte "box"
      vVal = SHL(vVal, pShift);

      // Info about other vertices can be already stored
      // so we need to read and add bits from current vert info.
      Value *storedValue = LOAD(pStreamOffset);
      vVal = OR(storedValue, vVal);
      STORE(vVal, pStreamOffset);
   }

   STACKRESTORE(pStack);
}
860
861 void
862 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
863 struct lp_build_context * bld,
864 LLVMValueRef total_emitted_vertices_vec,
865 LLVMValueRef verts_per_prim_vec,
866 LLVMValueRef emitted_prims_vec,
867 LLVMValueRef mask_vec)
868 {
869 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
870
871 /* When the output type is points, the geometry shader may output data
872 * to multiple streams, and end_primitive has no effect. Info about
873 * stream id for vertices is stored into the same place in memory where
874 * end primitive info is stored so early exit in this case.
875 */
876 if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
877 return;
878 }
879
880 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
881
882 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
883 Value *vMask1 = TRUNC(vMask, getVectorType(mInt1Ty, 8));
884
885 uint32_t vertsPerPrim = iface->num_verts_per_prim;
886
887 Value *vCount =
888 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
889 unwrap(verts_per_prim_vec));
890
891 vCount = unwrap(total_emitted_vertices_vec);
892
893 Value *mask = unwrap(mask_vec);
894 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
895 mask = AND(mask, cmpMask);
896 vMask1 = TRUNC(mask, getVectorType(mInt1Ty, 8));
897
898 vCount = SUB(vCount, VIMMED1(1));
899 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
900 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
901
902 vValue = TRUNC(vValue, getVectorType(mInt8Ty, 8));
903
904 Value *pStack = STACKSAVE();
905 Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
906
907 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
908 Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
909 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
910 Value *pStreamOffset = GEP(pStream, vLaneOffset);
911
912 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
913 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
914
915 Value *vVal = LOAD(pStreamOffset);
916 vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
917 STORE(vVal, pStreamOffset);
918 }
919
920 STACKRESTORE(pStack);
921 }
922
923 void
924 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
925 LLVMValueRef total_emitted_vertices_vec,
926 LLVMValueRef emitted_prims_vec, unsigned stream)
927 {
928 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
929
930 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
931
932 // Store emit count to each output stream in the first DWORD
933 for (uint32_t lane = 0; lane < mVWidth; ++lane)
934 {
935 Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
936 pStream = BITCAST(pStream, mInt32PtrTy);
937 Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
938 STORE(pLaneCount, pStream);
939 }
940 }
941
942 void
943 BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld)
944 {
945 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
946
947 Value* loop_var = ALLOCA(mSimdInt32Ty);
948 STORE(VBROADCAST(C(0)), loop_var);
949
950 iface->loop_var = wrap(loop_var);
951
952 lp_exec_bgnloop(&bld->exec_mask, true);
953
954 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
955 bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
956
957 if (verbose_tcs_shader_loop) {
958 lp_build_print_value(gallivm, "Prologue LOOP Iteration BEGIN:", bld->system_values.invocation_id);
959 }
960
961 }
962
963 void
964 BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld)
965 {
966 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
967
968 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
969
970 STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
971 if (verbose_tcs_shader_loop) {
972 lp_build_print_value(gallivm, "Epilogue LOOP: ", wrap(LOAD(unwrap(iface->loop_var))));
973 }
974
975 LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
976 wrap(VBROADCAST(C(iface->output_vertices))));
977 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
978 lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
979 lp_exec_mask_cond_pop(&bld->exec_mask);
980 lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
981 }
982
983 LLVMValueRef
984 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
985 struct lp_build_tgsi_context * bld_base,
986 boolean is_vindex_indirect,
987 LLVMValueRef vertex_index,
988 boolean is_aindex_indirect,
989 LLVMValueRef attrib_index,
990 LLVMValueRef swizzle_index)
991 {
992 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
993
994 Value *vert_index = unwrap(vertex_index);
995 Value *attr_index = unwrap(attrib_index);
996
997 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
998
999 if (verbose_tcs_shader_in) {
1000 lp_build_printf(gallivm, "[TCS IN][VTX] ======================================\n");
1001 lp_build_print_value(gallivm, "[TCS IN][VTX] vertex_index: ", vertex_index);
1002 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
1003 lp_build_printf(gallivm, "[TCS IN][VTX] --------------------------------------\n");
1004 }
1005
1006 Value *res = unwrap(bld_base->base.zero);
1007 if (is_vindex_indirect || is_aindex_indirect) {
1008 int i;
1009 struct lp_type type = bld_base->base.type;
1010
1011 for (i = 0; i < type.length; i++) {
1012 Value *vert_chan_index = vert_index;
1013 Value *attr_chan_index = attr_index;
1014
1015 if (is_vindex_indirect) {
1016 vert_chan_index = VEXTRACT(vert_index, C(i));
1017 }
1018 if (is_aindex_indirect) {
1019 attr_chan_index = VEXTRACT(attr_index, C(i));
1020 }
1021
1022 Value *attrib =
1023 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
1024
1025 Value *pBase = GEP(iface->pTcsCtx,
1026 { C(0), C(SWR_HS_CONTEXT_vert), vert_chan_index,
1027 C(simdvertex_attrib), attrib, unwrap(swizzle_index), C(i) });
1028
1029 Value *val = LOAD(pBase);
1030
1031 if (verbose_tcs_shader_in) {
1032 lp_build_print_value(gallivm, "[TCS IN][VTX] vert_chan_index: ", wrap(vert_chan_index));
1033 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
1034 lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index));
1035 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
1036 lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
1037 lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(val));
1038 }
1039 res = VINSERT(res, val, C(i));
1040 }
1041 } else {
1042 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
1043
1044 Value *pBase = GEP(iface->pTcsCtx,
1045 { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
1046 C(simdvertex_attrib), attrib, unwrap(swizzle_index) });
1047
1048 res = LOAD(pBase);
1049
1050 if (verbose_tcs_shader_in) {
1051 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib_index: ", attrib_index);
1052 lp_build_print_value(gallivm, "[TCS IN][VTX] attr_chan_index: ", wrap(attr_index));
1053 lp_build_print_value(gallivm, "[TCS IN][VTX] attrib read from map: ", wrap(attrib));
1054 lp_build_print_value(gallivm, "[TCS IN][VTX] swizzle_index: ", swizzle_index);
1055 lp_build_print_value(gallivm, "[TCS IN][VTX] Loaded: ", wrap(res));
1056 }
1057 }
1058 if (verbose_tcs_shader_in) {
1059 lp_build_print_value(gallivm, "[TCS IN][VTX] returning: ", wrap(res));
1060 }
1061 return wrap(res);
1062 }
1063
1064 LLVMValueRef
1065 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
1066 struct lp_build_tgsi_context * bld_base,
1067 boolean is_vindex_indirect,
1068 LLVMValueRef vertex_index,
1069 boolean is_aindex_indirect,
1070 LLVMValueRef attrib_index,
1071 LLVMValueRef swizzle_index,
1072 uint32_t name)
1073 {
1074 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1075
1076 Value *vert_index = unwrap(vertex_index);
1077 Value *attr_index = unwrap(attrib_index);
1078
1079 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1080
1081 if (verbose_tcs_shader_in) {
1082 lp_build_print_value(gallivm, "[TCS INOUT] Vertex index: ", vertex_index);
1083 lp_build_print_value(gallivm, "[TCS INOUT] Attrib index: ", wrap(attr_index));
1084 lp_build_print_value(gallivm, "[TCS INOUT] Swizzle index: ", swizzle_index);
1085 }
1086
1087 Value* res = unwrap(bld_base->base.zero);
1088
1089 for (uint32_t lane = 0; lane < mVWidth; lane++) {
1090 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
1091 Value* pCpOut = GEP(p1, {lane});
1092
1093 Value *vert_chan_index = vert_index;
1094 Value *attr_chan_index = attr_index;
1095
1096 if (is_vindex_indirect) {
1097 vert_chan_index = VEXTRACT(vert_index, C(lane));
1098 if (verbose_tcs_shader_in) {
1099 lp_build_print_value(gallivm, "[TCS INOUT] Extracted vertex index: ", wrap(vert_chan_index));
1100 }
1101 }
1102
1103 if (is_aindex_indirect) {
1104 attr_chan_index = VEXTRACT(attr_index, C(lane));
1105 if (verbose_tcs_shader_in) {
1106 lp_build_print_value(gallivm, "[TCS INOUT] Extracted attrib index: ", wrap(attr_chan_index));
1107 }
1108 }
1109
1110 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
1111 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
1112 Value* tessFactorArray = nullptr;
1113 if (name == TGSI_SEMANTIC_TESSOUTER) {
1114 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
1115 } else {
1116 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
1117 }
1118 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
1119 res = VINSERT(res, LOAD(tessFactor), C(lane));
1120 if (verbose_tcs_shader_in) {
1121 lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] lane (patch-id): ", wrap(C(lane)));
1122 lp_build_print_value(gallivm, "[TCS INOUT][FACTOR] loaded value: ", wrap(res));
1123 }
1124 } else if (name == TGSI_SEMANTIC_PATCH) {
1125 Value* attr_index_from_map = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_chan_index}));
1126 Value* attr_value = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index_from_map, unwrap(swizzle_index)});
1127 res = VINSERT(res, LOAD(attr_value), C(lane));
1128 if (verbose_tcs_shader_in) {
1129 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] attr index loaded from map: ", wrap(attr_index_from_map));
1130 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] lane (patch-id): ", wrap(C(lane)));
1131 lp_build_print_value(gallivm, "[TCS INOUT][PATCH] loaded value: ", wrap(res));
1132 }
1133 } else {
1134 // Generic attribute
1135 Value *attrib =
1136 LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_chan_index}));
1137 if (verbose_tcs_shader_in) {
1138 lp_build_print_value(gallivm, "[TCS INOUT][VTX] Attrib index from map: ", wrap(attrib));
1139 }
1140 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_chan_index,
1141 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
1142
1143 res = VINSERT(res, LOAD(attr_chan), C(lane));
1144 if (verbose_tcs_shader_in) {
1145 lp_build_print_value(gallivm, "[TCS INOUT][VTX] loaded value: ", wrap(res));
1146 }
1147 }
1148 }
1149
1150 return wrap(res);
1151 }
1152
1153 void
1154 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
1155 struct lp_build_tgsi_context *bld_base,
1156 unsigned name,
1157 boolean is_vindex_indirect,
1158 LLVMValueRef vertex_index,
1159 boolean is_aindex_indirect,
1160 LLVMValueRef attrib_index,
1161 LLVMValueRef swizzle_index,
1162 LLVMValueRef value,
1163 LLVMValueRef mask_vec)
1164 {
1165 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1166 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
1167
1168 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1169
1170 if (verbose_tcs_shader_out) {
1171 lp_build_printf(gallivm, "[TCS OUT] =============================================\n");
1172 }
1173
1174 if (verbose_tcs_shader_out) {
1175 lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask);
1176 lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value);
1177 }
1178
1179 Value *vert_index = unwrap(vertex_index);
1180 Value *attr_index = unwrap(attrib_index);
1181
1182 if (verbose_tcs_shader_out) {
1183 lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index);
1184 lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index));
1185 lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index);
1186 }
1187
1188 if (is_vindex_indirect) {
1189 vert_index = VEXTRACT(vert_index, C(0));
1190 if (verbose_tcs_shader_out) {
1191 lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", vertex_index);
1192 }
1193 }
1194
1195 if (is_aindex_indirect) {
1196 attr_index = VEXTRACT(attr_index, C(0));
1197 if (verbose_tcs_shader_out) {
1198 lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index));
1199 }
1200 }
1201
1202 if (verbose_tcs_shader_out) {
1203 if (bld->exec_mask.has_mask) {
1204 lp_build_print_value(gallivm, "[TCS OUT] Exec mask: ", bld->exec_mask.exec_mask);
1205 }
1206 else {
1207 lp_build_printf(gallivm, "[TCS OUT] has no mask\n");
1208 }
1209 }
1210 for (uint32_t lane = 0; lane < mVWidth; lane++) {
1211 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
1212 Value* pCpOut = GEP(p1, {lane});
1213
1214 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
1215 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
1216 Value* tessFactorArray = nullptr;
1217 if (name == TGSI_SEMANTIC_TESSOUTER) {
1218 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
1219 } else {
1220 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
1221 }
1222 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
1223 Value* valueToStore = VEXTRACT(unwrap(value), C(lane));
1224 valueToStore = BITCAST(valueToStore, mFP32Ty);
1225 if (mask_vec) {
1226 Value *originalVal = LOAD(tessFactor);
1227 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
1228 valueToStore = SELECT(vMask, valueToStore, originalVal);
1229 }
1230 STORE(valueToStore, tessFactor);
1231 if (verbose_tcs_shader_out)
1232 {
1233 lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Mask_vec mask: ", mask_vec);
1234 lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore));
1235 }
1236 } else if (name == TGSI_SEMANTIC_PATCH) {
1237 Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
1238 if (verbose_tcs_shader_out) {
1239 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index));
1240 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index));
1241 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect)));
1242 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect)));
1243 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib));
1244 }
1245 Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib});
1246 Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
1247 if (verbose_tcs_shader_out) {
1248 lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane)));
1249 lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value);
1250 lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store));
1251 lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index);
1252 }
1253 value_to_store = BITCAST(value_to_store, mFP32Ty);
1254 if (mask_vec) {
1255 Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)});
1256 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
1257 value_to_store = SELECT(vMask, value_to_store, originalVal);
1258 if (verbose_tcs_shader_out) {
1259 lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", mask_vec);
1260 lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal));
1261 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask));
1262 lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store));
1263 }
1264 }
1265 STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)});
1266 if (verbose_tcs_shader_out) {
1267 lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store));
1268 }
1269 } else {
1270 Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
1271 Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
1272
1273 if (verbose_tcs_shader_out) {
1274 lp_build_printf(gallivm, "[TCS OUT] Writting attribute\n");
1275 lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id);
1276 lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index));
1277 lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib));
1278 lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index);
1279 lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value);
1280 lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store));
1281 }
1282
1283 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp),
1284 VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)),
1285 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
1286
1287 // Mask output values if needed
1288 value_to_store = BITCAST(value_to_store, mFP32Ty);
1289 if (mask_vec) {
1290 Value *originalVal = LOAD(attr_chan);
1291 Value *vMask = TRUNC(VEXTRACT(unwrap(mask_vec), C(lane)), mInt1Ty);
1292 value_to_store = SELECT(vMask, value_to_store, originalVal);
1293 }
1294 STORE(value_to_store, attr_chan);
1295 if (verbose_tcs_shader_out) {
1296 lp_build_print_value(gallivm, "[TCS OUT][VTX] Mask_vec mask: ", mask_vec);
1297 lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store));
1298 }
1299 }
1300 }
1301 }
1302
1303 void
1304 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
1305 struct lp_build_tgsi_context *bld_base)
1306 {
1307 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1308 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
1309
1310 if (verbose_tcs_shader_loop) {
1311 lp_build_print_value(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_var);
1312 }
1313
1314 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
1315
1316 STORE(ADD(LOAD(unwrap(iface->loop_var)), VBROADCAST(C(1))), unwrap(iface->loop_var));
1317
1318 LLVMValueRef tmp = lp_build_cmp(uint_bld, PIPE_FUNC_GEQUAL, wrap(LOAD(unwrap(iface->loop_var))),
1319 wrap(VBROADCAST(C(iface->output_vertices))));
1320
1321 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
1322 lp_exec_break(&bld->exec_mask, &bld->bld_base.pc, false);
1323 lp_exec_mask_cond_pop(&bld->exec_mask);
1324 lp_exec_endloop(bld->bld_base.base.gallivm, &bld->exec_mask);
1325
1326 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1327
1328 STORE(VBROADCAST(C(0)), unwrap(iface->loop_var));
1329 lp_exec_bgnloop(&bld->exec_mask, true);
1330
1331 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1332
1333 bld->system_values.invocation_id = wrap((LOAD(unwrap(iface->loop_var))));
1334
1335 if (verbose_tcs_shader_loop) {
1336 lp_build_print_value(gallivm, "Barrier LOOP: Iteration BEGIN: ", iface->loop_var);
1337 lp_build_print_value(gallivm, "Barrier LOOP: InvocationId: \n", bld->system_values.invocation_id);
1338 }
1339 }
1340
1341
1342 LLVMValueRef
1343 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
1344 struct lp_build_tgsi_context * bld_base,
1345 boolean is_aindex_indirect,
1346 LLVMValueRef attrib_index,
1347 LLVMValueRef swizzle_index)
1348 {
1349 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
1350 Value *attr_index = unwrap(attrib_index);
1351 Value *res = unwrap(bld_base->base.zero);
1352
1353 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1354
1355 if (verbose_shader) {
1356 lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n");
1357 }
1358
1359 if (is_aindex_indirect) {
1360 int i;
1361 struct lp_type type = bld_base->base.type;
1362
1363 for (i = 0; i < type.length; i++) {
1364 Value *attr_chan_index = attr_index;
1365
1366 if (is_aindex_indirect) {
1367 attr_chan_index = VEXTRACT(attr_index, C(i));
1368 }
1369
1370 Value *attrib =
1371 LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_chan_index}));
1372
1373 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1374 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData});
1375 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib});
1376 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)});
1377 if (verbose_shader) {
1378 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
1379 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index));
1380 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib));
1381 lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
1382 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val));
1383 }
1384 res = VINSERT(res, Val, C(i));
1385 }
1386 } else {
1387 Value *attrib = LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_index}));
1388
1389 Value *pCpIn = LOAD(iface->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1390 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData});
1391 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib});
1392 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)});
1393 if (verbose_shader) {
1394 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
1395 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index));
1396 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib));
1397 lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
1398 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val));
1399 }
1400 res = VBROADCAST(Val);
1401 }
1402 if (verbose_shader) {
1403 lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(res));
1404 }
1405 return wrap(res);
1406 }
1407
1408
1409
1410 LLVMValueRef
1411 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
1412 struct lp_build_tgsi_context * bld_base,
1413 boolean is_vindex_indirect,
1414 LLVMValueRef vertex_index,
1415 boolean is_aindex_indirect,
1416 LLVMValueRef attrib_index,
1417 LLVMValueRef swizzle_index)
1418 {
1419 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
1420 Value *vert_index = unwrap(vertex_index);
1421 Value *attr_index = unwrap(attrib_index);
1422
1423 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1424
1425 if (verbose_shader) {
1426 lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n");
1427 }
1428
1429 Value *res = unwrap(bld_base->base.zero);
1430 if (is_vindex_indirect || is_aindex_indirect) {
1431 int i;
1432 struct lp_type type = bld_base->base.type;
1433
1434 for (i = 0; i < type.length; i++) {
1435 Value *vert_chan_index = vert_index;
1436 Value *attr_chan_index = attr_index;
1437
1438 if (is_vindex_indirect) {
1439 vert_chan_index = VEXTRACT(vert_index, C(i));
1440 }
1441 if (is_aindex_indirect) {
1442 attr_chan_index = VEXTRACT(attr_index, C(i));
1443 }
1444
1445 Value *attrib =
1446 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
1447
1448 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1449 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
1450 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index});
1451 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
1452 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
1453 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
1454 if (verbose_shader) {
1455 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
1456 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index));
1457 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
1458 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
1459 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
1460 }
1461 res = VINSERT(res, Val, C(i));
1462 }
1463 } else {
1464 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
1465
1466 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1467 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
1468 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_index});
1469 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
1470 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
1471 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
1472 if (verbose_shader) {
1473 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
1474 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index));
1475 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
1476 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
1477 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
1478 }
1479 res = VBROADCAST(Val);
1480 }
1481 if (verbose_shader) {
1482 lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res));
1483 }
1484 return wrap(res);
1485 }
1486
1487
1488
1489
1490 PFN_GS_FUNC
1491 BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
1492 {
1493 SWR_GS_STATE *pGS = &ctx->gs->gsState;
1494 struct tgsi_shader_info *info = &ctx->gs->info.base;
1495
1496 memset(pGS, 0, sizeof(*pGS));
1497
1498 pGS->gsEnable = true;
1499
1500 pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
1501 pGS->outputTopology =
1502 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0);
1503
1504 /* It's +1 because emit_vertex in swr is always called exactly one time more
1505 * than max_vertices passed in Geometry Shader. We need to allocate more memory
1506 * to avoid crash/memory overwritten.
1507 */
1508 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
1509 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
1510
1511 // If point primitive then assume to use multiple streams
1512 if(pGS->outputTopology == TOP_POINT_LIST) {
1513 pGS->isSingleStream = false;
1514 } else {
1515 pGS->isSingleStream = true;
1516 pGS->singleStreamID = 0;
1517 }
1518
1519 pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
1520 pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
1521 pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
1522 pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
1523 pGS->controlDataOffset = VERTEX_COUNT_SIZE;
1524 pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
1525
1526 pGS->allocationSize =
1527 VERTEX_COUNT_SIZE + // vertex count
1528 CONTROL_HEADER_SIZE + // control header
1529 (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
1530 pGS->maxNumVerts; // num verts
1531
1532 struct swr_geometry_shader *gs = ctx->gs;
1533
1534 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1535 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1536
1537 memset(outputs, 0, sizeof(outputs));
1538
1539 AttrBuilder attrBuilder;
1540 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1541
1542 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1543 PointerType::get(mInt8Ty, 0),
1544 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
1545 FunctionType *vsFuncType =
1546 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
1547
1548 // create new vertex shader function
1549 auto pFunction = Function::Create(vsFuncType,
1550 GlobalValue::ExternalLinkage,
1551 "GS",
1552 JM()->mpCurrentModule);
1553 #if LLVM_VERSION_MAJOR < 5
1554 AttributeSet attrSet = AttributeSet::get(
1555 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1556 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1557 #else
1558 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1559 #endif
1560
1561 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1562 IRB()->SetInsertPoint(block);
1563 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1564
1565 auto argitr = pFunction->arg_begin();
1566 Value *hPrivateData = &*argitr++;
1567 hPrivateData->setName("hPrivateData");
1568 Value *pWorkerData = &*argitr++;
1569 pWorkerData->setName("pWorkerData");
1570 Value *pGsCtx = &*argitr++;
1571 pGsCtx->setName("gsCtx");
1572
1573 Value *consts_ptr =
1574 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
1575 consts_ptr->setName("gs_constants");
1576 Value *const_sizes_ptr =
1577 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
1578 const_sizes_ptr->setName("num_gs_constants");
1579
1580 struct lp_build_sampler_soa *sampler =
1581 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
1582 assert(sampler != nullptr);
1583
1584 struct lp_bld_tgsi_system_values system_values;
1585 memset(&system_values, 0, sizeof(system_values));
1586 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
1587 system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
1588
1589 std::vector<Constant*> mapConstants;
1590 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1591 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
1592 ubyte semantic_name = info->input_semantic_name[slot];
1593 ubyte semantic_idx = info->input_semantic_index[slot];
1594
1595 unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
1596 assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
1597
1598 vs_slot += VERTEX_ATTRIB_START_SLOT;
1599
1600 if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
1601 vs_slot--;
1602
1603 if (semantic_name == TGSI_SEMANTIC_POSITION)
1604 vs_slot = VERTEX_POSITION_SLOT;
1605
1606 STORE(C(vs_slot), vtxAttribMap, {0, slot});
1607 mapConstants.push_back(C(vs_slot));
1608 }
1609
1610 struct lp_build_mask_context mask;
1611 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
1612 lp_build_mask_begin(&mask, gallivm,
1613 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
1614
1615 // zero out cut buffer so we can load/modify/store bits
1616 for (uint32_t lane = 0; lane < mVWidth; ++lane)
1617 {
1618 Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
1619 #if LLVM_VERSION_MAJOR >= 10
1620 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH));
1621 #else
1622 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
1623 #endif
1624 }
1625
1626 struct swr_gs_llvm_iface gs_iface;
1627 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
1628 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
1629 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
1630 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
1631 gs_iface.pBuilder = this;
1632 gs_iface.pGsCtx = pGsCtx;
1633 gs_iface.pGsState = pGS;
1634 gs_iface.num_outputs = gs->info.base.num_outputs;
1635 gs_iface.num_verts_per_prim =
1636 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
1637 gs_iface.info = info;
1638 gs_iface.pVtxAttribMap = vtxAttribMap;
1639
1640 struct lp_build_tgsi_params params;
1641 memset(&params, 0, sizeof(params));
1642 params.type = lp_type_float_vec(32, 32 * 8);
1643 params.mask = & mask;
1644 params.consts_ptr = wrap(consts_ptr);
1645 params.const_sizes_ptr = wrap(const_sizes_ptr);
1646 params.system_values = &system_values;
1647 params.inputs = inputs;
1648 params.context_ptr = wrap(hPrivateData);
1649 params.sampler = sampler;
1650 params.info = &gs->info.base;
1651 params.gs_iface = &gs_iface.base;
1652
1653 lp_build_tgsi_soa(gallivm,
1654 gs->pipe.tokens,
1655 &params,
1656 outputs);
1657
1658 lp_build_mask_end(&mask);
1659
1660 sampler->destroy(sampler);
1661
1662 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1663
1664 RET_VOID();
1665
1666 gallivm_verify_function(gallivm, wrap(pFunction));
1667 gallivm_compile_module(gallivm);
1668
1669 PFN_GS_FUNC pFunc =
1670 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
1671
1672 debug_printf("geom shader %p\n", pFunc);
1673 assert(pFunc && "Error: GeomShader = NULL");
1674
1675 JM()->mIsModuleFinalized = true;
1676
1677 return pFunc;
1678 }
1679
1680 PFN_TES_FUNC
1681 BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
1682 {
1683 SWR_TS_STATE *pTS = &ctx->tsState;
1684 struct tgsi_shader_info *info = &ctx->tes->info.base;
1685
1686 // tessellation is enabled if TES is present
1687 // clear tessellation state here then
1688 memset(pTS, 0, sizeof(*pTS));
1689
1690 pTS->tsEnable = true;
1691
1692 unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
1693 unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
1694 bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
1695 bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
1696 SWR_TS_DOMAIN type = SWR_TS_ISOLINE;
1697 SWR_TS_PARTITIONING partitioning = SWR_TS_EVEN_FRACTIONAL;
1698 SWR_TS_OUTPUT_TOPOLOGY topology = SWR_TS_OUTPUT_POINT;
1699 PRIMITIVE_TOPOLOGY postDSTopology = TOP_POINT_LIST;
1700
1701 // TESS_TODO: move this to helper functions to improve readability
1702 switch (tes_prim_mode) {
1703 case PIPE_PRIM_LINES:
1704 type = SWR_TS_ISOLINE;
1705 postDSTopology = TOP_LINE_LIST;
1706 break;
1707 case PIPE_PRIM_TRIANGLES:
1708 type = SWR_TS_TRI;
1709 postDSTopology = TOP_TRIANGLE_LIST;
1710 break;
1711 case PIPE_PRIM_QUADS:
1712 type = SWR_TS_QUAD;
1713 // See OpenGL spec - quads are tessellated into triangles
1714 postDSTopology = TOP_TRIANGLE_LIST;
1715 break;
1716 default:
1717 assert(0);
1718 }
1719
1720 switch (tes_spacing) {
1721 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
1722 partitioning = SWR_TS_ODD_FRACTIONAL;
1723 break;
1724 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
1725 partitioning = SWR_TS_EVEN_FRACTIONAL;
1726 break;
1727 case PIPE_TESS_SPACING_EQUAL:
1728 partitioning = SWR_TS_INTEGER;
1729 break;
1730 default:
1731 assert(0);
1732 }
1733
1734 if (tes_point_mode) {
1735 topology = SWR_TS_OUTPUT_POINT;
1736 postDSTopology = TOP_POINT_LIST;
1737 }
1738 else if (tes_prim_mode == PIPE_PRIM_LINES) {
1739 topology = SWR_TS_OUTPUT_LINE;
1740 }
1741 else if (tes_vertex_order_cw) {
1742 topology = SWR_TS_OUTPUT_TRI_CW;
1743 }
1744 else {
1745 topology = SWR_TS_OUTPUT_TRI_CCW;
1746 }
1747
1748 pTS->domain = type;
1749 pTS->tsOutputTopology = topology;
1750 pTS->partitioning = partitioning;
1751 pTS->numDsOutputAttribs = info->num_outputs;
1752 pTS->postDSTopology = postDSTopology;
1753
1754 pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM;
1755 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1756 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1757 pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT;
1758
1759 struct swr_tess_evaluation_shader *tes = ctx->tes;
1760
1761 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1762 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1763
1764 memset(outputs, 0, sizeof(outputs));
1765
1766 AttrBuilder attrBuilder;
1767 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1768
1769 std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1770 PointerType::get(mInt8Ty, 0),
1771 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1772 FunctionType *tesFuncType =
1773 FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false);
1774
1775 // create new vertex shader function
1776 auto pFunction = Function::Create(tesFuncType,
1777 GlobalValue::ExternalLinkage,
1778 "TES",
1779 JM()->mpCurrentModule);
1780
1781 #if LLVM_VERSION_MAJOR < 5
1782 AttributeSet attrSet = AttributeSet::get(
1783 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1784 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1785 #else
1786 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1787 #endif
1788
1789 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1790 IRB()->SetInsertPoint(block);
1791 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1792
1793 auto argitr = pFunction->arg_begin();
1794 Value *hPrivateData = &*argitr++;
1795 hPrivateData->setName("hPrivateData");
1796 Value *pWorkerData = &*argitr++;
1797 pWorkerData->setName("pWorkerData");
1798 Value *pTesCtx = &*argitr++;
1799 pTesCtx->setName("tesCtx");
1800
1801 Value *consts_ptr =
1802 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)});
1803 consts_ptr->setName("tes_constants");
1804 Value *const_sizes_ptr =
1805 GEP(hPrivateData, {0, swr_draw_context_num_constantsTES});
1806 const_sizes_ptr->setName("num_tes_constants");
1807
1808 struct lp_build_sampler_soa *sampler =
1809 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL);
1810 assert(sampler != nullptr);
1811
1812 struct lp_bld_tgsi_system_values system_values;
1813 memset(&system_values, 0, sizeof(system_values));
1814
1815 // Load and calculate system values
1816 // Tessellation coordinates (gl_TessCoord)
1817 Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset");
1818 Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride");
1819 Value *vecIndex = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset});
1820
1821 Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3));
1822
1823 Value *tessCoordU = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU");
1824 STORE(tessCoordU, tess_coord, {0, 0});
1825 Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV");
1826 STORE(tessCoordV, tess_coord, {0, 1});
1827 Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW");
1828 STORE(tessCoordW, tess_coord, {0, 2});
1829 system_values.tess_coord = wrap(tess_coord);
1830
1831 // Primitive ID
1832 system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID"));
1833
1834 // Tessellation factors
1835 Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn});
1836 Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)});
1837
1838 assert(SWR_NUM_OUTER_TESS_FACTORS == 4);
1839 Value* sys_value_outer_factors = UndefValue::get(getVectorType(mFP32Ty, 4));
1840 for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) {
1841 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i});
1842 sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter");
1843 }
1844 system_values.tess_outer = wrap(sys_value_outer_factors);
1845
1846 assert(SWR_NUM_INNER_TESS_FACTORS == 2);
1847 Value* sys_value_inner_factors = UndefValue::get(getVectorType(mFP32Ty, 4));
1848 for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) {
1849 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i});
1850 sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner");
1851 }
1852 system_values.tess_inner = wrap(sys_value_inner_factors);
1853
1854 if (verbose_shader)
1855 {
1856 lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord);
1857 }
1858
1859 struct tgsi_shader_info *pPrevShader = nullptr;
1860
1861 if (ctx->tcs) {
1862 pPrevShader = &ctx->tcs->info.base;
1863 }
1864 else {
1865 pPrevShader = &ctx->vs->info.base;
1866 }
1867
1868 // Figure out how many per-patch attributes we have
1869 unsigned perPatchAttrs = 0;
1870 unsigned genericAttrs = 0;
1871 unsigned tessLevelAttrs = 0;
1872 unsigned sgvAttrs = 0;
1873 for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) {
1874 switch (pPrevShader->output_semantic_name[slot]) {
1875 case TGSI_SEMANTIC_PATCH:
1876 perPatchAttrs++;
1877 break;
1878 case TGSI_SEMANTIC_GENERIC:
1879 genericAttrs++;
1880 break;
1881 case TGSI_SEMANTIC_TESSINNER:
1882 case TGSI_SEMANTIC_TESSOUTER:
1883 tessLevelAttrs++;
1884 break;
1885 case TGSI_SEMANTIC_POSITION:
1886 case TGSI_SEMANTIC_CLIPDIST:
1887 case TGSI_SEMANTIC_PSIZE:
1888 sgvAttrs++;
1889 break;
1890 default:
1891 assert(!"Unknown semantic input in TES");
1892 }
1893 }
1894
1895 std::vector<Constant *> mapConstants;
1896 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1897 Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1898 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
1899 ubyte semantic_name = info->input_semantic_name[slot];
1900 ubyte semantic_idx = info->input_semantic_index[slot];
1901
1902 // Where in TCS output is my attribute?
1903 // TESS_TODO: revisit after implement pass-through TCS
1904 unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader);
1905 assert(tcs_slot < PIPE_MAX_SHADER_OUTPUTS);
1906
1907 // Skip tessellation levels - these go to the tessellator, not TES
1908 switch (semantic_name) {
1909 case TGSI_SEMANTIC_GENERIC:
1910 tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs;
1911 break;
1912 case TGSI_SEMANTIC_PATCH:
1913 tcs_slot = semantic_idx;
1914 break;
1915 case TGSI_SEMANTIC_POSITION:
1916 tcs_slot = VERTEX_POSITION_SLOT;
1917 break;
1918 case TGSI_SEMANTIC_CLIPDIST:
1919 case TGSI_SEMANTIC_PSIZE:
1920 break;
1921 default:
1922 assert(!"Unexpected semantic found while builiding TES input map");
1923 }
1924 if (semantic_name == TGSI_SEMANTIC_PATCH) {
1925 STORE(C(tcs_slot), patchAttribMap, {0, slot});
1926 } else {
1927 STORE(C(tcs_slot), vtxAttribMap, {0, slot});
1928 }
1929 mapConstants.push_back(C(tcs_slot));
1930 }
1931
1932 // Build execution mask
1933 struct lp_build_mask_context mask;
1934 Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask");
1935
1936 if (verbose_shader)
1937 lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val));
1938
1939 lp_build_mask_begin(&mask, gallivm,
1940 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
1941
1942 struct swr_tes_llvm_iface tes_iface;
1943
1944 tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input;
1945 tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input;
1946
1947 tes_iface.pBuilder = this;
1948 tes_iface.pTesCtx = pTesCtx;
1949 tes_iface.pTsState = pTS;
1950 tes_iface.num_outputs = tes->info.base.num_outputs;
1951 tes_iface.info = info;
1952 tes_iface.pVtxAttribMap = vtxAttribMap;
1953 tes_iface.pPatchAttribMap = patchAttribMap;
1954
1955 struct lp_build_tgsi_params params;
1956 memset(&params, 0, sizeof(params));
1957 params.type = lp_type_float_vec(32, 32 * 8);
1958 params.mask = & mask;
1959 params.consts_ptr = wrap(consts_ptr);
1960 params.const_sizes_ptr = wrap(const_sizes_ptr);
1961 params.system_values = &system_values;
1962 params.inputs = inputs;
1963 params.context_ptr = wrap(hPrivateData);
1964 params.sampler = sampler;
1965 params.info = &tes->info.base;
1966 params.tes_iface = &tes_iface.base;
1967
1968 // Build LLVM IR
1969 lp_build_tgsi_soa(gallivm,
1970 tes->pipe.tokens,
1971 &params,
1972 outputs);
1973
1974 lp_build_mask_end(&mask);
1975
1976 sampler->destroy(sampler);
1977
1978 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1979
1980 // Write output attributes
1981 Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut");
1982
1983 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
1984 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1985 if (!outputs[attrib][channel])
1986 continue;
1987
1988 Value *val = LOAD(unwrap(outputs[attrib][channel]));;
1989 Value *attribOffset =
1990 LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset});
1991
1992 // Assume we write possition
1993 Value* outputSlot = C(VERTEX_POSITION_SLOT);
1994 if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) {
1995 // No, it's a generic attribute, not a position - let's calculate output slot
1996 uint32_t outSlot = attrib;
1997 if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
1998 // this shader will write position, so in shader's term
1999 // output starts at attrib 1, but we will handle that separately,
2000 // so let's fix the outSlot
2001 outSlot--;
2002 }
2003 outputSlot = ADD(attribOffset, C(outSlot));
2004 }
2005
2006 Value *attribVecIndex =
2007 ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset);
2008
2009 uint32_t outputComponent = 0;
2010 uint32_t curComp = outputComponent + channel;
2011 auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp)));
2012 STOREV(val, dclOut, {outValIndex});
2013
2014 if (verbose_shader) {
2015 lp_build_printf(gallivm,
2016 "TES output [%d][%d]",
2017 C(attrib),
2018 C(channel));
2019 lp_build_print_value(gallivm, " = ", wrap(val));
2020 }
2021 }
2022 }
2023
2024 RET_VOID();
2025
2026 JM()->DumpToFile(pFunction, "src");
2027 gallivm_verify_function(gallivm, wrap(pFunction));
2028
2029 gallivm_compile_module(gallivm);
2030 JM()->DumpToFile(pFunction, "optimized");
2031
2032 PFN_TES_FUNC pFunc =
2033 (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
2034
2035 debug_printf("tess evaluation shader %p\n", pFunc);
2036 assert(pFunc && "Error: TessEvaluationShader = NULL");
2037
2038 JM()->DumpAsm(pFunction, "asm");
2039
2040 JM()->mIsModuleFinalized = true;
2041
2042 return pFunc;
2043 }
2044
2045 PFN_TCS_FUNC
2046 BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key)
2047 {
2048 SWR_TS_STATE *pTS = &ctx->tsState;
2049 struct tgsi_shader_info *info = &ctx->tcs->info.base;
2050
2051 pTS->numHsInputAttribs = info->num_inputs;
2052 pTS->numHsOutputAttribs = info->num_outputs;
2053
2054 pTS->hsAllocationSize = sizeof(ScalarPatch);
2055
2056 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
2057 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
2058
2059 struct swr_tess_control_shader *tcs = ctx->tcs;
2060
2061 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2062 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2063
2064 memset(outputs, 0, sizeof(outputs));
2065
2066 AttrBuilder attrBuilder;
2067 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
2068
2069 std::vector<Type *> tcsArgs{
2070 PointerType::get(Gen_swr_draw_context(JM()), 0),
2071 PointerType::get(mInt8Ty, 0),
2072 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2073 FunctionType *tcsFuncType =
2074 FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false);
2075
2076 // create new vertex shader function
2077 auto pFunction = Function::Create(tcsFuncType,
2078 GlobalValue::ExternalLinkage,
2079 "TCS",
2080 JM()->mpCurrentModule);
2081
2082 #if LLVM_VERSION_MAJOR < 5
2083 AttributeSet attrSet = AttributeSet::get(
2084 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2085 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2086 #else
2087 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2088 #endif
2089
2090 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2091 IRB()->SetInsertPoint(block);
2092 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2093
2094 auto argitr = pFunction->arg_begin();
2095 Value *hPrivateData = &*argitr++;
2096 hPrivateData->setName("hPrivateData");
2097 Value *pWorkerData = &*argitr++;
2098 pWorkerData->setName("pWorkerData");
2099 Value *pTcsCtx = &*argitr++;
2100 pTcsCtx->setName("tcsCtx");
2101
2102 Value *consts_ptr =
2103 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)});
2104 consts_ptr->setName("tcs_constants");
2105 Value *const_sizes_ptr =
2106 GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS});
2107 const_sizes_ptr->setName("num_tcs_constants");
2108
2109 struct lp_build_sampler_soa *sampler =
2110 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL);
2111 assert(sampler != nullptr);
2112
2113 struct lp_bld_tgsi_system_values system_values;
2114 memset(&system_values, 0, sizeof(system_values));
2115
2116 system_values.prim_id =
2117 wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID}));
2118
2119 system_values.invocation_id = wrap(VBROADCAST(C(0)));
2120 system_values.vertices_in = wrap(C(tcs->vertices_per_patch));
2121
2122 if (verbose_shader) {
2123 lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id);
2124 lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id);
2125 lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in);
2126 }
2127
2128 std::vector<Constant *> mapConstants;
2129 Value *vtxAttribMap =
2130 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2131
2132 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
2133 ubyte semantic_name = info->input_semantic_name[slot];
2134 ubyte semantic_idx = info->input_semantic_index[slot];
2135
2136 unsigned vs_slot =
2137 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
2138 assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
2139
2140 vs_slot += VERTEX_ATTRIB_START_SLOT;
2141
2142 if (ctx->vs->info.base.output_semantic_name[0]
2143 == TGSI_SEMANTIC_POSITION)
2144 vs_slot--;
2145
2146 if (semantic_name == TGSI_SEMANTIC_POSITION)
2147 vs_slot = VERTEX_POSITION_SLOT;
2148
2149 STORE(C(vs_slot), vtxAttribMap, {0, slot});
2150 mapConstants.push_back(C(vs_slot));
2151 }
2152
2153 // Prepare map of output attributes. Needed when shader instance wants
2154 // to read own output or output of other instance, which is allowed in TCS
2155 Value *vtxOutputAttribMap =
2156 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2157 // Map for per-patch attributes
2158 Value *patchOutputAttribMap =
2159 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2160 for (unsigned slot = 0; slot < info->num_outputs; slot++) {
2161 ubyte name = info->output_semantic_name[slot];
2162 int32_t idx = info->output_semantic_index[slot];
2163 if (name == TGSI_SEMANTIC_PATCH) {
2164 STORE(C(idx), patchOutputAttribMap, {0, slot});
2165 } else {
2166 int32_t target_slot = slot;
2167 if (name == TGSI_SEMANTIC_GENERIC) {
2168 target_slot += VERTEX_ATTRIB_START_SLOT;
2169 }
2170 // Now normalize target slot
2171 for (ubyte as = 0; as < slot; as++) {
2172 ubyte name = info->output_semantic_name[as];
2173 switch (name) {
2174 case TGSI_SEMANTIC_TESSOUTER:
2175 case TGSI_SEMANTIC_TESSINNER:
2176 case TGSI_SEMANTIC_PATCH:
2177 case TGSI_SEMANTIC_POSITION:
2178 target_slot--;
2179 }
2180 }
2181 if (name == TGSI_SEMANTIC_POSITION) {
2182 target_slot = VERTEX_POSITION_SLOT;
2183 }
2184 STORE(C(target_slot), vtxOutputAttribMap, {0, slot});
2185 mapConstants.push_back(C(target_slot));
2186 }
2187 }
2188
2189 struct lp_build_mask_context mask;
2190 Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask");
2191 lp_build_mask_begin(
2192 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
2193
2194 struct swr_tcs_llvm_iface tcs_iface;
2195
2196 tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output;
2197 tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input;
2198 tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output;
2199 tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier;
2200 tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue;
2201 tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue;
2202
2203 tcs_iface.pBuilder = this;
2204 tcs_iface.pTcsCtx = pTcsCtx;
2205 tcs_iface.pTsState = pTS;
2206 tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
2207 tcs_iface.info = info;
2208 tcs_iface.pVtxAttribMap = vtxAttribMap;
2209 tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap;
2210 tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap;
2211
2212 struct lp_build_tgsi_params params;
2213 memset(&params, 0, sizeof(params));
2214 params.type = lp_type_float_vec(32, 32 * 8);
2215 params.mask = &mask;
2216 params.consts_ptr = wrap(consts_ptr);
2217 params.const_sizes_ptr = wrap(const_sizes_ptr);
2218 params.system_values = &system_values;
2219 params.inputs = inputs;
2220 params.context_ptr = wrap(hPrivateData);
2221 params.sampler = sampler;
2222 params.info = &tcs->info.base;
2223 params.tcs_iface = &tcs_iface.base;
2224
2225 lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, &params, outputs);
2226
2227 lp_build_mask_end(&mask);
2228
2229 sampler->destroy(sampler);
2230
2231 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2232 RET_VOID();
2233
2234 JM()->DumpToFile(pFunction, "src");
2235 gallivm_verify_function(gallivm, wrap(pFunction));
2236 gallivm_compile_module(gallivm);
2237 JM()->DumpToFile(pFunction, "optimized");
2238
2239 PFN_TCS_FUNC pFunc =
2240 (PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
2241
2242 debug_printf("tess control shader %p\n", pFunc);
2243 assert(pFunc && "Error: TessControlShader = NULL");
2244 JM()->DumpAsm(pFunction, "asm");
2245
2246 JM()->mIsModuleFinalized = true;
2247
2248 return pFunc;
2249 }
2250
2251
2252 PFN_GS_FUNC
2253 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
2254 {
2255 BuilderSWR builder(
2256 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2257 "GS");
2258 PFN_GS_FUNC func = builder.CompileGS(ctx, key);
2259
2260 ctx->gs->map.insert(std::make_pair(key, std::unique_ptr<VariantGS>(new VariantGS(builder.gallivm, func))));
2261 return func;
2262 }
2263
2264 PFN_TCS_FUNC
2265 swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key)
2266 {
2267 BuilderSWR builder(
2268 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2269 "TCS");
2270 PFN_TCS_FUNC func = builder.CompileTCS(ctx, key);
2271
2272 ctx->tcs->map.insert(
2273 std::make_pair(key, std::unique_ptr<VariantTCS>(new VariantTCS(builder.gallivm, func))));
2274
2275 return func;
2276 }
2277
2278 PFN_TES_FUNC
2279 swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key)
2280 {
2281 BuilderSWR builder(
2282 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2283 "TES");
2284 PFN_TES_FUNC func = builder.CompileTES(ctx, key);
2285
2286 ctx->tes->map.insert(
2287 std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func))));
2288
2289 return func;
2290 }
2291
2292 void
2293 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
2294 {
2295 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2296 // interleave the simdvertex components into the dest simd16vertex
2297 // slot16offset = slot8offset * 2
2298 // comp16offset = comp8offset * 2 + alternateOffset
2299
2300 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
2301 Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
2302 STORE(pVal, pOut, {channel * 2});
2303 #else
2304 Value *pOut = GEP(pVtxOutput, {0, 0, slot});
2305 STORE(pVal, pOut, {0, channel});
2306 if (verbose_vs_shader) {
2307 lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel));
2308 lp_build_print_value(gallivm, "", wrap(pVal));
2309 }
2310 #endif
2311 }
2312
2313 PFN_VERTEX_FUNC
2314 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
2315 {
2316 struct swr_vertex_shader *swr_vs = ctx->vs;
2317
2318 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2319 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2320
2321 memset(outputs, 0, sizeof(outputs));
2322
2323 AttrBuilder attrBuilder;
2324 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
2325
2326 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
2327 PointerType::get(mInt8Ty, 0),
2328 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2329 FunctionType *vsFuncType =
2330 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
2331
2332 // create new vertex shader function
2333 auto pFunction = Function::Create(vsFuncType,
2334 GlobalValue::ExternalLinkage,
2335 "VS",
2336 JM()->mpCurrentModule);
2337 #if LLVM_VERSION_MAJOR < 5
2338 AttributeSet attrSet = AttributeSet::get(
2339 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2340 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2341 #else
2342 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2343 #endif
2344
2345 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2346 IRB()->SetInsertPoint(block);
2347 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2348
2349 auto argitr = pFunction->arg_begin();
2350 Value *hPrivateData = &*argitr++;
2351 hPrivateData->setName("hPrivateData");
2352 Value *pWorkerData = &*argitr++;
2353 pWorkerData->setName("pWorkerData");
2354 Value *pVsCtx = &*argitr++;
2355 pVsCtx->setName("vsCtx");
2356
2357 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
2358
2359 consts_ptr->setName("vs_constants");
2360 Value *const_sizes_ptr =
2361 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
2362 const_sizes_ptr->setName("num_vs_constants");
2363
2364 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
2365 #if USE_SIMD16_VS
2366 vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
2367 #endif
2368
2369 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
2370 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
2371 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2372 if (mask & (1 << channel)) {
2373 inputs[attrib][channel] =
2374 wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
2375 }
2376 }
2377 }
2378
2379 struct lp_build_sampler_soa *sampler =
2380 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
2381 assert(sampler != nullptr);
2382
2383 struct lp_bld_tgsi_system_values system_values;
2384 memset(&system_values, 0, sizeof(system_values));
2385 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
2386
2387 #if USE_SIMD16_VS
2388 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
2389 #else
2390 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
2391 #endif
2392
2393 #if USE_SIMD16_VS
2394 uint32_t vectorWidth = mVWidth16;
2395 #else
2396 uint32_t vectorWidth = mVWidth;
2397 #endif
2398
2399 struct lp_build_tgsi_params params;
2400 memset(&params, 0, sizeof(params));
2401 params.type = lp_type_float_vec(32, 32 * vectorWidth);
2402 params.consts_ptr = wrap(consts_ptr);
2403 params.const_sizes_ptr = wrap(const_sizes_ptr);
2404 params.system_values = &system_values;
2405 params.inputs = inputs;
2406 params.context_ptr = wrap(hPrivateData);
2407 params.sampler = sampler;
2408 params.info = &swr_vs->info.base;
2409
2410 lp_build_tgsi_soa(gallivm,
2411 swr_vs->pipe.tokens,
2412 &params,
2413 outputs);
2414
2415 sampler->destroy(sampler);
2416
2417 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2418
2419 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
2420 #if USE_SIMD16_VS
2421 vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
2422 #endif
2423
2424 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2425 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
2426 if (!outputs[attrib][channel])
2427 continue;
2428
2429 Value *val;
2430 uint32_t outSlot;
2431
2432 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
2433 if (channel != VERTEX_SGV_POINT_SIZE_COMP)
2434 continue;
2435 val = LOAD(unwrap(outputs[attrib][0]));
2436 outSlot = VERTEX_SGV_SLOT;
2437 } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
2438 val = LOAD(unwrap(outputs[attrib][channel]));
2439 outSlot = VERTEX_POSITION_SLOT;
2440 } else {
2441 val = LOAD(unwrap(outputs[attrib][channel]));
2442 outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
2443 if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
2444 outSlot--;
2445 }
2446
2447 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
2448 }
2449 }
2450
2451 if (ctx->rasterizer->clip_plane_enable ||
2452 swr_vs->info.base.culldist_writemask) {
2453 unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
2454
2455 unsigned cv = 0;
2456 if (swr_vs->info.base.writes_clipvertex) {
2457 cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
2458 &swr_vs->info.base);
2459 } else {
2460 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
2461 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
2462 swr_vs->info.base.output_semantic_index[i] == 0) {
2463 cv = i;
2464 break;
2465 }
2466 }
2467 }
2468 assert(cv < PIPE_MAX_SHADER_OUTPUTS);
2469 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
2470 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
2471 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
2472 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
2473
2474 tgsi_shader_info *pLastFE = &ctx->vs->info.base;
2475
2476 if (ctx->gs) {
2477 pLastFE = &ctx->gs->info.base;
2478 }
2479 else if (ctx->tes) {
2480 pLastFE = &ctx->tes->info.base;
2481 }
2482 else if (ctx->tcs) {
2483 pLastFE = &ctx->tcs->info.base;
2484 }
2485
2486 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
2487 // clip distance overrides user clip planes
2488 if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) ||
2489 ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) {
2490 unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE);
2491 assert(cv < PIPE_MAX_SHADER_OUTPUTS);
2492 if (val < 4) {
2493 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
2494 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
2495 } else {
2496 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
2497 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
2498 }
2499 continue;
2500 }
2501
2502 if (!(clip_mask & (1 << val)))
2503 continue;
2504
2505 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
2506 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
2507 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
2508 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
2509 #if USE_SIMD16_VS
2510 Value *bpx = VBROADCAST_16(px);
2511 Value *bpy = VBROADCAST_16(py);
2512 Value *bpz = VBROADCAST_16(pz);
2513 Value *bpw = VBROADCAST_16(pw);
2514 #else
2515 Value *bpx = VBROADCAST(px);
2516 Value *bpy = VBROADCAST(py);
2517 Value *bpz = VBROADCAST(pz);
2518 Value *bpw = VBROADCAST(pw);
2519 #endif
2520 Value *dist = FADD(FMUL(unwrap(cx), bpx),
2521 FADD(FMUL(unwrap(cy), bpy),
2522 FADD(FMUL(unwrap(cz), bpz),
2523 FMUL(unwrap(cw), bpw))));
2524
2525 if (val < 4)
2526 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
2527 else
2528 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
2529 }
2530 }
2531
2532 RET_VOID();
2533
2534 JM()->DumpToFile(pFunction, "vs_function1");
2535 gallivm_verify_function(gallivm, wrap(pFunction));
2536 gallivm_compile_module(gallivm);
2537 JM()->DumpToFile(pFunction, "vs_function2");
2538
2539 // lp_debug_dump_value(func);
2540
2541 PFN_VERTEX_FUNC pFunc =
2542 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
2543
2544 JM()->DumpAsm(pFunction, "vs_function_asm");
2545 debug_printf("vert shader %p\n", pFunc);
2546 assert(pFunc && "Error: VertShader = NULL");
2547
2548 JM()->mIsModuleFinalized = true;
2549
2550 return pFunc;
2551 }
2552
2553 PFN_VERTEX_FUNC
2554 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
2555 {
2556 if (!ctx->vs->pipe.tokens)
2557 return NULL;
2558
2559 BuilderSWR builder(
2560 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2561 "VS");
2562 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
2563
2564 ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func))));
2565 return func;
2566 }
2567
2568 unsigned
2569 swr_so_adjust_attrib(unsigned in_attrib,
2570 swr_vertex_shader *swr_vs)
2571 {
2572 ubyte semantic_name;
2573 unsigned attrib;
2574
2575 attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
2576
2577 if (swr_vs) {
2578 semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
2579 if (semantic_name == TGSI_SEMANTIC_POSITION) {
2580 attrib = VERTEX_POSITION_SLOT;
2581 } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
2582 attrib = VERTEX_SGV_SLOT;
2583 } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
2584 attrib = VERTEX_SGV_SLOT;
2585 } else {
2586 if (swr_vs->info.base.writes_position) {
2587 attrib--;
2588 }
2589 }
2590 }
2591
2592 return attrib;
2593 }
2594
2595 static unsigned
2596 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
2597 {
2598 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
2599 if ((info->output_semantic_name[i] == name)
2600 && (info->output_semantic_index[i] == index)) {
2601 return i;
2602 }
2603 }
2604
2605 return 0xFFFFFFFF;
2606 }
2607
2608 PFN_PIXEL_KERNEL
2609 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
2610 {
2611 struct swr_fragment_shader *swr_fs = ctx->fs;
2612
2613 struct tgsi_shader_info *pPrevShader;
2614 if (ctx->gs)
2615 pPrevShader = &ctx->gs->info.base;
2616 else if (ctx->tes)
2617 pPrevShader = &ctx->tes->info.base;
2618 else
2619 pPrevShader = &ctx->vs->info.base;
2620
2621 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2622 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2623
2624 memset(inputs, 0, sizeof(inputs));
2625 memset(outputs, 0, sizeof(outputs));
2626
2627 struct lp_build_sampler_soa *sampler = NULL;
2628
2629 AttrBuilder attrBuilder;
2630 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
2631
2632 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
2633 PointerType::get(mInt8Ty, 0),
2634 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2635 FunctionType *funcType =
2636 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
2637
2638 auto pFunction = Function::Create(funcType,
2639 GlobalValue::ExternalLinkage,
2640 "FS",
2641 JM()->mpCurrentModule);
2642 #if LLVM_VERSION_MAJOR < 5
2643 AttributeSet attrSet = AttributeSet::get(
2644 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2645 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2646 #else
2647 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2648 #endif
2649
2650 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2651 IRB()->SetInsertPoint(block);
2652 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2653
2654 auto args = pFunction->arg_begin();
2655 Value *hPrivateData = &*args++;
2656 hPrivateData->setName("hPrivateData");
2657 Value *pWorkerData = &*args++;
2658 pWorkerData->setName("pWorkerData");
2659 Value *pPS = &*args++;
2660 pPS->setName("psCtx");
2661
2662 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
2663 consts_ptr->setName("fs_constants");
2664 Value *const_sizes_ptr =
2665 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
2666 const_sizes_ptr->setName("num_fs_constants");
2667
2668 // load *pAttribs, *pPerspAttribs
2669 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
2670 Value *pPerspAttribs =
2671 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
2672
2673 swr_fs->constantMask = 0;
2674 swr_fs->flatConstantMask = 0;
2675 swr_fs->pointSpriteMask = 0;
2676
2677 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
2678 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
2679 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
2680 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
2681
2682 if (!mask)
2683 continue;
2684
2685 // load i,j
2686 Value *vi = nullptr, *vj = nullptr;
2687 switch (interpLoc) {
2688 case TGSI_INTERPOLATE_LOC_CENTER:
2689 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
2690 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
2691 break;
2692 case TGSI_INTERPOLATE_LOC_CENTROID:
2693 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
2694 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
2695 break;
2696 case TGSI_INTERPOLATE_LOC_SAMPLE:
2697 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
2698 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
2699 break;
2700 }
2701
2702 // load/compute w
2703 Value *vw = nullptr, *pAttribs;
2704 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
2705 interpMode == TGSI_INTERPOLATE_COLOR) {
2706 pAttribs = pPerspAttribs;
2707 switch (interpLoc) {
2708 case TGSI_INTERPOLATE_LOC_CENTER:
2709 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
2710 break;
2711 case TGSI_INTERPOLATE_LOC_CENTROID:
2712 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
2713 break;
2714 case TGSI_INTERPOLATE_LOC_SAMPLE:
2715 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
2716 break;
2717 }
2718 } else {
2719 pAttribs = pRawAttribs;
2720 vw = VIMMED1(1.f);
2721 }
2722
2723 vw->setName("w");
2724
2725 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
2726 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
2727
2728 if (semantic_name == TGSI_SEMANTIC_FACE) {
2729 Value *ff =
2730 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
2731 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
2732 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
2733
2734 inputs[attrib][0] = wrap(ff);
2735 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2736 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2737 inputs[attrib][3] = wrap(VIMMED1(1.0f));
2738 continue;
2739 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
2740 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
2741 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
2742 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
2743 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
2744 } else {
2745 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
2746 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
2747 }
2748 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
2749 inputs[attrib][3] =
2750 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
2751 continue;
2752 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer
2753 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex});
2754 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex");
2755 inputs[attrib][0] = wrap(ff);
2756 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2757 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2758 inputs[attrib][3] = wrap(VIMMED1(0.0f));
2759 continue;
2760 } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex
2761 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex});
2762 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex");
2763 inputs[attrib][0] = wrap(ff);
2764 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2765 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2766 inputs[attrib][3] = wrap(VIMMED1(0.0f));
2767 continue;
2768 }
2769 unsigned linkedAttrib =
2770 locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
2771
2772 uint32_t extraAttribs = 0;
2773 if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
2774 /* non-gs generated primID - need to grab from swizzleMap override */
2775 linkedAttrib = pPrevShader->num_outputs - 1;
2776 swr_fs->constantMask |= 1 << linkedAttrib;
2777 extraAttribs++;
2778 } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
2779 key.sprite_coord_enable & (1 << semantic_idx)) {
2780 /* we add an extra attrib to the backendState in swr_update_derived. */
2781 linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
2782 swr_fs->pointSpriteMask |= (1 << linkedAttrib);
2783 extraAttribs++;
2784 } else if (linkedAttrib + 1 == 0xFFFFFFFF) {
2785 inputs[attrib][0] = wrap(VIMMED1(0.0f));
2786 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2787 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2788 inputs[attrib][3] = wrap(VIMMED1(1.0f));
2789 /* If we're reading in color and 2-sided lighting is enabled, we have
2790 * to keep going.
2791 */
2792 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
2793 continue;
2794 } else {
2795 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2796 swr_fs->constantMask |= 1 << linkedAttrib;
2797 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
2798 swr_fs->flatConstantMask |= 1 << linkedAttrib;
2799 }
2800 }
2801
2802 unsigned bcolorAttrib = 0xFFFFFFFF;
2803 Value *offset = NULL;
2804 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
2805 bcolorAttrib = locate_linkage(
2806 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader);
2807 /* Neither front nor back colors were available. Nothing to load. */
2808 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
2809 continue;
2810 /* If there is no front color, just always use the back color. */
2811 if (linkedAttrib + 1 == 0xFFFFFFFF)
2812 linkedAttrib = bcolorAttrib;
2813
2814 if (bcolorAttrib != 0xFFFFFFFF) {
2815 bcolorAttrib -= 1;
2816 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2817 swr_fs->constantMask |= 1 << bcolorAttrib;
2818 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
2819 swr_fs->flatConstantMask |= 1 << bcolorAttrib;
2820 }
2821
2822 unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
2823
2824 if (diff) {
2825 Value *back =
2826 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
2827
2828 offset = MUL(back, C(diff));
2829 offset->setName("offset");
2830 }
2831 }
2832 }
2833
2834 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2835 if (mask & (1 << channel)) {
2836 Value *indexA = C(linkedAttrib * 12 + channel);
2837 Value *indexB = C(linkedAttrib * 12 + channel + 4);
2838 Value *indexC = C(linkedAttrib * 12 + channel + 8);
2839
2840 if (offset) {
2841 indexA = ADD(indexA, offset);
2842 indexB = ADD(indexB, offset);
2843 indexC = ADD(indexC, offset);
2844 }
2845
2846 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
2847 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
2848 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
2849
2850 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2851 inputs[attrib][channel] = wrap(va);
2852 } else {
2853 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
2854
2855 vc = FMUL(vk, vc);
2856
2857 Value *interp = FMUL(va, vi);
2858 Value *interp1 = FMUL(vb, vj);
2859 interp = FADD(interp, interp1);
2860 interp = FADD(interp, vc);
2861 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
2862 interpMode == TGSI_INTERPOLATE_COLOR)
2863 interp = FMUL(interp, vw);
2864 inputs[attrib][channel] = wrap(interp);
2865 }
2866 }
2867 }
2868 }
2869
2870 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
2871 assert(sampler != nullptr);
2872
2873 struct lp_bld_tgsi_system_values system_values;
2874 memset(&system_values, 0, sizeof(system_values));
2875
2876 struct lp_build_mask_context mask;
2877 bool uses_mask = false;
2878
2879 if (swr_fs->info.base.uses_kill ||
2880 key.poly_stipple_enable) {
2881 Value *vActiveMask = NULL;
2882 if (swr_fs->info.base.uses_kill) {
2883 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
2884 }
2885 if (key.poly_stipple_enable) {
2886 // first get fragment xy coords and clip to stipple bounds
2887 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
2888 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
2889 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
2890 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
2891
2892 // stipple pattern is 32x32, which means that one line of stipple
2893 // is stored in one word:
2894 // vXstipple is bit offset inside 32-bit stipple word
2895 // vYstipple is word index is stipple array
2896 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
2897 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
2898
2899 // grab stipple pattern base address
2900 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
2901 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
2902
2903 // peform a gather to grab stipple words for each lane
2904 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
2905 VIMMED1(0xffffffff), 4);
2906
2907 // create a mask with one bit corresponding to the x stipple
2908 // and AND it with the pattern, to see if we have a bit
2909 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
2910 Value *vStippleMask = AND(vStipple, vBitMask);
2911 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
2912 vStippleMask = VMASK(vStippleMask);
2913
2914 if (swr_fs->info.base.uses_kill) {
2915 vActiveMask = AND(vActiveMask, vStippleMask);
2916 } else {
2917 vActiveMask = vStippleMask;
2918 }
2919 }
2920 lp_build_mask_begin(
2921 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
2922 uses_mask = true;
2923 }
2924
2925 struct lp_build_tgsi_params params;
2926 memset(&params, 0, sizeof(params));
2927 params.type = lp_type_float_vec(32, 32 * 8);
2928 params.mask = uses_mask ? &mask : NULL;
2929 params.consts_ptr = wrap(consts_ptr);
2930 params.const_sizes_ptr = wrap(const_sizes_ptr);
2931 params.system_values = &system_values;
2932 params.inputs = inputs;
2933 params.context_ptr = wrap(hPrivateData);
2934 params.sampler = sampler;
2935 params.info = &swr_fs->info.base;
2936
2937 lp_build_tgsi_soa(gallivm,
2938 swr_fs->pipe.tokens,
2939 &params,
2940 outputs);
2941
2942 sampler->destroy(sampler);
2943
2944 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2945
2946 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
2947 attrib++) {
2948 switch (swr_fs->info.base.output_semantic_name[attrib]) {
2949 case TGSI_SEMANTIC_POSITION: {
2950 // write z
2951 LLVMValueRef outZ =
2952 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
2953 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
2954 break;
2955 }
2956 case TGSI_SEMANTIC_COLOR: {
2957 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2958 if (!outputs[attrib][channel])
2959 continue;
2960
2961 LLVMValueRef out =
2962 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
2963 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
2964 swr_fs->info.base.output_semantic_index[attrib] == 0) {
2965 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
2966 STORE(unwrap(out),
2967 pPS,
2968 {0, SWR_PS_CONTEXT_shaded, rt, channel});
2969 }
2970 } else {
2971 STORE(unwrap(out),
2972 pPS,
2973 {0,
2974 SWR_PS_CONTEXT_shaded,
2975 swr_fs->info.base.output_semantic_index[attrib],
2976 channel});
2977 }
2978 }
2979 break;
2980 }
2981 default: {
2982 fprintf(stderr,
2983 "unknown output from FS %s[%d]\n",
2984 tgsi_semantic_names[swr_fs->info.base
2985 .output_semantic_name[attrib]],
2986 swr_fs->info.base.output_semantic_index[attrib]);
2987 break;
2988 }
2989 }
2990 }
2991
2992 LLVMValueRef mask_result = 0;
2993 if (uses_mask) {
2994 mask_result = lp_build_mask_end(&mask);
2995 }
2996
2997 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2998
2999 if (uses_mask) {
3000 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
3001 }
3002
3003 RET_VOID();
3004
3005 gallivm_verify_function(gallivm, wrap(pFunction));
3006
3007 gallivm_compile_module(gallivm);
3008
3009 // after the gallivm passes, we have to lower the core's intrinsics
3010 llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
3011 lowerPass.add(createLowerX86Pass(this));
3012 lowerPass.run(*pFunction);
3013
3014 PFN_PIXEL_KERNEL kernel =
3015 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
3016 debug_printf("frag shader %p\n", kernel);
3017 assert(kernel && "Error: FragShader = NULL");
3018
3019 JM()->mIsModuleFinalized = true;
3020
3021 return kernel;
3022 }
3023
3024 PFN_PIXEL_KERNEL
3025 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
3026 {
3027 if (!ctx->fs->pipe.tokens)
3028 return NULL;
3029
3030 BuilderSWR builder(
3031 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
3032 "FS");
3033 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
3034
3035 ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func))));
3036 return func;
3037 }