gallium/swr: Fix various asserts and security issues
[mesa.git] / src / gallium / drivers / swr / swr_shader.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include <llvm/Config/llvm-config.h>
25
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
29 #undef DEBUG
30 #endif
31
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
36
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
39 #endif
40
41 #include "state.h"
42 #include "gen_state_llvm.h"
43 #include "builder.h"
44 #include "functionpasses/passes.h"
45
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53 #include "gallivm/lp_bld_const.h"
54 #include "gallivm/lp_bld_printf.h"
55
56 #include "swr_context.h"
57 #include "gen_surf_state_llvm.h"
58 #include "gen_swr_context_llvm.h"
59 #include "swr_resource.h"
60 #include "swr_state.h"
61 #include "swr_screen.h"
62
63
64 /////////////////////////////////////////////////////////////////////////
65
66 #include <stdio.h>
67 #include <inttypes.h>
68
69 #include "util/u_debug.h"
70 #include "util/u_memory.h"
71 #include "util/u_string.h"
72
73 #include "gallivm/lp_bld_type.h"
74
// Compile-time switch for the extra shader tracing in this file: true only
// when the translation unit is built with DEBUG defined.
constexpr bool verbose_shader =
#ifdef DEBUG
   true;
#else
   false;
#endif
80
81 using namespace SwrJit;
82
83 static unsigned
84 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
85
86 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
87 {
88 return !memcmp(&lhs, &rhs, sizeof(lhs));
89 }
90
91 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
92 {
93 return !memcmp(&lhs, &rhs, sizeof(lhs));
94 }
95
96 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
97 {
98 return !memcmp(&lhs, &rhs, sizeof(lhs));
99 }
100
101 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
102 {
103 return !memcmp(&lhs, &rhs, sizeof(lhs));
104 }
105
106 bool operator==(const swr_jit_tcs_key &lhs, const swr_jit_tcs_key &rhs)
107 {
108 return !memcmp(&lhs, &rhs, sizeof(lhs));
109 }
110
111 bool operator==(const swr_jit_tes_key &lhs, const swr_jit_tes_key &rhs)
112 {
113 return !memcmp(&lhs, &rhs, sizeof(lhs));
114 }
115
116
117 static void
118 swr_generate_sampler_key(const struct lp_tgsi_info &info,
119 struct swr_context *ctx,
120 enum pipe_shader_type shader_type,
121 struct swr_jit_sampler_key &key)
122 {
123 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
124
125 for (unsigned i = 0; i < key.nr_samplers; i++) {
126 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
127 lp_sampler_static_sampler_state(
128 &key.sampler[i].sampler_state,
129 ctx->samplers[shader_type][i]);
130 }
131 }
132
133 /*
134 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
135 * are dx10-style? Can't really have mixed opcodes, at least not
136 * if we want to skip the holes here (without rescanning tgsi).
137 */
138 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
139 key.nr_sampler_views =
140 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
141 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
142 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
143 const struct pipe_sampler_view *view =
144 ctx->sampler_views[shader_type][i];
145 lp_sampler_static_texture_state(
146 &key.sampler[i].texture_state, view);
147 if (view) {
148 struct swr_resource *swr_res = swr_resource(view->texture);
149 const struct util_format_description *desc =
150 util_format_description(view->format);
151 if (swr_res->has_depth && swr_res->has_stencil &&
152 !util_format_has_depth(desc))
153 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
154 }
155 }
156 }
157 } else {
158 key.nr_sampler_views = key.nr_samplers;
159 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
160 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
161 const struct pipe_sampler_view *view =
162 ctx->sampler_views[shader_type][i];
163 lp_sampler_static_texture_state(
164 &key.sampler[i].texture_state, view);
165 if (view) {
166 struct swr_resource *swr_res = swr_resource(view->texture);
167 const struct util_format_description *desc =
168 util_format_description(view->format);
169 if (swr_res->has_depth && swr_res->has_stencil &&
170 !util_format_has_depth(desc))
171 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
172 }
173 }
174 }
175 }
176 }
177
178 void
179 swr_generate_fs_key(struct swr_jit_fs_key &key,
180 struct swr_context *ctx,
181 swr_fragment_shader *swr_fs)
182 {
183 memset(&key, 0, sizeof(key));
184
185 key.nr_cbufs = ctx->framebuffer.nr_cbufs;
186 key.light_twoside = ctx->rasterizer->light_twoside;
187 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
188
189 struct tgsi_shader_info *pPrevShader;
190 if (ctx->gs)
191 pPrevShader = &ctx->gs->info.base;
192 else if (ctx->tes)
193 pPrevShader = &ctx->tes->info.base;
194 else
195 pPrevShader = &ctx->vs->info.base;
196
197 memcpy(&key.vs_output_semantic_name,
198 &pPrevShader->output_semantic_name,
199 sizeof(key.vs_output_semantic_name));
200 memcpy(&key.vs_output_semantic_idx,
201 &pPrevShader->output_semantic_index,
202 sizeof(key.vs_output_semantic_idx));
203
204 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
205
206 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
207 ctx->poly_stipple.prim_is_poly;
208 }
209
210 void
211 swr_generate_vs_key(struct swr_jit_vs_key &key,
212 struct swr_context *ctx,
213 swr_vertex_shader *swr_vs)
214 {
215 memset(&key, 0, sizeof(key));
216
217 key.clip_plane_mask =
218 swr_vs->info.base.clipdist_writemask ?
219 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
220 ctx->rasterizer->clip_plane_enable;
221
222 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
223 }
224
225 void
226 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
227 struct swr_vertex_element_state *velems)
228 {
229 memset(&key, 0, sizeof(key));
230
231 key.fsState = velems->fsState;
232 }
233
234 void
235 swr_generate_gs_key(struct swr_jit_gs_key &key,
236 struct swr_context *ctx,
237 swr_geometry_shader *swr_gs)
238 {
239 memset(&key, 0, sizeof(key));
240
241 struct tgsi_shader_info *pPrevShader = nullptr;
242
243 if (ctx->tes) {
244 pPrevShader = &ctx->tes->info.base;
245 } else {
246 pPrevShader = &ctx->vs->info.base;
247 }
248
249 memcpy(&key.vs_output_semantic_name,
250 &pPrevShader->output_semantic_name,
251 sizeof(key.vs_output_semantic_name));
252 memcpy(&key.vs_output_semantic_idx,
253 &pPrevShader->output_semantic_index,
254 sizeof(key.vs_output_semantic_idx));
255
256 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
257 }
258
259 void
260 swr_generate_tcs_key(struct swr_jit_tcs_key &key,
261 struct swr_context *ctx,
262 swr_tess_control_shader *swr_tcs)
263 {
264 memset(&key, 0, sizeof(key));
265
266 struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
267
268 memcpy(&key.vs_output_semantic_name,
269 &pPrevShader->output_semantic_name,
270 sizeof(key.vs_output_semantic_name));
271 memcpy(&key.vs_output_semantic_idx,
272 &pPrevShader->output_semantic_index,
273 sizeof(key.vs_output_semantic_idx));
274
275 key.clip_plane_mask =
276 swr_tcs->info.base.clipdist_writemask ?
277 swr_tcs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
278 ctx->rasterizer->clip_plane_enable;
279
280 swr_generate_sampler_key(swr_tcs->info, ctx, PIPE_SHADER_TESS_CTRL, key);
281 }
282
283 void
284 swr_generate_tes_key(struct swr_jit_tes_key &key,
285 struct swr_context *ctx,
286 swr_tess_evaluation_shader *swr_tes)
287 {
288 memset(&key, 0, sizeof(key));
289
290 struct tgsi_shader_info *pPrevShader = nullptr;
291
292 if (ctx->tcs) {
293 pPrevShader = &ctx->tcs->info.base;
294 }
295 else {
296 pPrevShader = &ctx->vs->info.base;
297 }
298
299 SWR_ASSERT(pPrevShader != nullptr, "TES: No TCS or VS defined");
300
301 memcpy(&key.prev_output_semantic_name,
302 &pPrevShader->output_semantic_name,
303 sizeof(key.prev_output_semantic_name));
304 memcpy(&key.prev_output_semantic_idx,
305 &pPrevShader->output_semantic_index,
306 sizeof(key.prev_output_semantic_idx));
307
308 key.clip_plane_mask =
309 swr_tes->info.base.clipdist_writemask ?
310 swr_tes->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
311 ctx->rasterizer->clip_plane_enable;
312
313 swr_generate_sampler_key(swr_tes->info, ctx, PIPE_SHADER_TESS_EVAL, key);
314 }
315
316 struct BuilderSWR : public Builder {
317 BuilderSWR(JitManager *pJitMgr, const char *pName)
318 : Builder(pJitMgr)
319 {
320 pJitMgr->SetupNewModule();
321 gallivm = gallivm_create(pName, wrap(&JM()->mContext));
322 pJitMgr->mpCurrentModule = unwrap(gallivm->module);
323 }
324
325 ~BuilderSWR() {
326 gallivm_free_ir(gallivm);
327 }
328
329 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
330 unsigned slot, unsigned channel);
331
332 struct gallivm_state *gallivm;
333 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
334 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
335 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
336 PFN_TCS_FUNC CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key);
337 PFN_TES_FUNC CompileTES(struct swr_context *ctx, swr_jit_tes_key &key);
338
339 // GS-specific emit functions
340 LLVMValueRef
341 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
342 struct lp_build_context * bld,
343 boolean is_vindex_indirect,
344 LLVMValueRef vertex_index,
345 boolean is_aindex_indirect,
346 LLVMValueRef attrib_index,
347 LLVMValueRef swizzle_index);
348 void
349 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
350 struct lp_build_context * bld,
351 LLVMValueRef (*outputs)[4],
352 LLVMValueRef emitted_vertices_vec,
353 LLVMValueRef stream_id);
354
355 void
356 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
357 struct lp_build_context * bld,
358 LLVMValueRef total_emitted_vertices_vec_ptr,
359 LLVMValueRef verts_per_prim_vec,
360 LLVMValueRef emitted_prims_vec,
361 LLVMValueRef mask_vec);
362
363 void
364 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
365 LLVMValueRef total_emitted_vertices_vec,
366 LLVMValueRef emitted_prims_vec);
367
368 // TCS-specific emit functions
369 void swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld);
370 void swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld);
371
372 LLVMValueRef
373 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
374 struct lp_build_tgsi_context * bld_base,
375 boolean is_vindex_indirect,
376 LLVMValueRef vertex_index,
377 boolean is_aindex_indirect,
378 LLVMValueRef attrib_index,
379 LLVMValueRef swizzle_index);
380
381 LLVMValueRef
382 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
383 struct lp_build_tgsi_context * bld_base,
384 boolean is_vindex_indirect,
385 LLVMValueRef vertex_index,
386 boolean is_aindex_indirect,
387 LLVMValueRef attrib_index,
388 LLVMValueRef swizzle_index,
389 uint32_t name);
390
391 void
392 swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
393 struct lp_build_tgsi_context * bld_base,
394 unsigned name,
395 boolean is_vindex_indirect,
396 LLVMValueRef vertex_index,
397 boolean is_aindex_indirect,
398 LLVMValueRef attrib_index,
399 LLVMValueRef swizzle_index,
400 LLVMValueRef value);
401
402 // Barrier implementation (available only in TCS)
403 void
404 swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
405 struct lp_build_tgsi_context *bld_base);
406
407 // TES-specific emit functions
408 LLVMValueRef
409 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
410 struct lp_build_tgsi_context * bld_base,
411 boolean is_vindex_indirect,
412 LLVMValueRef vertex_index,
413 boolean is_aindex_indirect,
414 LLVMValueRef attrib_index,
415 LLVMValueRef swizzle_index);
416
417 LLVMValueRef
418 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
419 struct lp_build_tgsi_context * bld_base,
420 boolean is_aindex_indirect,
421 LLVMValueRef attrib_index,
422 LLVMValueRef swizzle_index);
423 };
424
// Geometry-shader interface handed to gallivm.  The static trampolines cast
// an lp_build_gs_iface pointer back to this struct, so `base` has to remain
// the first member.
struct swr_gs_llvm_iface {
   struct lp_build_gs_iface base;
   // Shader info; emit_vertex reads output_semantic_name and writes_position.
   struct tgsi_shader_info *info;

   // Builder the trampolines forward to.
   BuilderSWR *pBuilder;

   // IR value of the GS context; SWR_GS_CONTEXT_* fields are loaded from it.
   Value *pGsCtx;
   // GS state; outputTopology is consulted when emitting vertices/primitives.
   SWR_GS_STATE *pGsState;
   // Number of output attributes iterated per emitted vertex.
   uint32_t num_outputs;
   uint32_t num_verts_per_prim;

   // IR array indexed by attribute index, yielding the input attribute slot.
   Value *pVtxAttribMap;
};
438
// Tessellation-control interface handed to gallivm.  The static trampolines
// cast an lp_build_tcs_iface pointer back to this struct, so `base` has to
// remain the first member.
struct swr_tcs_llvm_iface {
   struct lp_build_tcs_iface base;
   struct tgsi_shader_info *info;

   // Builder the trampolines forward to.
   BuilderSWR *pBuilder;

   // IR value of the TCS context; input vertices are addressed through it.
   Value *pTcsCtx;
   SWR_TS_STATE *pTsState;

   // Upper bound of the per-output-vertex loop opened by the prologue.
   uint32_t output_vertices;

   // Loop begun in the TCS prologue and closed in the TCS epilogue.
   struct lp_build_for_loop_state loop_state;

   // IR array indexed by attribute index, yielding the input attribute slot.
   Value *pVtxAttribMap;
   // NOTE(review): presumably the equivalent maps for per-vertex and
   // per-patch outputs; their use is not shown here -- confirm.
   Value *pVtxOutputAttribMap;
   Value *pPatchOutputAttribMap;
};
456
// Tessellation-evaluation interface handed to gallivm.  The static
// trampolines cast an lp_build_tes_iface pointer back to this struct, so
// `base` has to remain the first member.
struct swr_tes_llvm_iface {
   struct lp_build_tes_iface base;
   struct tgsi_shader_info *info;

   // Builder the trampolines forward to.
   BuilderSWR *pBuilder;

   // NOTE(review): presumably the IR value of the TES context, by analogy
   // with the GS/TCS interfaces -- confirm against the TES fetch functions.
   Value *pTesCtx;
   SWR_TS_STATE *pTsState;

   uint32_t num_outputs;

   // NOTE(review): presumably attribute-index to slot maps for per-vertex
   // and per-patch inputs -- confirm against the TES fetch functions.
   Value *pVtxAttribMap;
   Value *pPatchAttribMap;
};
471
472 // trampoline functions so we can use the builder llvm construction methods
473 static LLVMValueRef
474 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
475 struct lp_build_context * bld,
476 boolean is_vindex_indirect,
477 LLVMValueRef vertex_index,
478 boolean is_aindex_indirect,
479 LLVMValueRef attrib_index,
480 LLVMValueRef swizzle_index)
481 {
482 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
483
484 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld,
485 is_vindex_indirect,
486 vertex_index,
487 is_aindex_indirect,
488 attrib_index,
489 swizzle_index);
490 }
491
492 static void
493 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
494 struct lp_build_context * bld,
495 LLVMValueRef (*outputs)[4],
496 LLVMValueRef emitted_vertices_vec,
497 LLVMValueRef stream_id)
498 {
499 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
500
501 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld,
502 outputs,
503 emitted_vertices_vec,
504 stream_id);
505 }
506
507 static void
508 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
509 struct lp_build_context * bld,
510 LLVMValueRef total_emitted_vertices_vec_ptr,
511 LLVMValueRef verts_per_prim_vec,
512 LLVMValueRef emitted_prims_vec,
513 LLVMValueRef mask_vec)
514 {
515 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
516
517 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld,
518 total_emitted_vertices_vec_ptr,
519 verts_per_prim_vec,
520 emitted_prims_vec,
521 mask_vec);
522 }
523
524 static void
525 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
526 LLVMValueRef total_emitted_vertices_vec,
527 LLVMValueRef emitted_prims_vec)
528 {
529 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
530
531 iface->pBuilder->swr_gs_llvm_epilogue(gs_base,
532 total_emitted_vertices_vec,
533 emitted_prims_vec);
534 }
535
536 static LLVMValueRef
537 swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
538 struct lp_build_context * bld,
539 boolean is_vindex_indirect,
540 LLVMValueRef vertex_index,
541 boolean is_aindex_indirect,
542 LLVMValueRef attrib_index,
543 LLVMValueRef swizzle_index)
544 {
545 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
546 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
547
548 return iface->pBuilder->swr_tcs_llvm_fetch_input(tcs_iface, bld_base,
549 is_vindex_indirect,
550 vertex_index,
551 is_aindex_indirect,
552 attrib_index,
553 swizzle_index);
554 }
555
556 static LLVMValueRef
557 swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
558 struct lp_build_context * bld,
559 boolean is_vindex_indirect,
560 LLVMValueRef vertex_index,
561 boolean is_aindex_indirect,
562 LLVMValueRef attrib_index,
563 LLVMValueRef swizzle_index,
564 uint32_t name)
565 {
566 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
567 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
568
569 return iface->pBuilder->swr_tcs_llvm_fetch_output(tcs_iface, bld_base,
570 is_vindex_indirect,
571 vertex_index,
572 is_aindex_indirect,
573 attrib_index,
574 swizzle_index,
575 name);
576 }
577
578
579 static void
580 swr_tcs_llvm_emit_prologue(struct lp_build_context* bld)
581 {
582 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
583 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
584 iface->pBuilder->swr_tcs_llvm_emit_prologue(bld_base);
585 }
586
587 static void
588 swr_tcs_llvm_emit_epilogue(struct lp_build_context* bld)
589 {
590 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
591 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
592 iface->pBuilder->swr_tcs_llvm_emit_epilogue(bld_base);
593 }
594
595 static
596 void swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
597 struct lp_build_context * bld,
598 unsigned name,
599 boolean is_vindex_indirect,
600 LLVMValueRef vertex_index,
601 boolean is_aindex_indirect,
602 LLVMValueRef attrib_index,
603 LLVMValueRef swizzle_index,
604 LLVMValueRef value)
605 {
606 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
607 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
608
609 iface->pBuilder->swr_tcs_llvm_store_output(tcs_iface,
610 bld_base,
611 name,
612 is_vindex_indirect,
613 vertex_index,
614 is_aindex_indirect,
615 attrib_index,
616 swizzle_index,
617 value);
618 }
619
620
621 static
622 void swr_tcs_llvm_emit_barrier(struct lp_build_context *bld)
623 {
624 lp_build_tgsi_soa_context* bld_base = (lp_build_tgsi_soa_context*)bld;
625 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld_base->tcs_iface;
626
627 iface->pBuilder->swr_tcs_llvm_emit_barrier(bld_base->tcs_iface, &bld_base->bld_base);
628 }
629
630
631 static LLVMValueRef
632 swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
633 struct lp_build_context * bld,
634 boolean is_vindex_indirect,
635 LLVMValueRef vertex_index,
636 boolean is_aindex_indirect,
637 LLVMValueRef attrib_index,
638 LLVMValueRef swizzle_index)
639 {
640 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
641 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
642
643 return iface->pBuilder->swr_tes_llvm_fetch_vtx_input(tes_iface, bld_base,
644 is_vindex_indirect,
645 vertex_index,
646 is_aindex_indirect,
647 attrib_index,
648 swizzle_index);
649 }
650
651 static LLVMValueRef
652 swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
653 struct lp_build_context * bld,
654 boolean is_aindex_indirect,
655 LLVMValueRef attrib_index,
656 LLVMValueRef swizzle_index)
657 {
658 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
659 struct lp_build_tgsi_context *bld_base = (struct lp_build_tgsi_context*)bld;
660
661 return iface->pBuilder->swr_tes_llvm_fetch_patch_input(tes_iface, bld_base,
662 is_aindex_indirect,
663 attrib_index,
664 swizzle_index);
665 }
666
667 LLVMValueRef
668 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
669 struct lp_build_context * bld,
670 boolean is_vindex_indirect,
671 LLVMValueRef vertex_index,
672 boolean is_aindex_indirect,
673 LLVMValueRef attrib_index,
674 LLVMValueRef swizzle_index)
675 {
676 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
677 Value *vert_index = unwrap(vertex_index);
678 Value *attr_index = unwrap(attrib_index);
679
680 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
681
682 if (is_vindex_indirect || is_aindex_indirect) {
683 int i;
684 Value *res = unwrap(bld->zero);
685 struct lp_type type = bld->type;
686
687 for (i = 0; i < type.length; i++) {
688 Value *vert_chan_index = vert_index;
689 Value *attr_chan_index = attr_index;
690
691 if (is_vindex_indirect) {
692 vert_chan_index = VEXTRACT(vert_index, C(i));
693 }
694 if (is_aindex_indirect) {
695 attr_chan_index = VEXTRACT(attr_index, C(i));
696 }
697
698 Value *attrib =
699 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
700
701 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
702 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
703
704 Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
705 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
706
707 Value *value = VEXTRACT(pInput, C(i));
708 res = VINSERT(res, value, C(i));
709 }
710
711 return wrap(res);
712 } else {
713 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
714
715 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
716 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
717
718 Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);
719
720 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
721
722 return wrap(pInput);
723 }
724 }
725
// GS output stream layout
// VERTEX_COUNT_SIZE is the offset added to skip the count header at the
// start of a stream; the emit count itself is stored in the first DWORD.
// VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE is the offset at which vertex
// data starts.  NOTE(review): offsets appear to be in bytes -- confirm.
#define VERTEX_COUNT_SIZE 32
#define CONTROL_HEADER_SIZE (8*32)
729
730 void
731 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
732 struct lp_build_context * bld,
733 LLVMValueRef (*outputs)[4],
734 LLVMValueRef emitted_vertices_vec,
735 LLVMValueRef stream_id)
736 {
737 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
738
739 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
740 const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
741 const uint32_t attribSize = 4 * sizeof(float);
742 const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
743 Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
744
745 Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
746 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));
747
748 Value *pStack = STACKSAVE();
749 Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
750
751 for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
752 uint32_t attribSlot = attrib;
753 uint32_t sgvChannel = 0;
754 if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
755 attribSlot = VERTEX_SGV_SLOT;
756 sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
757 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
758 attribSlot = VERTEX_SGV_SLOT;
759 sgvChannel = VERTEX_SGV_RTAI_COMP;
760 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
761 attribSlot = VERTEX_SGV_SLOT;
762 sgvChannel = VERTEX_SGV_VAI_COMP;
763 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
764 attribSlot = VERTEX_POSITION_SLOT;
765 } else {
766 attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
767 if (iface->info->writes_position) {
768 attribSlot--;
769 }
770 }
771
772 Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?
773
774 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
775 Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
776 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
777 Value *pStreamOffset = GEP(pStream, pLaneOffset);
778 pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);
779
780 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
781 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
782
783 for (uint32_t channel = 0; channel < 4; ++channel) {
784 Value *vData;
785
786 if (attribSlot == VERTEX_SGV_SLOT)
787 vData = LOAD(unwrap(outputs[attrib][0]));
788 else
789 vData = LOAD(unwrap(outputs[attrib][channel]));
790
791 if (attribSlot != VERTEX_SGV_SLOT ||
792 sgvChannel == channel) {
793 vData = VEXTRACT(vData, C(lane));
794 STORE(vData, pStreamOffset);
795 }
796 pStreamOffset = GEP(pStreamOffset, C(1));
797 }
798 }
799 }
800
801 /* When the output type is not points, the geometry shader may not
802 * output data to multiple streams. So early exit here.
803 */
804 if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
805 STACKRESTORE(pStack);
806 return;
807 }
808
809 // Info about stream id for each vertex
810 // is coded in 2 bits (4 vert per byte "box"):
811 // ----------------- ----------------- ----
812 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
813 // ----------------- ----------------- ----
814
815 // Calculate where need to put stream id for current vert
816 // in 1 byte "box".
817 Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));
818
819 // Calculate in which box put stream id for current vert.
820 Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));
821
822 // Skip count header
823 Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));
824
825 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
826 Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
827 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
828
829 Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));
830
831 // Just make sure that not overflow max - stream id = (0,1,2,3)
832 Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);
833
834 // Shift it to correct position in byte "box"
835 vVal = SHL(vVal, pShift);
836
837 // Info about other vertices can be already stored
838 // so we need to read and add bits from current vert info.
839 Value *storedValue = LOAD(pStreamOffset);
840 vVal = OR(storedValue, vVal);
841 STORE(vVal, pStreamOffset);
842 }
843
844 STACKRESTORE(pStack);
845 }
846
847 void
848 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
849 struct lp_build_context * bld,
850 LLVMValueRef total_emitted_vertices_vec,
851 LLVMValueRef verts_per_prim_vec,
852 LLVMValueRef emitted_prims_vec,
853 LLVMValueRef mask_vec)
854 {
855 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
856
857 /* When the output type is points, the geometry shader may output data
858 * to multiple streams, and end_primitive has no effect. Info about
859 * stream id for vertices is stored into the same place in memory where
860 * end primitive info is stored so early exit in this case.
861 */
862 if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
863 return;
864 }
865
866 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
867
868 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
869 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
870
871 uint32_t vertsPerPrim = iface->num_verts_per_prim;
872
873 Value *vCount =
874 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
875 unwrap(verts_per_prim_vec));
876
877 vCount = unwrap(total_emitted_vertices_vec);
878
879 Value *mask = unwrap(mask_vec);
880 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
881 mask = AND(mask, cmpMask);
882 vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
883
884 vCount = SUB(vCount, VIMMED1(1));
885 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
886 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
887
888 vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
889
890 Value *pStack = STACKSAVE();
891 Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
892
893 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
894 Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
895 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
896 Value *pStreamOffset = GEP(pStream, vLaneOffset);
897
898 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
899 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
900
901 Value *vVal = LOAD(pStreamOffset);
902 vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
903 STORE(vVal, pStreamOffset);
904 }
905
906 STACKRESTORE(pStack);
907 }
908
909 void
910 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
911 LLVMValueRef total_emitted_vertices_vec,
912 LLVMValueRef emitted_prims_vec)
913 {
914 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
915
916 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
917
918 // Store emit count to each output stream in the first DWORD
919 for (uint32_t lane = 0; lane < mVWidth; ++lane)
920 {
921 Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
922 pStream = BITCAST(pStream, mInt32PtrTy);
923 Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
924 STORE(pLaneCount, pStream);
925 }
926 }
927
928 void
929 BuilderSWR::swr_tcs_llvm_emit_prologue(struct lp_build_tgsi_soa_context* bld)
930 {
931 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
932
933 // Iterate for all the vertices in the output patch
934 lp_build_for_loop_begin(&iface->loop_state, gallivm,
935 lp_build_const_int32(gallivm, 0),
936 LLVMIntULT,
937 lp_build_const_int32(gallivm, iface->output_vertices),
938 lp_build_const_int32(gallivm, 1));
939
940 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
941 bld->system_values.invocation_id = wrap(VBROADCAST(unwrap(iface->loop_state.counter)));
942
943 if (verbose_shader) {
944 lp_build_printf(gallivm, "Prologue LOOP: Iteration %d BEGIN\n", iface->loop_state.counter);
945 lp_build_print_value(gallivm, "LOOP: InvocationId: \n", bld->system_values.invocation_id);
946 }
947 }
948
949 void
950 BuilderSWR::swr_tcs_llvm_emit_epilogue(struct lp_build_tgsi_soa_context* bld)
951 {
952 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)bld->tcs_iface;
953
954 if (verbose_shader) {
955 lp_build_printf(gallivm, "Epilogue LOOP: Iteration %d END\n", iface->loop_state.counter);
956 }
957 lp_build_for_loop_end(&iface->loop_state);
958 }
959
960 LLVMValueRef
961 BuilderSWR::swr_tcs_llvm_fetch_input(const struct lp_build_tcs_iface *tcs_iface,
962 struct lp_build_tgsi_context * bld_base,
963 boolean is_vindex_indirect,
964 LLVMValueRef vertex_index,
965 boolean is_aindex_indirect,
966 LLVMValueRef attrib_index,
967 LLVMValueRef swizzle_index)
968 {
969 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
970 Value *vert_index = unwrap(vertex_index);
971 Value *attr_index = unwrap(attrib_index);
972
973 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
974
975 if (verbose_shader) {
976 lp_build_print_value(gallivm, "TCS: Vertex index: ", vertex_index);
977 lp_build_print_value(gallivm, "TCS: Attrib index: ", attrib_index);
978 lp_build_print_value(gallivm, "TCS: Swizzle index: ", swizzle_index);
979 }
980
981 if (is_vindex_indirect) {
982 vert_index = VEXTRACT(vert_index, C(0));
983 if (verbose_shader) {
984 lp_build_print_value(gallivm, "TCS: Extracted vertex index: ", vertex_index);
985 }
986 }
987
988 if (is_aindex_indirect) {
989 attr_index = VEXTRACT(attr_index, C(0));
990 if (verbose_shader) {
991 lp_build_print_value(gallivm, "TCS: Extracted attrib index: ", attrib_index);
992 }
993 }
994
995 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
996 if (verbose_shader) {
997 lp_build_print_value(gallivm, "TCS: Attrib index loaded from map: ", wrap(attrib));
998 }
999
1000 Value *pBase = GEP(iface->pTcsCtx,
1001 { C(0), C(SWR_HS_CONTEXT_vert), vert_index,
1002 C(simdvertex_attrib), attrib /*attr_index*/, unwrap(swizzle_index) });
1003
1004 LLVMValueRef res = wrap(LOAD(pBase));
1005
1006 if (verbose_shader) {
1007 lp_build_print_value(gallivm, "TCS input fetched: ", res);
1008 }
1009 return res;
1010 }
1011
1012 LLVMValueRef
1013 BuilderSWR::swr_tcs_llvm_fetch_output(const struct lp_build_tcs_iface *tcs_iface,
1014 struct lp_build_tgsi_context * bld_base,
1015 boolean is_vindex_indirect,
1016 LLVMValueRef vertex_index,
1017 boolean is_aindex_indirect,
1018 LLVMValueRef attrib_index,
1019 LLVMValueRef swizzle_index,
1020 uint32_t name)
1021 {
1022 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1023
1024 Value *vert_index = unwrap(vertex_index);
1025 Value *attr_index = unwrap(attrib_index);
1026
1027 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1028
1029 if (verbose_shader) {
1030 lp_build_print_value(gallivm, "++TCSo: Vertex index: ", vertex_index);
1031 lp_build_print_value(gallivm, "++TCSo: Attrib index: ", wrap(attr_index));
1032 lp_build_print_value(gallivm, "++TCSo: Swizzle index: ", swizzle_index);
1033 }
1034
1035 if (is_vindex_indirect) {
1036 vert_index = VEXTRACT(vert_index, C(0));
1037 if (verbose_shader)
1038 {
1039 lp_build_print_value(gallivm, "TCSo: Extracted vertex index: ", vertex_index);
1040 }
1041 }
1042
1043 if (is_aindex_indirect) {
1044 attr_index = VEXTRACT(attr_index, C(0));
1045 if (verbose_shader) {
1046 lp_build_print_value(gallivm, "TCSo: Extracted attrib index: ", attrib_index);
1047 }
1048 }
1049
1050 Value* res = unwrap(bld_base->base.zero);
1051
1052 for (uint32_t lane = 0; lane < mVWidth; lane++) {
1053 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
1054 Value* pCpOut = GEP(p1, {lane});
1055
1056 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
1057
1058 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
1059 Value* tessFactorArray = nullptr;
1060 if (name == TGSI_SEMANTIC_TESSOUTER) {
1061 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
1062 } else {
1063 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
1064 }
1065 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
1066 res = VINSERT(res, LOAD(tessFactor), C(lane));
1067
1068 } else if (name == TGSI_SEMANTIC_PATCH) {
1069 lp_build_print_value(gallivm, "bbbbb TCS per-patch attr_index: ", wrap(attr_index));
1070 Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attr_index, unwrap(swizzle_index)});
1071 res = VINSERT(res, LOAD(attr), C(lane));
1072 if (verbose_shader) {
1073 lp_build_print_value(gallivm, "++TCSo per-patch lane (patch-id): ", wrap(C(lane)));
1074 lp_build_print_value(gallivm, "++TCSo per-patch loaded value: ", wrap(res));
1075 }
1076 } else {
1077 // Generic attribute
1078 Value *attrib =
1079 LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
1080 if (verbose_shader)
1081 {
1082 lp_build_print_value(gallivm, "TCSo: Attrib index from map: ", wrap(attrib));
1083 }
1084 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp), vert_index,
1085 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
1086
1087 res = VINSERT(res, LOAD(attr_chan), C(lane));
1088 }
1089 }
1090
1091 if (verbose_shader) {
1092 lp_build_print_value(gallivm, "TCSo: output fetched: ", wrap(res));
1093 }
1094 return wrap(res);
1095 }
1096
1097 void
1098 BuilderSWR::swr_tcs_llvm_store_output(const struct lp_build_tcs_iface *tcs_iface,
1099 struct lp_build_tgsi_context *bld_base,
1100 unsigned name,
1101 boolean is_vindex_indirect,
1102 LLVMValueRef vertex_index,
1103 boolean is_aindex_indirect,
1104 LLVMValueRef attrib_index,
1105 LLVMValueRef swizzle_index,
1106 LLVMValueRef value)
1107 {
1108 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1109 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
1110
1111 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1112
1113 if (verbose_shader) {
1114 lp_build_printf(gallivm, "[TCS OUT] =============================================\n");
1115 }
1116
1117 if (verbose_shader) {
1118 lp_build_print_value(gallivm, "[TCS OUT] Store mask: ", bld->exec_mask.exec_mask);
1119 lp_build_print_value(gallivm, "[TCS OUT] Store value: ", value);
1120 }
1121
1122 Value *vert_index = unwrap(vertex_index);
1123 Value *attr_index = unwrap(attrib_index);
1124
1125 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1126
1127 if (verbose_shader) {
1128 lp_build_print_value(gallivm, "[TCS OUT] Vertex index: ", vertex_index);
1129 lp_build_print_value(gallivm, "[TCS OUT] Attrib index: ", wrap(attr_index));
1130 lp_build_print_value(gallivm, "[TCS OUT] Swizzle index: ", swizzle_index);
1131 }
1132
1133 if (is_vindex_indirect) {
1134 vert_index = VEXTRACT(vert_index, C(0));
1135 if (verbose_shader)
1136 {
1137 lp_build_print_value(gallivm, "[TCS OUT] Extracted vertex index: ", vertex_index);
1138 }
1139 }
1140
1141 if (is_aindex_indirect) {
1142 attr_index = VEXTRACT(attr_index, C(0));
1143 if (verbose_shader) {
1144 lp_build_print_value(gallivm, "[TCS OUT] Extracted attrib index: ", wrap(attr_index));
1145 }
1146 }
1147
1148 for (uint32_t lane = 0; lane < mVWidth; lane++) {
1149 Value* p1 = LOAD(iface->pTcsCtx, {0, SWR_HS_CONTEXT_pCPout});
1150 Value* pCpOut = GEP(p1, {lane});
1151
1152 if (name == TGSI_SEMANTIC_TESSOUTER || name == TGSI_SEMANTIC_TESSINNER) {
1153 Value* tessFactors = GEP(pCpOut, {(uint32_t)0, ScalarPatch_tessFactors});
1154 Value* tessFactorArray = nullptr;
1155 if (name == TGSI_SEMANTIC_TESSOUTER) {
1156 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_OuterTessFactors});
1157 } else {
1158 tessFactorArray = GEP(tessFactors, {(uint32_t)0, SWR_TESSELLATION_FACTORS_InnerTessFactors});
1159 }
1160 Value* tessFactor = GEP(tessFactorArray, {C(0), unwrap(swizzle_index)});
1161 Value* valueToStore = VEXTRACT(unwrap(value), C(lane));
1162 struct lp_exec_mask *mask = &bld->exec_mask;
1163 if (mask->has_mask) {
1164 Value *originalVal = LOAD(tessFactor);
1165 Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
1166 valueToStore = SELECT(vMask, valueToStore, originalVal);
1167 }
1168 STORE(valueToStore, tessFactor);
1169 if (verbose_shader) {
1170 lp_build_print_value(gallivm, "[TCS OUT][FACTOR] Stored value: ", wrap(valueToStore));
1171 }
1172 } else if (name == TGSI_SEMANTIC_PATCH) {
1173 Value* attrib = LOAD(GEP(iface->pPatchOutputAttribMap, {C(0), attr_index}));
1174 if (verbose_shader) {
1175 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index: ", wrap(vert_index));
1176 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index: ", wrap(attr_index));
1177 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vert_index_indirect: ", wrap(C(is_vindex_indirect)));
1178 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr_index_indirect: ", wrap(C(is_aindex_indirect)));
1179 lp_build_print_value(gallivm, "[TCS OUT][PATCH] attr index loaded from map: ", wrap(attrib));
1180 }
1181 Value* attr = GEP(pCpOut, {C(0), C(ScalarPatch_patchData), C(ScalarCPoint_attrib), attrib});
1182 Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
1183 if (verbose_shader) {
1184 lp_build_print_value(gallivm, "[TCS OUT][PATCH] lane (patch-id): ", wrap(C(lane)));
1185 lp_build_print_value(gallivm, "[TCS OUT][PATCH] value to store: ", value);
1186 lp_build_print_value(gallivm, "[TCS OUT][PATCH] per-patch value to store: ", wrap(value_to_store));
1187 lp_build_print_value(gallivm, "[TCS OUT][PATCH] chan_index: ", swizzle_index);
1188 }
1189 struct lp_exec_mask *mask = &bld->exec_mask;
1190 if (mask->has_mask) {
1191 Value *originalVal = LOADV(attr, {C(0), unwrap(swizzle_index)});
1192 Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
1193 value_to_store = SELECT(vMask, BITCAST(value_to_store, mFP32Ty), originalVal);
1194 if (verbose_shader) {
1195 lp_build_print_value(gallivm, "[TCS OUT][PATCH] store mask: ", bld->exec_mask.exec_mask);
1196 lp_build_print_value(gallivm, "[TCS OUT][PATCH] loaded original value: ", wrap(originalVal));
1197 lp_build_print_value(gallivm, "[TCS OUT][PATCH] vMask: ", wrap(vMask));
1198 lp_build_print_value(gallivm, "[TCS OUT][PATCH] selected value to store: ", wrap(value_to_store));
1199 }
1200 }
1201 STOREV(value_to_store, attr, {C(0), unwrap(swizzle_index)});
1202 if (verbose_shader) {
1203 lp_build_print_value(gallivm, "[TCS OUT][PATCH] stored value: ", wrap(value_to_store));
1204 }
1205 } else {
1206 Value* value_to_store = VEXTRACT(unwrap(value), C(lane));
1207 Value* attrib = LOAD(GEP(iface->pVtxOutputAttribMap, {C(0), attr_index}));
1208
1209 if (verbose_shader) {
1210 lp_build_print_value(gallivm, "[TCS OUT][VTX] invocation_id: ", bld->system_values.invocation_id);
1211 lp_build_print_value(gallivm, "[TCS OUT][VTX] attribIndex: ", wrap(attr_index));
1212 lp_build_print_value(gallivm, "[TCS OUT][VTX] attrib read from map: ", wrap(attrib));
1213 lp_build_print_value(gallivm, "[TCS OUT][VTX] chan_index: ", swizzle_index);
1214 lp_build_print_value(gallivm, "[TCS OUT][VTX] value: ", value);
1215 lp_build_print_value(gallivm, "[TCS OUT][VTX] value_to_store: ", wrap(value_to_store));
1216 }
1217
1218 Value* attr_chan = GEP(pCpOut, {C(0), C(ScalarPatch_cp),
1219 VEXTRACT(unwrap(bld->system_values.invocation_id), C(0)),
1220 C(ScalarCPoint_attrib), attrib, unwrap(swizzle_index)});
1221
1222 // Mask output values if needed
1223 struct lp_exec_mask *mask = &bld->exec_mask;
1224 if (mask->has_mask) {
1225 Value *originalVal = LOAD(attr_chan);
1226 Value *vMask = TRUNC(VEXTRACT(unwrap(mask->exec_mask), C(lane)), mInt1Ty);
1227 // convert input to float before trying to store
1228 value_to_store = SELECT(vMask, BITCAST(value_to_store, mFP32Ty), originalVal);
1229 }
1230 STORE(value_to_store, attr_chan);
1231 if (verbose_shader) {
1232 lp_build_print_value(gallivm, "[TCS OUT][VTX] stored: ", wrap(value_to_store));
1233 }
1234 }
1235 }
1236 }
1237
1238
1239
1240 void
1241 BuilderSWR::swr_tcs_llvm_emit_barrier(const struct lp_build_tcs_iface *tcs_iface,
1242 struct lp_build_tgsi_context *bld_base)
1243 {
1244 swr_tcs_llvm_iface *iface = (swr_tcs_llvm_iface*)tcs_iface;
1245 struct lp_build_tgsi_soa_context* bld = (struct lp_build_tgsi_soa_context*)bld_base;
1246
1247 if (verbose_shader) {
1248 lp_build_printf(gallivm, "Barrier LOOP: Iteration %d END\n", iface->loop_state.counter);
1249 }
1250
1251 // End previous loop
1252 lp_build_for_loop_end(&iface->loop_state);
1253
1254 // Start new one
1255 lp_build_for_loop_begin(&iface->loop_state, gallivm,
1256 lp_build_const_int32(gallivm, 0),
1257 LLVMIntULT,
1258 lp_build_const_int32(gallivm, iface->output_vertices),
1259 lp_build_const_int32(gallivm, 1));
1260
1261
1262 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1263
1264 bld->system_values.invocation_id = wrap(VBROADCAST(unwrap(iface->loop_state.counter)));
1265
1266 if (verbose_shader) {
1267 lp_build_printf(gallivm, "Barrier LOOP: Iteration %d BEGIN\n", iface->loop_state.counter);
1268 lp_build_print_value(gallivm, "LOOP: InvocationId: \n", bld->system_values.invocation_id);
1269 }
1270 }
1271
1272
1273 LLVMValueRef
1274 BuilderSWR::swr_tes_llvm_fetch_patch_input(const struct lp_build_tes_iface *tes_iface,
1275 struct lp_build_tgsi_context * bld_base,
1276 boolean is_aindex_indirect,
1277 LLVMValueRef attrib_index,
1278 LLVMValueRef swizzle_index)
1279 {
1280 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
1281 Value *attr_index = unwrap(attrib_index);
1282 Value *res = unwrap(bld_base->base.zero);
1283
1284 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1285
1286 if (verbose_shader) {
1287 lp_build_printf(gallivm, "[TES IN][PATCH] --------------------------------------\n");
1288 }
1289
1290 if (is_aindex_indirect) {
1291 int i;
1292 struct lp_type type = bld_base->base.type;
1293
1294 for (i = 0; i < type.length; i++) {
1295 Value *attr_chan_index = attr_index;
1296
1297 if (is_aindex_indirect) {
1298 attr_chan_index = VEXTRACT(attr_index, C(i));
1299 }
1300
1301 Value *attrib =
1302 LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_chan_index}));
1303
1304 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1305 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData});
1306 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib});
1307 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)});
1308 if (verbose_shader) {
1309 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
1310 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_chan_index));
1311 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib));
1312 lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
1313 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val));
1314 }
1315 res = VINSERT(res, Val, C(i));
1316 }
1317 } else {
1318 Value *attrib = LOAD(GEP(iface->pPatchAttribMap, {C(0), attr_index}));
1319
1320 Value *pCpIn = LOAD(iface->pTesCtx, {(uint32_t)0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1321 Value *pPatchData = GEP(pCpIn, {(uint32_t)0, ScalarPatch_patchData});
1322 Value *pAttr = GEP(pPatchData, {(uint32_t)0, ScalarCPoint_attrib});
1323 Value *Val = LOADV(pAttr, {C(0), attrib, unwrap(swizzle_index)});
1324 if (verbose_shader) {
1325 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib_index: ", attrib_index);
1326 lp_build_print_value(gallivm, "[TES IN][PATCH] attr_chan_index: ", wrap(attr_index));
1327 lp_build_print_value(gallivm, "[TES IN][PATCH] attrib read from map: ", wrap(attrib));
1328 lp_build_print_value(gallivm, "[TES IN][PATCH] swizzle_index: ", swizzle_index);
1329 lp_build_print_value(gallivm, "[TES IN][PATCH] Loaded: ", wrap(Val));
1330 }
1331 res = VBROADCAST(Val);
1332 }
1333 if (verbose_shader) {
1334 lp_build_print_value(gallivm, "[TES IN][PATCH] returning: ", wrap(res));
1335 }
1336 return wrap(res);
1337 }
1338
1339
1340
1341 LLVMValueRef
1342 BuilderSWR::swr_tes_llvm_fetch_vtx_input(const struct lp_build_tes_iface *tes_iface,
1343 struct lp_build_tgsi_context * bld_base,
1344 boolean is_vindex_indirect,
1345 LLVMValueRef vertex_index,
1346 boolean is_aindex_indirect,
1347 LLVMValueRef attrib_index,
1348 LLVMValueRef swizzle_index)
1349 {
1350 swr_tes_llvm_iface *iface = (swr_tes_llvm_iface*)tes_iface;
1351 Value *vert_index = unwrap(vertex_index);
1352 Value *attr_index = unwrap(attrib_index);
1353
1354 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1355
1356 if (verbose_shader) {
1357 lp_build_printf(gallivm, "[TES IN][VTX] --------------------------------------\n");
1358 }
1359
1360 Value *res = unwrap(bld_base->base.zero);
1361 if (is_vindex_indirect || is_aindex_indirect) {
1362 int i;
1363 struct lp_type type = bld_base->base.type;
1364
1365 for (i = 0; i < type.length; i++) {
1366 Value *vert_chan_index = vert_index;
1367 Value *attr_chan_index = attr_index;
1368
1369 if (is_vindex_indirect) {
1370 vert_chan_index = VEXTRACT(vert_index, C(i));
1371 }
1372 if (is_aindex_indirect) {
1373 attr_chan_index = VEXTRACT(attr_index, C(i));
1374 }
1375
1376 Value *attrib =
1377 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
1378
1379 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1380 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
1381 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_chan_index});
1382 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
1383 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
1384 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
1385 if (verbose_shader) {
1386 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
1387 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index));
1388 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
1389 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
1390 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
1391 }
1392 res = VINSERT(res, Val, C(i));
1393 }
1394 } else {
1395 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
1396
1397 Value *pCpIn = LOAD(iface->pTesCtx, {0, SWR_DS_CONTEXT_pCpIn}, "pCpIn");
1398 Value *pCp = GEP(pCpIn, {0, ScalarPatch_cp});
1399 Value *pVertex = GEP(pCp, {(Value*)C(0), vert_index});
1400 Value *pAttrTab = GEP(pVertex, {uint32_t(0), uint32_t(0)});
1401 Value *pAttr = GEP(pAttrTab, {(Value*)C(0), attrib});
1402 Value *Val = LOADV(pAttr, {C(0), unwrap(swizzle_index)});
1403 if (verbose_shader) {
1404 lp_build_print_value(gallivm, "[TES IN][VTX] attrib_index: ", attrib_index);
1405 lp_build_print_value(gallivm, "[TES IN][VTX] attr_chan_index: ", wrap(attr_index));
1406 lp_build_print_value(gallivm, "[TES IN][VTX] attrib read from map: ", wrap(attrib));
1407 lp_build_print_value(gallivm, "[TES IN][VTX] swizzle_index: ", swizzle_index);
1408 lp_build_print_value(gallivm, "[TES IN][VTX] Loaded: ", wrap(Val));
1409 }
1410 res = VBROADCAST(Val);
1411 }
1412 if (verbose_shader) {
1413 lp_build_print_value(gallivm, "[TES IN][VTX] returning: ", wrap(res));
1414 }
1415 return wrap(res);
1416 }
1417
1418
1419
1420
1421 PFN_GS_FUNC
1422 BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
1423 {
1424 SWR_GS_STATE *pGS = &ctx->gs->gsState;
1425 struct tgsi_shader_info *info = &ctx->gs->info.base;
1426
1427 memset(pGS, 0, sizeof(*pGS));
1428
1429 pGS->gsEnable = true;
1430
1431 pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
1432 pGS->outputTopology =
1433 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM], 0);
1434
1435 /* It's +1 because emit_vertex in swr is always called exactly one time more
1436 * than max_vertices passed in Geometry Shader. We need to allocate more memory
1437 * to avoid crash/memory overwritten.
1438 */
1439 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
1440 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
1441
1442 // If point primitive then assume to use multiple streams
1443 if(pGS->outputTopology == TOP_POINT_LIST) {
1444 pGS->isSingleStream = false;
1445 } else {
1446 pGS->isSingleStream = true;
1447 pGS->singleStreamID = 0;
1448 }
1449
1450 pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
1451 pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
1452 pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
1453 pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
1454 pGS->controlDataOffset = VERTEX_COUNT_SIZE;
1455 pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
1456
1457 pGS->allocationSize =
1458 VERTEX_COUNT_SIZE + // vertex count
1459 CONTROL_HEADER_SIZE + // control header
1460 (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
1461 pGS->maxNumVerts; // num verts
1462
1463 struct swr_geometry_shader *gs = ctx->gs;
1464
1465 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1466 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1467
1468 memset(outputs, 0, sizeof(outputs));
1469
1470 AttrBuilder attrBuilder;
1471 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1472
1473 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1474 PointerType::get(mInt8Ty, 0),
1475 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
1476 FunctionType *vsFuncType =
1477 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
1478
1479 // create new vertex shader function
1480 auto pFunction = Function::Create(vsFuncType,
1481 GlobalValue::ExternalLinkage,
1482 "GS",
1483 JM()->mpCurrentModule);
1484 #if LLVM_VERSION_MAJOR < 5
1485 AttributeSet attrSet = AttributeSet::get(
1486 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1487 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1488 #else
1489 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1490 #endif
1491
1492 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1493 IRB()->SetInsertPoint(block);
1494 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1495
1496 auto argitr = pFunction->arg_begin();
1497 Value *hPrivateData = &*argitr++;
1498 hPrivateData->setName("hPrivateData");
1499 Value *pWorkerData = &*argitr++;
1500 pWorkerData->setName("pWorkerData");
1501 Value *pGsCtx = &*argitr++;
1502 pGsCtx->setName("gsCtx");
1503
1504 Value *consts_ptr =
1505 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
1506 consts_ptr->setName("gs_constants");
1507 Value *const_sizes_ptr =
1508 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
1509 const_sizes_ptr->setName("num_gs_constants");
1510
1511 struct lp_build_sampler_soa *sampler =
1512 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
1513 assert(sampler != nullptr);
1514
1515 struct lp_bld_tgsi_system_values system_values;
1516 memset(&system_values, 0, sizeof(system_values));
1517 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
1518 system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
1519
1520 std::vector<Constant*> mapConstants;
1521 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1522 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
1523 ubyte semantic_name = info->input_semantic_name[slot];
1524 ubyte semantic_idx = info->input_semantic_index[slot];
1525
1526 unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
1527 assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
1528
1529 vs_slot += VERTEX_ATTRIB_START_SLOT;
1530
1531 if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
1532 vs_slot--;
1533
1534 if (semantic_name == TGSI_SEMANTIC_POSITION)
1535 vs_slot = VERTEX_POSITION_SLOT;
1536
1537 STORE(C(vs_slot), vtxAttribMap, {0, slot});
1538 mapConstants.push_back(C(vs_slot));
1539 }
1540
1541 struct lp_build_mask_context mask;
1542 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
1543 lp_build_mask_begin(&mask, gallivm,
1544 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
1545
1546 // zero out cut buffer so we can load/modify/store bits
1547 for (uint32_t lane = 0; lane < mVWidth; ++lane)
1548 {
1549 Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
1550 #if LLVM_VERSION_MAJOR >= 10
1551 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, MaybeAlign(sizeof(float) * KNOB_SIMD_WIDTH));
1552 #else
1553 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
1554 #endif
1555 }
1556
1557 struct swr_gs_llvm_iface gs_iface;
1558 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
1559 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
1560 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
1561 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
1562 gs_iface.pBuilder = this;
1563 gs_iface.pGsCtx = pGsCtx;
1564 gs_iface.pGsState = pGS;
1565 gs_iface.num_outputs = gs->info.base.num_outputs;
1566 gs_iface.num_verts_per_prim =
1567 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
1568 gs_iface.info = info;
1569 gs_iface.pVtxAttribMap = vtxAttribMap;
1570
1571 struct lp_build_tgsi_params params;
1572 memset(&params, 0, sizeof(params));
1573 params.type = lp_type_float_vec(32, 32 * 8);
1574 params.mask = & mask;
1575 params.consts_ptr = wrap(consts_ptr);
1576 params.const_sizes_ptr = wrap(const_sizes_ptr);
1577 params.system_values = &system_values;
1578 params.inputs = inputs;
1579 params.context_ptr = wrap(hPrivateData);
1580 params.sampler = sampler;
1581 params.info = &gs->info.base;
1582 params.gs_iface = &gs_iface.base;
1583
1584 lp_build_tgsi_soa(gallivm,
1585 gs->pipe.tokens,
1586 &params,
1587 outputs);
1588
1589 lp_build_mask_end(&mask);
1590
1591 sampler->destroy(sampler);
1592
1593 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1594
1595 RET_VOID();
1596
1597 gallivm_verify_function(gallivm, wrap(pFunction));
1598 gallivm_compile_module(gallivm);
1599
1600 PFN_GS_FUNC pFunc =
1601 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
1602
1603 debug_printf("geom shader %p\n", pFunc);
1604 assert(pFunc && "Error: GeomShader = NULL");
1605
1606 JM()->mIsModuleFinalized = true;
1607
1608 return pFunc;
1609 }
1610
1611 PFN_TES_FUNC
1612 BuilderSWR::CompileTES(struct swr_context *ctx, swr_jit_tes_key &key)
1613 {
1614 SWR_TS_STATE *pTS = &ctx->tsState;
1615 struct tgsi_shader_info *info = &ctx->tes->info.base;
1616
1617 // tessellation is enabled if TES is present
1618 // clear tessellation state here then
1619 memset(pTS, 0, sizeof(*pTS));
1620
1621 pTS->tsEnable = true;
1622
1623 unsigned tes_prim_mode = info->properties[TGSI_PROPERTY_TES_PRIM_MODE];
1624 unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
1625 bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
1626 bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
1627 SWR_TS_DOMAIN type = SWR_TS_ISOLINE;
1628 SWR_TS_PARTITIONING partitioning = SWR_TS_EVEN_FRACTIONAL;
1629 SWR_TS_OUTPUT_TOPOLOGY topology = SWR_TS_OUTPUT_POINT;
1630 PRIMITIVE_TOPOLOGY postDSTopology = TOP_POINT_LIST;
1631
1632 // TESS_TODO: move this to helper functions to improve readability
1633 switch (tes_prim_mode) {
1634 case PIPE_PRIM_LINES:
1635 type = SWR_TS_ISOLINE;
1636 postDSTopology = TOP_LINE_LIST;
1637 break;
1638 case PIPE_PRIM_TRIANGLES:
1639 type = SWR_TS_TRI;
1640 postDSTopology = TOP_TRIANGLE_LIST;
1641 break;
1642 case PIPE_PRIM_QUADS:
1643 type = SWR_TS_QUAD;
1644 // See OpenGL spec - quads are tessellated into triangles
1645 postDSTopology = TOP_TRIANGLE_LIST;
1646 break;
1647 default:
1648 assert(0);
1649 }
1650
1651 switch (tes_spacing) {
1652 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
1653 partitioning = SWR_TS_ODD_FRACTIONAL;
1654 break;
1655 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
1656 partitioning = SWR_TS_EVEN_FRACTIONAL;
1657 break;
1658 case PIPE_TESS_SPACING_EQUAL:
1659 partitioning = SWR_TS_INTEGER;
1660 break;
1661 default:
1662 assert(0);
1663 }
1664
1665 if (tes_point_mode) {
1666 topology = SWR_TS_OUTPUT_POINT;
1667 postDSTopology = TOP_POINT_LIST;
1668 }
1669 else if (tes_prim_mode == PIPE_PRIM_LINES) {
1670 topology = SWR_TS_OUTPUT_LINE;
1671 }
1672 else if (tes_vertex_order_cw) {
1673 topology = SWR_TS_OUTPUT_TRI_CW;
1674 }
1675 else {
1676 topology = SWR_TS_OUTPUT_TRI_CCW;
1677 }
1678
1679 pTS->domain = type;
1680 pTS->tsOutputTopology = topology;
1681 pTS->partitioning = partitioning;
1682 pTS->numDsOutputAttribs = info->num_outputs;
1683 pTS->postDSTopology = postDSTopology;
1684
1685 pTS->dsAllocationSize = SWR_VTX_NUM_SLOTS * MAX_NUM_VERTS_PER_PRIM;
1686 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1687 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1688 pTS->dsOutVtxAttribOffset = VERTEX_ATTRIB_START_SLOT;
1689
1690 struct swr_tess_evaluation_shader *tes = ctx->tes;
1691
1692 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1693 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1694
1695 memset(outputs, 0, sizeof(outputs));
1696
1697 AttrBuilder attrBuilder;
1698 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1699
1700 std::vector<Type *> tesArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1701 PointerType::get(mInt8Ty, 0),
1702 PointerType::get(Gen_SWR_DS_CONTEXT(JM()), 0)};
1703 FunctionType *tesFuncType =
1704 FunctionType::get(Type::getVoidTy(JM()->mContext), tesArgs, false);
1705
1706 // create new vertex shader function
1707 auto pFunction = Function::Create(tesFuncType,
1708 GlobalValue::ExternalLinkage,
1709 "TES",
1710 JM()->mpCurrentModule);
1711
1712 #if LLVM_VERSION_MAJOR < 5
1713 AttributeSet attrSet = AttributeSet::get(
1714 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1715 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1716 #else
1717 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1718 #endif
1719
1720 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1721 IRB()->SetInsertPoint(block);
1722 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1723
1724 auto argitr = pFunction->arg_begin();
1725 Value *hPrivateData = &*argitr++;
1726 hPrivateData->setName("hPrivateData");
1727 Value *pWorkerData = &*argitr++;
1728 pWorkerData->setName("pWorkerData");
1729 Value *pTesCtx = &*argitr++;
1730 pTesCtx->setName("tesCtx");
1731
1732 Value *consts_ptr =
1733 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTES)});
1734 consts_ptr->setName("tes_constants");
1735 Value *const_sizes_ptr =
1736 GEP(hPrivateData, {0, swr_draw_context_num_constantsTES});
1737 const_sizes_ptr->setName("num_tes_constants");
1738
1739 struct lp_build_sampler_soa *sampler =
1740 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_EVAL);
1741 assert(sampler != nullptr);
1742
1743 struct lp_bld_tgsi_system_values system_values;
1744 memset(&system_values, 0, sizeof(system_values));
1745
1746 // Load and calculate system values
1747 // Tessellation coordinates (gl_TessCoord)
1748 Value *vecOffset = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset}, "vecOffset");
1749 Value *vecStride = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorStride}, "vecStride");
1750 Value *vecIndex = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_vectorOffset});
1751
1752 Value* tess_coord = ALLOCA(ArrayType::get(mSimdFP32Ty, 3));
1753
1754 Value *tessCoordU = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainU}), {vecIndex}, "tessCoordU");
1755 STORE(tessCoordU, tess_coord, {0, 0});
1756 Value *tessCoordV = LOADV(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pDomainV}), {vecIndex}, "tessCoordV");
1757 STORE(tessCoordV, tess_coord, {0, 1});
1758 Value *tessCoordW = FSUB(FSUB(VIMMED1(1.0f), tessCoordU), tessCoordV, "tessCoordW");
1759 STORE(tessCoordW, tess_coord, {0, 2});
1760 system_values.tess_coord = wrap(tess_coord);
1761
1762 // Primitive ID
1763 system_values.prim_id = wrap(VBROADCAST(LOAD(pTesCtx, {0, SWR_DS_CONTEXT_PrimitiveID}), "PrimitiveID"));
1764
1765 // Tessellation factors
1766 Value* pPatch = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pCpIn});
1767 Value* pTessFactors = GEP(pPatch, {C(0), C(ScalarPatch_tessFactors)});
1768
1769 assert(SWR_NUM_OUTER_TESS_FACTORS == 4);
1770 Value* sys_value_outer_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
1771 for (unsigned i = 0; i < SWR_NUM_OUTER_TESS_FACTORS; i++) {
1772 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_OuterTessFactors, i});
1773 sys_value_outer_factors = VINSERT(sys_value_outer_factors, v, i, "gl_TessLevelOuter");
1774 }
1775 system_values.tess_outer = wrap(sys_value_outer_factors);
1776
1777 assert(SWR_NUM_INNER_TESS_FACTORS == 2);
1778 Value* sys_value_inner_factors = UndefValue::get(VectorType::get(mFP32Ty, 4));
1779 for (unsigned i = 0; i < SWR_NUM_INNER_TESS_FACTORS; i++) {
1780 Value* v = LOAD(pTessFactors, {0, SWR_TESSELLATION_FACTORS_InnerTessFactors, i});
1781 sys_value_inner_factors = VINSERT(sys_value_inner_factors, v, i, "gl_TessLevelInner");
1782 }
1783 system_values.tess_inner = wrap(sys_value_inner_factors);
1784
1785 if (verbose_shader)
1786 {
1787 lp_build_print_value(gallivm, "tess_coord = ", system_values.tess_coord);
1788 }
1789
1790 struct tgsi_shader_info *pPrevShader = nullptr;
1791
1792 if (ctx->tcs) {
1793 pPrevShader = &ctx->tcs->info.base;
1794 }
1795 else {
1796 pPrevShader = &ctx->vs->info.base;
1797 }
1798
1799 // Figure out how many per-patch attributes we have
1800 unsigned perPatchAttrs = 0;
1801 unsigned genericAttrs = 0;
1802 unsigned tessLevelAttrs = 0;
1803 unsigned sgvAttrs = 0;
1804 for (unsigned slot = 0; slot < pPrevShader->num_outputs; slot++) {
1805 switch (pPrevShader->output_semantic_name[slot]) {
1806 case TGSI_SEMANTIC_PATCH:
1807 perPatchAttrs++;
1808 break;
1809 case TGSI_SEMANTIC_GENERIC:
1810 genericAttrs++;
1811 break;
1812 case TGSI_SEMANTIC_TESSINNER:
1813 case TGSI_SEMANTIC_TESSOUTER:
1814 tessLevelAttrs++;
1815 break;
1816 case TGSI_SEMANTIC_POSITION:
1817 case TGSI_SEMANTIC_CLIPDIST:
1818 case TGSI_SEMANTIC_PSIZE:
1819 sgvAttrs++;
1820 break;
1821 default:
1822 assert(!"Unknown semantic input in TES");
1823 }
1824 }
1825
1826 std::vector<Constant *> mapConstants;
1827 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1828 Value *patchAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
1829 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
1830 ubyte semantic_name = info->input_semantic_name[slot];
1831 ubyte semantic_idx = info->input_semantic_index[slot];
1832
1833 // Where in TCS output is my attribute?
1834 // TESS_TODO: revisit after implement pass-through TCS
1835 unsigned tcs_slot = locate_linkage(semantic_name, semantic_idx, pPrevShader);
1836 assert(tcs_slot < PIPE_MAX_SHADER_OUTPUTS);
1837
1838 // Skip tessellation levels - these go to the tessellator, not TES
1839 switch (semantic_name) {
1840 case TGSI_SEMANTIC_GENERIC:
1841 tcs_slot = tcs_slot + VERTEX_ATTRIB_START_SLOT - sgvAttrs - tessLevelAttrs;
1842 break;
1843 case TGSI_SEMANTIC_PATCH:
1844 tcs_slot = semantic_idx;
1845 break;
1846 case TGSI_SEMANTIC_POSITION:
1847 tcs_slot = VERTEX_POSITION_SLOT;
1848 break;
1849 case TGSI_SEMANTIC_CLIPDIST:
1850 case TGSI_SEMANTIC_PSIZE:
1851 break;
1852 default:
1853 assert(!"Unexpected semantic found while builiding TES input map");
1854 }
1855 if (semantic_name == TGSI_SEMANTIC_PATCH) {
1856 STORE(C(tcs_slot), patchAttribMap, {0, slot});
1857 } else {
1858 STORE(C(tcs_slot), vtxAttribMap, {0, slot});
1859 }
1860 mapConstants.push_back(C(tcs_slot));
1861 }
1862
1863 // Build execution mask
1864 struct lp_build_mask_context mask;
1865 Value *mask_val = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_mask}, "tesMask");
1866
1867 if (verbose_shader)
1868 lp_build_print_value(gallivm, "TES execution mask: ", wrap(mask_val));
1869
1870 lp_build_mask_begin(&mask, gallivm,
1871 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
1872
1873 struct swr_tes_llvm_iface tes_iface;
1874
1875 tes_iface.base.fetch_vertex_input = ::swr_tes_llvm_fetch_vtx_input;
1876 tes_iface.base.fetch_patch_input = ::swr_tes_llvm_fetch_patch_input;
1877
1878 tes_iface.pBuilder = this;
1879 tes_iface.pTesCtx = pTesCtx;
1880 tes_iface.pTsState = pTS;
1881 tes_iface.num_outputs = tes->info.base.num_outputs;
1882 tes_iface.info = info;
1883 tes_iface.pVtxAttribMap = vtxAttribMap;
1884 tes_iface.pPatchAttribMap = patchAttribMap;
1885
1886 struct lp_build_tgsi_params params;
1887 memset(&params, 0, sizeof(params));
1888 params.type = lp_type_float_vec(32, 32 * 8);
1889 params.mask = & mask;
1890 params.consts_ptr = wrap(consts_ptr);
1891 params.const_sizes_ptr = wrap(const_sizes_ptr);
1892 params.system_values = &system_values;
1893 params.inputs = inputs;
1894 params.context_ptr = wrap(hPrivateData);
1895 params.sampler = sampler;
1896 params.info = &tes->info.base;
1897 params.tes_iface = &tes_iface.base;
1898
1899 // Build LLVM IR
1900 lp_build_tgsi_soa(gallivm,
1901 tes->pipe.tokens,
1902 &params,
1903 outputs);
1904
1905 lp_build_mask_end(&mask);
1906
1907 sampler->destroy(sampler);
1908
1909 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1910
1911 // Write output attributes
1912 Value *dclOut = LOAD(pTesCtx, {0, SWR_DS_CONTEXT_pOutputData}, "dclOut");
1913
1914 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
1915 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1916 if (!outputs[attrib][channel])
1917 continue;
1918
1919 Value *val = LOAD(unwrap(outputs[attrib][channel]));;
1920 Value *attribOffset =
1921 LOAD(pTesCtx, {0, SWR_DS_CONTEXT_outVertexAttribOffset});
1922
1923 // Assume we write possition
1924 Value* outputSlot = C(VERTEX_POSITION_SLOT);
1925 if (tes->info.base.output_semantic_name[attrib] != TGSI_SEMANTIC_POSITION) {
1926 // No, it's a generic attribute, not a position - let's calculate output slot
1927 uint32_t outSlot = attrib;
1928 if (tes->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
1929 // this shader will write position, so in shader's term
1930 // output starts at attrib 1, but we will handle that separately,
1931 // so let's fix the outSlot
1932 outSlot--;
1933 }
1934 outputSlot = ADD(attribOffset, C(outSlot));
1935 }
1936
1937 Value *attribVecIndex =
1938 ADD(MUL(vecStride, MUL(outputSlot, C(4))), vecOffset);
1939
1940 uint32_t outputComponent = 0;
1941 uint32_t curComp = outputComponent + channel;
1942 auto outValIndex = ADD(attribVecIndex, MUL(vecStride, C(curComp)));
1943 STOREV(val, dclOut, {outValIndex});
1944
1945 if (verbose_shader) {
1946 lp_build_printf(gallivm,
1947 "TES output [%d][%d]",
1948 C(attrib),
1949 C(channel));
1950 lp_build_print_value(gallivm, " = ", wrap(val));
1951 }
1952 }
1953 }
1954
1955 RET_VOID();
1956
1957 JM()->DumpToFile(pFunction, "src");
1958 gallivm_verify_function(gallivm, wrap(pFunction));
1959
1960 gallivm_compile_module(gallivm);
1961 JM()->DumpToFile(pFunction, "optimized");
1962
1963 PFN_TES_FUNC pFunc =
1964 (PFN_TES_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
1965
1966 debug_printf("tess evaluation shader %p\n", pFunc);
1967 assert(pFunc && "Error: TessEvaluationShader = NULL");
1968
1969 JM()->DumpAsm(pFunction, "asm");
1970
1971 JM()->mIsModuleFinalized = true;
1972
1973 return pFunc;
1974 }
1975
1976 PFN_TCS_FUNC
1977 BuilderSWR::CompileTCS(struct swr_context *ctx, swr_jit_tcs_key &key)
1978 {
1979 SWR_TS_STATE *pTS = &ctx->tsState;
1980 struct tgsi_shader_info *info = &ctx->tcs->info.base;
1981
1982 pTS->numHsInputAttribs = info->num_inputs;
1983 pTS->numHsOutputAttribs = info->num_outputs;
1984
1985 pTS->hsAllocationSize = sizeof(ScalarPatch);
1986
1987 pTS->vertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1988 pTS->srcVertexAttribOffset = VERTEX_ATTRIB_START_SLOT;
1989
1990 struct swr_tess_control_shader *tcs = ctx->tcs;
1991
1992 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1993 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1994
1995 memset(outputs, 0, sizeof(outputs));
1996
1997 AttrBuilder attrBuilder;
1998 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1999
2000 std::vector<Type *> tcsArgs{
2001 PointerType::get(Gen_swr_draw_context(JM()), 0),
2002 PointerType::get(mInt8Ty, 0),
2003 PointerType::get(Gen_SWR_HS_CONTEXT(JM()), 0)};
2004 FunctionType *tcsFuncType =
2005 FunctionType::get(Type::getVoidTy(JM()->mContext), tcsArgs, false);
2006
2007 // create new vertex shader function
2008 auto pFunction = Function::Create(tcsFuncType,
2009 GlobalValue::ExternalLinkage,
2010 "TCS",
2011 JM()->mpCurrentModule);
2012
2013 #if LLVM_VERSION_MAJOR < 5
2014 AttributeSet attrSet = AttributeSet::get(
2015 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2016 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2017 #else
2018 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2019 #endif
2020
2021 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2022 IRB()->SetInsertPoint(block);
2023 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2024
2025 auto argitr = pFunction->arg_begin();
2026 Value *hPrivateData = &*argitr++;
2027 hPrivateData->setName("hPrivateData");
2028 Value *pWorkerData = &*argitr++;
2029 pWorkerData->setName("pWorkerData");
2030 Value *pTcsCtx = &*argitr++;
2031 pTcsCtx->setName("tcsCtx");
2032
2033 Value *consts_ptr =
2034 GEP(hPrivateData, {C(0), C(swr_draw_context_constantTCS)});
2035 consts_ptr->setName("tcs_constants");
2036 Value *const_sizes_ptr =
2037 GEP(hPrivateData, {0, swr_draw_context_num_constantsTCS});
2038 const_sizes_ptr->setName("num_tcs_constants");
2039
2040 struct lp_build_sampler_soa *sampler =
2041 swr_sampler_soa_create(key.sampler, PIPE_SHADER_TESS_CTRL);
2042 assert(sampler != nullptr);
2043
2044 struct lp_bld_tgsi_system_values system_values;
2045 memset(&system_values, 0, sizeof(system_values));
2046
2047 system_values.prim_id =
2048 wrap(LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_PrimitiveID}));
2049
2050 Constant *vInvocationId;
2051 if (mVWidth == 8) {
2052 vInvocationId = C({0, 1, 2, 3, 4, 5, 6, 7});
2053 } else {
2054 vInvocationId =
2055 C({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
2056 }
2057
2058 system_values.invocation_id = wrap(vInvocationId);
2059 system_values.vertices_in = wrap(C(tcs->vertices_per_patch));
2060
2061 if (verbose_shader) {
2062 lp_build_print_value(gallivm, "TCS::prim_id = ", system_values.prim_id);
2063 lp_build_print_value(gallivm, "TCS::invocation_id = ", system_values.invocation_id);
2064 lp_build_print_value(gallivm, "TCS::vertices_in = ", system_values.vertices_in);
2065 }
2066
2067 std::vector<Constant *> mapConstants;
2068 Value *vtxAttribMap =
2069 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2070
2071 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
2072 ubyte semantic_name = info->input_semantic_name[slot];
2073 ubyte semantic_idx = info->input_semantic_index[slot];
2074
2075 unsigned vs_slot =
2076 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
2077 assert(vs_slot < PIPE_MAX_SHADER_OUTPUTS);
2078
2079 vs_slot += VERTEX_ATTRIB_START_SLOT;
2080
2081 if (ctx->vs->info.base.output_semantic_name[0]
2082 == TGSI_SEMANTIC_POSITION)
2083 vs_slot--;
2084
2085 if (semantic_name == TGSI_SEMANTIC_POSITION)
2086 vs_slot = VERTEX_POSITION_SLOT;
2087
2088 STORE(C(vs_slot), vtxAttribMap, {0, slot});
2089 mapConstants.push_back(C(vs_slot));
2090 }
2091
2092 // Prepare map of output attributes. Needed when shader instance wants
2093 // to read own output or output of other instance, which is allowed in TCS
2094 Value *vtxOutputAttribMap =
2095 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2096 // Map for per-patch attributes
2097 Value *patchOutputAttribMap =
2098 ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
2099 for (unsigned slot = 0; slot < info->num_outputs; slot++) {
2100 ubyte name = info->output_semantic_name[slot];
2101 int32_t idx = info->output_semantic_index[slot];
2102 if (name == TGSI_SEMANTIC_PATCH) {
2103 STORE(C(idx), patchOutputAttribMap, {0, slot});
2104 } else {
2105 int32_t target_slot = slot;
2106 if (name == TGSI_SEMANTIC_GENERIC) {
2107 target_slot += VERTEX_ATTRIB_START_SLOT;
2108 }
2109 // Now normalize target slot
2110 for (ubyte as = 0; as < slot; as++) {
2111 ubyte name = info->output_semantic_name[as];
2112 switch (name) {
2113 case TGSI_SEMANTIC_TESSOUTER:
2114 case TGSI_SEMANTIC_TESSINNER:
2115 case TGSI_SEMANTIC_PATCH:
2116 case TGSI_SEMANTIC_POSITION:
2117 target_slot--;
2118 }
2119 }
2120 if (name == TGSI_SEMANTIC_POSITION) {
2121 target_slot = VERTEX_POSITION_SLOT;
2122 }
2123 STORE(C(target_slot), vtxOutputAttribMap, {0, slot});
2124 mapConstants.push_back(C(target_slot));
2125 }
2126 }
2127
2128 struct lp_build_mask_context mask;
2129 Value *mask_val = LOAD(pTcsCtx, {0, SWR_HS_CONTEXT_mask}, "tcsMask");
2130 lp_build_mask_begin(
2131 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val));
2132
2133 struct swr_tcs_llvm_iface tcs_iface;
2134
2135 tcs_iface.base.emit_store_output = ::swr_tcs_llvm_store_output;
2136 tcs_iface.base.emit_fetch_input = ::swr_tcs_llvm_fetch_input;
2137 tcs_iface.base.emit_fetch_output = ::swr_tcs_llvm_fetch_output;
2138 tcs_iface.base.emit_barrier = ::swr_tcs_llvm_emit_barrier;
2139 tcs_iface.base.emit_prologue = ::swr_tcs_llvm_emit_prologue;
2140 tcs_iface.base.emit_epilogue = ::swr_tcs_llvm_emit_epilogue;
2141
2142 tcs_iface.pBuilder = this;
2143 tcs_iface.pTcsCtx = pTcsCtx;
2144 tcs_iface.pTsState = pTS;
2145 tcs_iface.output_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
2146 tcs_iface.info = info;
2147 tcs_iface.pVtxAttribMap = vtxAttribMap;
2148 tcs_iface.pVtxOutputAttribMap = vtxOutputAttribMap;
2149 tcs_iface.pPatchOutputAttribMap = patchOutputAttribMap;
2150
2151 struct lp_build_tgsi_params params;
2152 memset(&params, 0, sizeof(params));
2153 params.type = lp_type_float_vec(32, 32 * 8);
2154 params.mask = &mask;
2155 params.consts_ptr = wrap(consts_ptr);
2156 params.const_sizes_ptr = wrap(const_sizes_ptr);
2157 params.system_values = &system_values;
2158 params.inputs = inputs;
2159 params.context_ptr = wrap(hPrivateData);
2160 params.sampler = sampler;
2161 params.info = &tcs->info.base;
2162 params.tcs_iface = &tcs_iface.base;
2163
2164 lp_build_tgsi_soa(gallivm, tcs->pipe.tokens, &params, outputs);
2165
2166 lp_build_mask_end(&mask);
2167
2168 sampler->destroy(sampler);
2169
2170 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2171 RET_VOID();
2172
2173 JM()->DumpToFile(pFunction, "src");
2174 gallivm_verify_function(gallivm, wrap(pFunction));
2175 gallivm_compile_module(gallivm);
2176 JM()->DumpToFile(pFunction, "optimized");
2177
2178 PFN_TCS_FUNC pFunc =
2179 (PFN_TCS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
2180
2181 debug_printf("tess control shader %p\n", pFunc);
2182 assert(pFunc && "Error: TessControlShader = NULL");
2183 JM()->DumpAsm(pFunction, "asm");
2184
2185 JM()->mIsModuleFinalized = true;
2186
2187 return pFunc;
2188 }
2189
2190
2191 PFN_GS_FUNC
2192 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
2193 {
2194 BuilderSWR builder(
2195 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2196 "GS");
2197 PFN_GS_FUNC func = builder.CompileGS(ctx, key);
2198
2199 ctx->gs->map.insert(std::make_pair(key, std::unique_ptr<VariantGS>(new VariantGS(builder.gallivm, func))));
2200 return func;
2201 }
2202
2203 PFN_TCS_FUNC
2204 swr_compile_tcs(struct swr_context *ctx, swr_jit_tcs_key &key)
2205 {
2206 BuilderSWR builder(
2207 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2208 "TCS");
2209 PFN_TCS_FUNC func = builder.CompileTCS(ctx, key);
2210
2211 ctx->tcs->map.insert(
2212 std::make_pair(key, std::unique_ptr<VariantTCS>(new VariantTCS(builder.gallivm, func))));
2213
2214 return func;
2215 }
2216
2217 PFN_TES_FUNC
2218 swr_compile_tes(struct swr_context *ctx, swr_jit_tes_key &key)
2219 {
2220 BuilderSWR builder(
2221 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2222 "TES");
2223 PFN_TES_FUNC func = builder.CompileTES(ctx, key);
2224
2225 ctx->tes->map.insert(
2226 std::make_pair(key, std::unique_ptr<VariantTES>(new VariantTES(builder.gallivm, func))));
2227
2228 return func;
2229 }
2230
2231 void
2232 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
2233 {
2234 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
2235 // interleave the simdvertex components into the dest simd16vertex
2236 // slot16offset = slot8offset * 2
2237 // comp16offset = comp8offset * 2 + alternateOffset
2238
2239 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
2240 Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
2241 STORE(pVal, pOut, {channel * 2});
2242 #else
2243 Value *pOut = GEP(pVtxOutput, {0, 0, slot});
2244 STORE(pVal, pOut, {0, channel});
2245 if (verbose_shader) {
2246 lp_build_printf(gallivm, "VS: Storing on slot %d, channel %d: ", C(slot), C(channel));
2247 lp_build_print_value(gallivm, "", wrap(pVal));
2248 }
2249 #endif
2250 }
2251
2252 PFN_VERTEX_FUNC
2253 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
2254 {
2255 struct swr_vertex_shader *swr_vs = ctx->vs;
2256
2257 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2258 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2259
2260 memset(outputs, 0, sizeof(outputs));
2261
2262 AttrBuilder attrBuilder;
2263 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
2264
2265 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
2266 PointerType::get(mInt8Ty, 0),
2267 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
2268 FunctionType *vsFuncType =
2269 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
2270
2271 // create new vertex shader function
2272 auto pFunction = Function::Create(vsFuncType,
2273 GlobalValue::ExternalLinkage,
2274 "VS",
2275 JM()->mpCurrentModule);
2276 #if LLVM_VERSION_MAJOR < 5
2277 AttributeSet attrSet = AttributeSet::get(
2278 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2279 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2280 #else
2281 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2282 #endif
2283
2284 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2285 IRB()->SetInsertPoint(block);
2286 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2287
2288 auto argitr = pFunction->arg_begin();
2289 Value *hPrivateData = &*argitr++;
2290 hPrivateData->setName("hPrivateData");
2291 Value *pWorkerData = &*argitr++;
2292 pWorkerData->setName("pWorkerData");
2293 Value *pVsCtx = &*argitr++;
2294 pVsCtx->setName("vsCtx");
2295
2296 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
2297
2298 consts_ptr->setName("vs_constants");
2299 Value *const_sizes_ptr =
2300 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
2301 const_sizes_ptr->setName("num_vs_constants");
2302
2303 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
2304 #if USE_SIMD16_VS
2305 vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
2306 #endif
2307
2308 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
2309 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
2310 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2311 if (mask & (1 << channel)) {
2312 inputs[attrib][channel] =
2313 wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
2314 }
2315 }
2316 }
2317
2318 struct lp_build_sampler_soa *sampler =
2319 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
2320 assert(sampler != nullptr);
2321
2322 struct lp_bld_tgsi_system_values system_values;
2323 memset(&system_values, 0, sizeof(system_values));
2324 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
2325
2326 #if USE_SIMD16_VS
2327 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
2328 #else
2329 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
2330 #endif
2331
2332 #if USE_SIMD16_VS
2333 uint32_t vectorWidth = mVWidth16;
2334 #else
2335 uint32_t vectorWidth = mVWidth;
2336 #endif
2337
2338 struct lp_build_tgsi_params params;
2339 memset(&params, 0, sizeof(params));
2340 params.type = lp_type_float_vec(32, 32 * vectorWidth);
2341 params.consts_ptr = wrap(consts_ptr);
2342 params.const_sizes_ptr = wrap(const_sizes_ptr);
2343 params.system_values = &system_values;
2344 params.inputs = inputs;
2345 params.context_ptr = wrap(hPrivateData);
2346 params.sampler = sampler;
2347 params.info = &swr_vs->info.base;
2348
2349 lp_build_tgsi_soa(gallivm,
2350 swr_vs->pipe.tokens,
2351 &params,
2352 outputs);
2353
2354 sampler->destroy(sampler);
2355
2356 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2357
2358 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
2359 #if USE_SIMD16_VS
2360 vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
2361 #endif
2362
2363 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2364 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
2365 if (!outputs[attrib][channel])
2366 continue;
2367
2368 Value *val;
2369 uint32_t outSlot;
2370
2371 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
2372 if (channel != VERTEX_SGV_POINT_SIZE_COMP)
2373 continue;
2374 val = LOAD(unwrap(outputs[attrib][0]));
2375 outSlot = VERTEX_SGV_SLOT;
2376 } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
2377 val = LOAD(unwrap(outputs[attrib][channel]));
2378 outSlot = VERTEX_POSITION_SLOT;
2379 } else {
2380 val = LOAD(unwrap(outputs[attrib][channel]));
2381 outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
2382 if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
2383 outSlot--;
2384 }
2385
2386 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
2387 }
2388 }
2389
2390 if (ctx->rasterizer->clip_plane_enable ||
2391 swr_vs->info.base.culldist_writemask) {
2392 unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
2393
2394 unsigned cv = 0;
2395 if (swr_vs->info.base.writes_clipvertex) {
2396 cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
2397 &swr_vs->info.base);
2398 } else {
2399 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
2400 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
2401 swr_vs->info.base.output_semantic_index[i] == 0) {
2402 cv = i;
2403 break;
2404 }
2405 }
2406 }
2407 assert(cv < PIPE_MAX_SHADER_OUTPUTS);
2408 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
2409 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
2410 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
2411 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
2412
2413 tgsi_shader_info *pLastFE = &ctx->vs->info.base;
2414
2415 if (ctx->gs) {
2416 pLastFE = &ctx->gs->info.base;
2417 }
2418 else if (ctx->tes) {
2419 pLastFE = &ctx->tes->info.base;
2420 }
2421 else if (ctx->tcs) {
2422 pLastFE = &ctx->tcs->info.base;
2423 }
2424
2425 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
2426 // clip distance overrides user clip planes
2427 if ((pLastFE->clipdist_writemask & clip_mask & (1 << val)) ||
2428 ((pLastFE->culldist_writemask << pLastFE->num_written_clipdistance) & (1 << val))) {
2429 unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, pLastFE);
2430 assert(cv < PIPE_MAX_SHADER_OUTPUTS);
2431 if (val < 4) {
2432 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
2433 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
2434 } else {
2435 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
2436 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
2437 }
2438 continue;
2439 }
2440
2441 if (!(clip_mask & (1 << val)))
2442 continue;
2443
2444 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
2445 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
2446 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
2447 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
2448 #if USE_SIMD16_VS
2449 Value *bpx = VBROADCAST_16(px);
2450 Value *bpy = VBROADCAST_16(py);
2451 Value *bpz = VBROADCAST_16(pz);
2452 Value *bpw = VBROADCAST_16(pw);
2453 #else
2454 Value *bpx = VBROADCAST(px);
2455 Value *bpy = VBROADCAST(py);
2456 Value *bpz = VBROADCAST(pz);
2457 Value *bpw = VBROADCAST(pw);
2458 #endif
2459 Value *dist = FADD(FMUL(unwrap(cx), bpx),
2460 FADD(FMUL(unwrap(cy), bpy),
2461 FADD(FMUL(unwrap(cz), bpz),
2462 FMUL(unwrap(cw), bpw))));
2463
2464 if (val < 4)
2465 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
2466 else
2467 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
2468 }
2469 }
2470
2471 RET_VOID();
2472
2473 JM()->DumpToFile(pFunction, "vs_function1");
2474 gallivm_verify_function(gallivm, wrap(pFunction));
2475 gallivm_compile_module(gallivm);
2476 JM()->DumpToFile(pFunction, "vs_function2");
2477
2478 // lp_debug_dump_value(func);
2479
2480 PFN_VERTEX_FUNC pFunc =
2481 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
2482
2483 JM()->DumpAsm(pFunction, "vs_function_asm");
2484 debug_printf("vert shader %p\n", pFunc);
2485 assert(pFunc && "Error: VertShader = NULL");
2486
2487 JM()->mIsModuleFinalized = true;
2488
2489 return pFunc;
2490 }
2491
2492 PFN_VERTEX_FUNC
2493 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
2494 {
2495 if (!ctx->vs->pipe.tokens)
2496 return NULL;
2497
2498 BuilderSWR builder(
2499 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2500 "VS");
2501 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
2502
2503 ctx->vs->map.insert(std::make_pair(key, std::unique_ptr<VariantVS>(new VariantVS(builder.gallivm, func))));
2504 return func;
2505 }
2506
2507 unsigned
2508 swr_so_adjust_attrib(unsigned in_attrib,
2509 swr_vertex_shader *swr_vs)
2510 {
2511 ubyte semantic_name;
2512 unsigned attrib;
2513
2514 attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
2515
2516 if (swr_vs) {
2517 semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
2518 if (semantic_name == TGSI_SEMANTIC_POSITION) {
2519 attrib = VERTEX_POSITION_SLOT;
2520 } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
2521 attrib = VERTEX_SGV_SLOT;
2522 } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
2523 attrib = VERTEX_SGV_SLOT;
2524 } else {
2525 if (swr_vs->info.base.writes_position) {
2526 attrib--;
2527 }
2528 }
2529 }
2530
2531 return attrib;
2532 }
2533
2534 static unsigned
2535 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
2536 {
2537 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
2538 if ((info->output_semantic_name[i] == name)
2539 && (info->output_semantic_index[i] == index)) {
2540 return i;
2541 }
2542 }
2543
2544 return 0xFFFFFFFF;
2545 }
2546
2547 PFN_PIXEL_KERNEL
2548 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
2549 {
2550 struct swr_fragment_shader *swr_fs = ctx->fs;
2551
2552 struct tgsi_shader_info *pPrevShader;
2553 if (ctx->gs)
2554 pPrevShader = &ctx->gs->info.base;
2555 else if (ctx->tes)
2556 pPrevShader = &ctx->tes->info.base;
2557 else
2558 pPrevShader = &ctx->vs->info.base;
2559
2560 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
2561 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
2562
2563 memset(inputs, 0, sizeof(inputs));
2564 memset(outputs, 0, sizeof(outputs));
2565
2566 struct lp_build_sampler_soa *sampler = NULL;
2567
2568 AttrBuilder attrBuilder;
2569 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
2570
2571 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
2572 PointerType::get(mInt8Ty, 0),
2573 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
2574 FunctionType *funcType =
2575 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
2576
2577 auto pFunction = Function::Create(funcType,
2578 GlobalValue::ExternalLinkage,
2579 "FS",
2580 JM()->mpCurrentModule);
2581 #if LLVM_VERSION_MAJOR < 5
2582 AttributeSet attrSet = AttributeSet::get(
2583 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
2584 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
2585 #else
2586 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
2587 #endif
2588
2589 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
2590 IRB()->SetInsertPoint(block);
2591 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
2592
2593 auto args = pFunction->arg_begin();
2594 Value *hPrivateData = &*args++;
2595 hPrivateData->setName("hPrivateData");
2596 Value *pWorkerData = &*args++;
2597 pWorkerData->setName("pWorkerData");
2598 Value *pPS = &*args++;
2599 pPS->setName("psCtx");
2600
2601 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
2602 consts_ptr->setName("fs_constants");
2603 Value *const_sizes_ptr =
2604 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
2605 const_sizes_ptr->setName("num_fs_constants");
2606
2607 // load *pAttribs, *pPerspAttribs
2608 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
2609 Value *pPerspAttribs =
2610 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
2611
2612 swr_fs->constantMask = 0;
2613 swr_fs->flatConstantMask = 0;
2614 swr_fs->pointSpriteMask = 0;
2615
2616 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
2617 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
2618 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
2619 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
2620
2621 if (!mask)
2622 continue;
2623
2624 // load i,j
2625 Value *vi = nullptr, *vj = nullptr;
2626 switch (interpLoc) {
2627 case TGSI_INTERPOLATE_LOC_CENTER:
2628 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
2629 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
2630 break;
2631 case TGSI_INTERPOLATE_LOC_CENTROID:
2632 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
2633 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
2634 break;
2635 case TGSI_INTERPOLATE_LOC_SAMPLE:
2636 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
2637 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
2638 break;
2639 }
2640
2641 // load/compute w
2642 Value *vw = nullptr, *pAttribs;
2643 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
2644 interpMode == TGSI_INTERPOLATE_COLOR) {
2645 pAttribs = pPerspAttribs;
2646 switch (interpLoc) {
2647 case TGSI_INTERPOLATE_LOC_CENTER:
2648 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
2649 break;
2650 case TGSI_INTERPOLATE_LOC_CENTROID:
2651 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
2652 break;
2653 case TGSI_INTERPOLATE_LOC_SAMPLE:
2654 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
2655 break;
2656 }
2657 } else {
2658 pAttribs = pRawAttribs;
2659 vw = VIMMED1(1.f);
2660 }
2661
2662 vw->setName("w");
2663
2664 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
2665 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
2666
2667 if (semantic_name == TGSI_SEMANTIC_FACE) {
2668 Value *ff =
2669 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
2670 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
2671 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
2672
2673 inputs[attrib][0] = wrap(ff);
2674 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2675 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2676 inputs[attrib][3] = wrap(VIMMED1(1.0f));
2677 continue;
2678 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
2679 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
2680 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
2681 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
2682 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
2683 } else {
2684 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
2685 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
2686 }
2687 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
2688 inputs[attrib][3] =
2689 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
2690 continue;
2691 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer
2692 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex});
2693 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex");
2694 inputs[attrib][0] = wrap(ff);
2695 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2696 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2697 inputs[attrib][3] = wrap(VIMMED1(0.0f));
2698 continue;
2699 } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex
2700 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex});
2701 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex");
2702 inputs[attrib][0] = wrap(ff);
2703 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2704 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2705 inputs[attrib][3] = wrap(VIMMED1(0.0f));
2706 continue;
2707 }
2708 unsigned linkedAttrib =
2709 locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
2710
2711 uint32_t extraAttribs = 0;
2712 if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
2713 /* non-gs generated primID - need to grab from swizzleMap override */
2714 linkedAttrib = pPrevShader->num_outputs - 1;
2715 swr_fs->constantMask |= 1 << linkedAttrib;
2716 extraAttribs++;
2717 } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
2718 key.sprite_coord_enable & (1 << semantic_idx)) {
2719 /* we add an extra attrib to the backendState in swr_update_derived. */
2720 linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
2721 swr_fs->pointSpriteMask |= (1 << linkedAttrib);
2722 extraAttribs++;
2723 } else if (linkedAttrib + 1 == 0xFFFFFFFF) {
2724 inputs[attrib][0] = wrap(VIMMED1(0.0f));
2725 inputs[attrib][1] = wrap(VIMMED1(0.0f));
2726 inputs[attrib][2] = wrap(VIMMED1(0.0f));
2727 inputs[attrib][3] = wrap(VIMMED1(1.0f));
2728 /* If we're reading in color and 2-sided lighting is enabled, we have
2729 * to keep going.
2730 */
2731 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
2732 continue;
2733 } else {
2734 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2735 swr_fs->constantMask |= 1 << linkedAttrib;
2736 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
2737 swr_fs->flatConstantMask |= 1 << linkedAttrib;
2738 }
2739 }
2740
2741 unsigned bcolorAttrib = 0xFFFFFFFF;
2742 Value *offset = NULL;
2743 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
2744 bcolorAttrib = locate_linkage(
2745 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader);
2746 /* Neither front nor back colors were available. Nothing to load. */
2747 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
2748 continue;
2749 /* If there is no front color, just always use the back color. */
2750 if (linkedAttrib + 1 == 0xFFFFFFFF)
2751 linkedAttrib = bcolorAttrib;
2752
2753 if (bcolorAttrib != 0xFFFFFFFF) {
2754 bcolorAttrib -= 1;
2755 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2756 swr_fs->constantMask |= 1 << bcolorAttrib;
2757 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
2758 swr_fs->flatConstantMask |= 1 << bcolorAttrib;
2759 }
2760
2761 unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
2762
2763 if (diff) {
2764 Value *back =
2765 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
2766
2767 offset = MUL(back, C(diff));
2768 offset->setName("offset");
2769 }
2770 }
2771 }
2772
2773 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2774 if (mask & (1 << channel)) {
2775 Value *indexA = C(linkedAttrib * 12 + channel);
2776 Value *indexB = C(linkedAttrib * 12 + channel + 4);
2777 Value *indexC = C(linkedAttrib * 12 + channel + 8);
2778
2779 if (offset) {
2780 indexA = ADD(indexA, offset);
2781 indexB = ADD(indexB, offset);
2782 indexC = ADD(indexC, offset);
2783 }
2784
2785 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
2786 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
2787 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
2788
2789 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
2790 inputs[attrib][channel] = wrap(va);
2791 } else {
2792 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
2793
2794 vc = FMUL(vk, vc);
2795
2796 Value *interp = FMUL(va, vi);
2797 Value *interp1 = FMUL(vb, vj);
2798 interp = FADD(interp, interp1);
2799 interp = FADD(interp, vc);
2800 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
2801 interpMode == TGSI_INTERPOLATE_COLOR)
2802 interp = FMUL(interp, vw);
2803 inputs[attrib][channel] = wrap(interp);
2804 }
2805 }
2806 }
2807 }
2808
2809 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
2810 assert(sampler != nullptr);
2811
2812 struct lp_bld_tgsi_system_values system_values;
2813 memset(&system_values, 0, sizeof(system_values));
2814
2815 struct lp_build_mask_context mask;
2816 bool uses_mask = false;
2817
2818 if (swr_fs->info.base.uses_kill ||
2819 key.poly_stipple_enable) {
2820 Value *vActiveMask = NULL;
2821 if (swr_fs->info.base.uses_kill) {
2822 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
2823 }
2824 if (key.poly_stipple_enable) {
2825 // first get fragment xy coords and clip to stipple bounds
2826 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
2827 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
2828 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
2829 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
2830
2831 // stipple pattern is 32x32, which means that one line of stipple
2832 // is stored in one word:
2833 // vXstipple is bit offset inside 32-bit stipple word
2834 // vYstipple is word index is stipple array
2835 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
2836 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
2837
2838 // grab stipple pattern base address
2839 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
2840 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
2841
2842 // peform a gather to grab stipple words for each lane
2843 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
2844 VIMMED1(0xffffffff), 4);
2845
2846 // create a mask with one bit corresponding to the x stipple
2847 // and AND it with the pattern, to see if we have a bit
2848 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
2849 Value *vStippleMask = AND(vStipple, vBitMask);
2850 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
2851 vStippleMask = VMASK(vStippleMask);
2852
2853 if (swr_fs->info.base.uses_kill) {
2854 vActiveMask = AND(vActiveMask, vStippleMask);
2855 } else {
2856 vActiveMask = vStippleMask;
2857 }
2858 }
2859 lp_build_mask_begin(
2860 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
2861 uses_mask = true;
2862 }
2863
2864 struct lp_build_tgsi_params params;
2865 memset(&params, 0, sizeof(params));
2866 params.type = lp_type_float_vec(32, 32 * 8);
2867 params.mask = uses_mask ? &mask : NULL;
2868 params.consts_ptr = wrap(consts_ptr);
2869 params.const_sizes_ptr = wrap(const_sizes_ptr);
2870 params.system_values = &system_values;
2871 params.inputs = inputs;
2872 params.context_ptr = wrap(hPrivateData);
2873 params.sampler = sampler;
2874 params.info = &swr_fs->info.base;
2875
2876 lp_build_tgsi_soa(gallivm,
2877 swr_fs->pipe.tokens,
2878 &params,
2879 outputs);
2880
2881 sampler->destroy(sampler);
2882
2883 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2884
2885 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
2886 attrib++) {
2887 switch (swr_fs->info.base.output_semantic_name[attrib]) {
2888 case TGSI_SEMANTIC_POSITION: {
2889 // write z
2890 LLVMValueRef outZ =
2891 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
2892 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
2893 break;
2894 }
2895 case TGSI_SEMANTIC_COLOR: {
2896 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
2897 if (!outputs[attrib][channel])
2898 continue;
2899
2900 LLVMValueRef out =
2901 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
2902 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
2903 swr_fs->info.base.output_semantic_index[attrib] == 0) {
2904 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
2905 STORE(unwrap(out),
2906 pPS,
2907 {0, SWR_PS_CONTEXT_shaded, rt, channel});
2908 }
2909 } else {
2910 STORE(unwrap(out),
2911 pPS,
2912 {0,
2913 SWR_PS_CONTEXT_shaded,
2914 swr_fs->info.base.output_semantic_index[attrib],
2915 channel});
2916 }
2917 }
2918 break;
2919 }
2920 default: {
2921 fprintf(stderr,
2922 "unknown output from FS %s[%d]\n",
2923 tgsi_semantic_names[swr_fs->info.base
2924 .output_semantic_name[attrib]],
2925 swr_fs->info.base.output_semantic_index[attrib]);
2926 break;
2927 }
2928 }
2929 }
2930
2931 LLVMValueRef mask_result = 0;
2932 if (uses_mask) {
2933 mask_result = lp_build_mask_end(&mask);
2934 }
2935
2936 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
2937
2938 if (uses_mask) {
2939 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
2940 }
2941
2942 RET_VOID();
2943
2944 gallivm_verify_function(gallivm, wrap(pFunction));
2945
2946 gallivm_compile_module(gallivm);
2947
2948 // after the gallivm passes, we have to lower the core's intrinsics
2949 llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
2950 lowerPass.add(createLowerX86Pass(this));
2951 lowerPass.run(*pFunction);
2952
2953 PFN_PIXEL_KERNEL kernel =
2954 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
2955 debug_printf("frag shader %p\n", kernel);
2956 assert(kernel && "Error: FragShader = NULL");
2957
2958 JM()->mIsModuleFinalized = true;
2959
2960 return kernel;
2961 }
2962
2963 PFN_PIXEL_KERNEL
2964 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
2965 {
2966 if (!ctx->fs->pipe.tokens)
2967 return NULL;
2968
2969 BuilderSWR builder(
2970 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
2971 "FS");
2972 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
2973
2974 ctx->fs->map.insert(std::make_pair(key, std::unique_ptr<VariantFS>(new VariantFS(builder.gallivm, func))));
2975 return func;
2976 }