swr: Limit DEBUG workaround to LLVM < 7
[mesa.git] / src / gallium / drivers / swr / swr_shader.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include <llvm/Config/llvm-config.h>
25
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
29 #undef DEBUG
30 #endif
31
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
36
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
39 #endif
40
41 #include "state.h"
42 #include "gen_state_llvm.h"
43 #include "builder.h"
44 #include "functionpasses/passes.h"
45
46 #include "tgsi/tgsi_strings.h"
47 #include "util/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53
54 #include "swr_context.h"
55 #include "gen_surf_state_llvm.h"
56 #include "gen_swr_context_llvm.h"
57 #include "swr_resource.h"
58 #include "swr_state.h"
59 #include "swr_screen.h"
60
61 using namespace SwrJit;
62 using namespace llvm;
63
64 static unsigned
65 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
66
67 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
68 {
69 return !memcmp(&lhs, &rhs, sizeof(lhs));
70 }
71
72 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
73 {
74 return !memcmp(&lhs, &rhs, sizeof(lhs));
75 }
76
77 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
78 {
79 return !memcmp(&lhs, &rhs, sizeof(lhs));
80 }
81
82 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
83 {
84 return !memcmp(&lhs, &rhs, sizeof(lhs));
85 }
86
87 static void
88 swr_generate_sampler_key(const struct lp_tgsi_info &info,
89 struct swr_context *ctx,
90 enum pipe_shader_type shader_type,
91 struct swr_jit_sampler_key &key)
92 {
93 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
94
95 for (unsigned i = 0; i < key.nr_samplers; i++) {
96 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
97 lp_sampler_static_sampler_state(
98 &key.sampler[i].sampler_state,
99 ctx->samplers[shader_type][i]);
100 }
101 }
102
103 /*
104 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
105 * are dx10-style? Can't really have mixed opcodes, at least not
106 * if we want to skip the holes here (without rescanning tgsi).
107 */
108 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
109 key.nr_sampler_views =
110 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
111 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
112 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
113 const struct pipe_sampler_view *view =
114 ctx->sampler_views[shader_type][i];
115 lp_sampler_static_texture_state(
116 &key.sampler[i].texture_state, view);
117 if (view) {
118 struct swr_resource *swr_res = swr_resource(view->texture);
119 const struct util_format_description *desc =
120 util_format_description(view->format);
121 if (swr_res->has_depth && swr_res->has_stencil &&
122 !util_format_has_depth(desc))
123 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
124 }
125 }
126 }
127 } else {
128 key.nr_sampler_views = key.nr_samplers;
129 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
130 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
131 const struct pipe_sampler_view *view =
132 ctx->sampler_views[shader_type][i];
133 lp_sampler_static_texture_state(
134 &key.sampler[i].texture_state, view);
135 if (view) {
136 struct swr_resource *swr_res = swr_resource(view->texture);
137 const struct util_format_description *desc =
138 util_format_description(view->format);
139 if (swr_res->has_depth && swr_res->has_stencil &&
140 !util_format_has_depth(desc))
141 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
142 }
143 }
144 }
145 }
146 }
147
148 void
149 swr_generate_fs_key(struct swr_jit_fs_key &key,
150 struct swr_context *ctx,
151 swr_fragment_shader *swr_fs)
152 {
153 memset(&key, 0, sizeof(key));
154
155 key.nr_cbufs = ctx->framebuffer.nr_cbufs;
156 key.light_twoside = ctx->rasterizer->light_twoside;
157 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
158
159 struct tgsi_shader_info *pPrevShader;
160 if (ctx->gs)
161 pPrevShader = &ctx->gs->info.base;
162 else
163 pPrevShader = &ctx->vs->info.base;
164
165 memcpy(&key.vs_output_semantic_name,
166 &pPrevShader->output_semantic_name,
167 sizeof(key.vs_output_semantic_name));
168 memcpy(&key.vs_output_semantic_idx,
169 &pPrevShader->output_semantic_index,
170 sizeof(key.vs_output_semantic_idx));
171
172 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
173
174 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
175 ctx->poly_stipple.prim_is_poly;
176 }
177
178 void
179 swr_generate_vs_key(struct swr_jit_vs_key &key,
180 struct swr_context *ctx,
181 swr_vertex_shader *swr_vs)
182 {
183 memset(&key, 0, sizeof(key));
184
185 key.clip_plane_mask =
186 swr_vs->info.base.clipdist_writemask ?
187 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
188 ctx->rasterizer->clip_plane_enable;
189
190 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
191 }
192
193 void
194 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
195 struct swr_vertex_element_state *velems)
196 {
197 memset(&key, 0, sizeof(key));
198
199 key.fsState = velems->fsState;
200 }
201
202 void
203 swr_generate_gs_key(struct swr_jit_gs_key &key,
204 struct swr_context *ctx,
205 swr_geometry_shader *swr_gs)
206 {
207 memset(&key, 0, sizeof(key));
208
209 struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
210
211 memcpy(&key.vs_output_semantic_name,
212 &pPrevShader->output_semantic_name,
213 sizeof(key.vs_output_semantic_name));
214 memcpy(&key.vs_output_semantic_idx,
215 &pPrevShader->output_semantic_index,
216 sizeof(key.vs_output_semantic_idx));
217
218 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
219 }
220
221 struct BuilderSWR : public Builder {
222 BuilderSWR(JitManager *pJitMgr, const char *pName)
223 : Builder(pJitMgr)
224 {
225 pJitMgr->SetupNewModule();
226 gallivm = gallivm_create(pName, wrap(&JM()->mContext));
227 pJitMgr->mpCurrentModule = unwrap(gallivm->module);
228 }
229
230 ~BuilderSWR() {
231 gallivm_free_ir(gallivm);
232 }
233
234 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
235 unsigned slot, unsigned channel);
236
237 struct gallivm_state *gallivm;
238 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
239 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
240 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
241
242 LLVMValueRef
243 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
244 struct lp_build_tgsi_context * bld_base,
245 boolean is_vindex_indirect,
246 LLVMValueRef vertex_index,
247 boolean is_aindex_indirect,
248 LLVMValueRef attrib_index,
249 LLVMValueRef swizzle_index);
250 void
251 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
252 struct lp_build_tgsi_context * bld_base,
253 LLVMValueRef (*outputs)[4],
254 LLVMValueRef emitted_vertices_vec);
255
256 void
257 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
258 struct lp_build_tgsi_context * bld_base,
259 LLVMValueRef verts_per_prim_vec,
260 LLVMValueRef emitted_prims_vec);
261
262 void
263 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
264 struct lp_build_tgsi_context * bld_base,
265 LLVMValueRef total_emitted_vertices_vec,
266 LLVMValueRef emitted_prims_vec);
267
268 };
269
270 struct swr_gs_llvm_iface {
271 struct lp_build_tgsi_gs_iface base;
272 struct tgsi_shader_info *info;
273
274 BuilderSWR *pBuilder;
275
276 Value *pGsCtx;
277 SWR_GS_STATE *pGsState;
278 uint32_t num_outputs;
279 uint32_t num_verts_per_prim;
280
281 Value *pVtxAttribMap;
282 };
283
284 // trampoline functions so we can use the builder llvm construction methods
285 static LLVMValueRef
286 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
287 struct lp_build_tgsi_context * bld_base,
288 boolean is_vindex_indirect,
289 LLVMValueRef vertex_index,
290 boolean is_aindex_indirect,
291 LLVMValueRef attrib_index,
292 LLVMValueRef swizzle_index)
293 {
294 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
295
296 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld_base,
297 is_vindex_indirect,
298 vertex_index,
299 is_aindex_indirect,
300 attrib_index,
301 swizzle_index);
302 }
303
304 static void
305 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
306 struct lp_build_tgsi_context * bld_base,
307 LLVMValueRef (*outputs)[4],
308 LLVMValueRef emitted_vertices_vec)
309 {
310 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
311
312 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld_base,
313 outputs,
314 emitted_vertices_vec);
315 }
316
317 static void
318 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
319 struct lp_build_tgsi_context * bld_base,
320 LLVMValueRef verts_per_prim_vec,
321 LLVMValueRef emitted_prims_vec)
322 {
323 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
324
325 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld_base,
326 verts_per_prim_vec,
327 emitted_prims_vec);
328 }
329
330 static void
331 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
332 struct lp_build_tgsi_context * bld_base,
333 LLVMValueRef total_emitted_vertices_vec,
334 LLVMValueRef emitted_prims_vec)
335 {
336 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
337
338 iface->pBuilder->swr_gs_llvm_epilogue(gs_base, bld_base,
339 total_emitted_vertices_vec,
340 emitted_prims_vec);
341 }
342
343 LLVMValueRef
344 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
345 struct lp_build_tgsi_context * bld_base,
346 boolean is_vindex_indirect,
347 LLVMValueRef vertex_index,
348 boolean is_aindex_indirect,
349 LLVMValueRef attrib_index,
350 LLVMValueRef swizzle_index)
351 {
352 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
353 Value *vert_index = unwrap(vertex_index);
354 Value *attr_index = unwrap(attrib_index);
355
356 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
357
358 if (is_vindex_indirect || is_aindex_indirect) {
359 int i;
360 Value *res = unwrap(bld_base->base.zero);
361 struct lp_type type = bld_base->base.type;
362
363 for (i = 0; i < type.length; i++) {
364 Value *vert_chan_index = vert_index;
365 Value *attr_chan_index = attr_index;
366
367 if (is_vindex_indirect) {
368 vert_chan_index = VEXTRACT(vert_index, C(i));
369 }
370 if (is_aindex_indirect) {
371 attr_chan_index = VEXTRACT(attr_index, C(i));
372 }
373
374 Value *attrib =
375 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
376
377 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
378 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
379
380 Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
381 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
382
383 Value *value = VEXTRACT(pInput, C(i));
384 res = VINSERT(res, value, C(i));
385 }
386
387 return wrap(res);
388 } else {
389 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
390
391 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
392 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
393
394 Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);
395
396 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
397
398 return wrap(pInput);
399 }
400 }
401
402 // GS output stream layout
403 #define VERTEX_COUNT_SIZE 32
404 #define CONTROL_HEADER_SIZE (8*32)
405
406 void
407 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
408 struct lp_build_tgsi_context * bld_base,
409 LLVMValueRef (*outputs)[4],
410 LLVMValueRef emitted_vertices_vec)
411 {
412 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
413
414 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
415
416 const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
417 const uint32_t attribSize = 4 * sizeof(float);
418 const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
419 Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
420
421 Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
422 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));
423
424 Value *pStack = STACKSAVE();
425 Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
426
427 for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
428 uint32_t attribSlot = attrib;
429 uint32_t sgvChannel = 0;
430 if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
431 attribSlot = VERTEX_SGV_SLOT;
432 sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
433 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
434 attribSlot = VERTEX_SGV_SLOT;
435 sgvChannel = VERTEX_SGV_RTAI_COMP;
436 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
437 attribSlot = VERTEX_SGV_SLOT;
438 sgvChannel = VERTEX_SGV_VAI_COMP;
439 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
440 attribSlot = VERTEX_POSITION_SLOT;
441 } else {
442 attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
443 if (iface->info->writes_position) {
444 attribSlot--;
445 }
446 }
447
448 Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?
449
450 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
451 Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
452 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
453 Value *pStreamOffset = GEP(pStream, pLaneOffset);
454 pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);
455
456 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
457 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
458
459 for (uint32_t channel = 0; channel < 4; ++channel) {
460 Value *vData;
461
462 if (attribSlot == VERTEX_SGV_SLOT)
463 vData = LOAD(unwrap(outputs[attrib][0]));
464 else
465 vData = LOAD(unwrap(outputs[attrib][channel]));
466
467 if (attribSlot != VERTEX_SGV_SLOT ||
468 sgvChannel == channel) {
469 vData = VEXTRACT(vData, C(lane));
470 STORE(vData, pStreamOffset);
471 }
472 pStreamOffset = GEP(pStreamOffset, C(1));
473 }
474 }
475 }
476
477 STACKRESTORE(pStack);
478 }
479
480 void
481 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
482 struct lp_build_tgsi_context * bld_base,
483 LLVMValueRef verts_per_prim_vec,
484 LLVMValueRef emitted_prims_vec)
485 {
486 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
487
488 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
489
490 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
491 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
492
493 uint32_t vertsPerPrim = iface->num_verts_per_prim;
494
495 Value *vCount =
496 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
497 unwrap(verts_per_prim_vec));
498
499 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
500 vCount = LOAD(unwrap(bld->total_emitted_vertices_vec_ptr));
501
502 struct lp_exec_mask *exec_mask = &bld->exec_mask;
503 Value *mask = unwrap(lp_build_mask_value(bld->mask));
504 if (exec_mask->has_mask)
505 mask = AND(mask, unwrap(exec_mask->exec_mask));
506
507 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
508 mask = AND(mask, cmpMask);
509 vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
510
511 vCount = SUB(vCount, VIMMED1(1));
512 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
513 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
514
515 vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
516
517 Value *pStack = STACKSAVE();
518 Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
519
520 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
521 Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
522 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
523 Value *pStreamOffset = GEP(pStream, vLaneOffset);
524
525 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
526 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
527
528 Value *vVal = LOAD(pStreamOffset);
529 vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
530 STORE(vVal, pStreamOffset);
531 }
532
533 STACKRESTORE(pStack);
534 }
535
536 void
537 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
538 struct lp_build_tgsi_context * bld_base,
539 LLVMValueRef total_emitted_vertices_vec,
540 LLVMValueRef emitted_prims_vec)
541 {
542 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
543
544 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
545
546 // Store emit count to each output stream in the first DWORD
547 for (uint32_t lane = 0; lane < mVWidth; ++lane)
548 {
549 Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
550 pStream = BITCAST(pStream, mInt32PtrTy);
551 Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
552 STORE(pLaneCount, pStream);
553 }
554 }
555
556 PFN_GS_FUNC
557 BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
558 {
559 SWR_GS_STATE *pGS = &ctx->gs->gsState;
560 struct tgsi_shader_info *info = &ctx->gs->info.base;
561
562 memset(pGS, 0, sizeof(*pGS));
563
564 pGS->gsEnable = true;
565
566 pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
567 pGS->outputTopology =
568 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
569 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
570 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
571
572 // XXX: single stream for now...
573 pGS->isSingleStream = true;
574 pGS->singleStreamID = 0;
575
576 pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
577 pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
578 pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
579 pGS->controlDataSize = 8; // GS ouputs max of 8 32B units
580 pGS->controlDataOffset = VERTEX_COUNT_SIZE;
581 pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
582
583 pGS->allocationSize =
584 VERTEX_COUNT_SIZE + // vertex count
585 CONTROL_HEADER_SIZE + // control header
586 (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
587 pGS->maxNumVerts; // num verts
588
589 struct swr_geometry_shader *gs = ctx->gs;
590
591 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
592 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
593
594 memset(outputs, 0, sizeof(outputs));
595
596 AttrBuilder attrBuilder;
597 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
598
599 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
600 PointerType::get(mInt8Ty, 0),
601 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
602 FunctionType *vsFuncType =
603 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
604
605 // create new vertex shader function
606 auto pFunction = Function::Create(vsFuncType,
607 GlobalValue::ExternalLinkage,
608 "GS",
609 JM()->mpCurrentModule);
610 #if LLVM_VERSION_MAJOR < 5
611 AttributeSet attrSet = AttributeSet::get(
612 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
613 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
614 #else
615 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
616 #endif
617
618 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
619 IRB()->SetInsertPoint(block);
620 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
621
622 auto argitr = pFunction->arg_begin();
623 Value *hPrivateData = &*argitr++;
624 hPrivateData->setName("hPrivateData");
625 Value *pWorkerData = &*argitr++;
626 pWorkerData->setName("pWorkerData");
627 Value *pGsCtx = &*argitr++;
628 pGsCtx->setName("gsCtx");
629
630 Value *consts_ptr =
631 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
632 consts_ptr->setName("gs_constants");
633 Value *const_sizes_ptr =
634 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
635 const_sizes_ptr->setName("num_gs_constants");
636
637 struct lp_build_sampler_soa *sampler =
638 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
639
640 struct lp_bld_tgsi_system_values system_values;
641 memset(&system_values, 0, sizeof(system_values));
642 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
643 system_values.instance_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
644
645 std::vector<Constant*> mapConstants;
646 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
647 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
648 ubyte semantic_name = info->input_semantic_name[slot];
649 ubyte semantic_idx = info->input_semantic_index[slot];
650
651 unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
652
653 vs_slot += VERTEX_ATTRIB_START_SLOT;
654
655 if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
656 vs_slot--;
657
658 if (semantic_name == TGSI_SEMANTIC_POSITION)
659 vs_slot = VERTEX_POSITION_SLOT;
660
661 STORE(C(vs_slot), vtxAttribMap, {0, slot});
662 mapConstants.push_back(C(vs_slot));
663 }
664
665 struct lp_build_mask_context mask;
666 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
667 lp_build_mask_begin(&mask, gallivm,
668 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
669
670 // zero out cut buffer so we can load/modify/store bits
671 for (uint32_t lane = 0; lane < mVWidth; ++lane)
672 {
673 Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
674 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
675 }
676
677 struct swr_gs_llvm_iface gs_iface;
678 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
679 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
680 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
681 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
682 gs_iface.pBuilder = this;
683 gs_iface.pGsCtx = pGsCtx;
684 gs_iface.pGsState = pGS;
685 gs_iface.num_outputs = gs->info.base.num_outputs;
686 gs_iface.num_verts_per_prim =
687 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
688 gs_iface.info = info;
689 gs_iface.pVtxAttribMap = vtxAttribMap;
690
691 struct lp_build_tgsi_params params;
692 memset(&params, 0, sizeof(params));
693 params.type = lp_type_float_vec(32, 32 * 8);
694 params.mask = & mask;
695 params.consts_ptr = wrap(consts_ptr);
696 params.const_sizes_ptr = wrap(const_sizes_ptr);
697 params.system_values = &system_values;
698 params.inputs = inputs;
699 params.context_ptr = wrap(hPrivateData);
700 params.sampler = sampler;
701 params.info = &gs->info.base;
702 params.gs_iface = &gs_iface.base;
703
704 lp_build_tgsi_soa(gallivm,
705 gs->pipe.tokens,
706 &params,
707 outputs);
708
709 lp_build_mask_end(&mask);
710
711 sampler->destroy(sampler);
712
713 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
714
715 RET_VOID();
716
717 gallivm_verify_function(gallivm, wrap(pFunction));
718 gallivm_compile_module(gallivm);
719
720 PFN_GS_FUNC pFunc =
721 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
722
723 debug_printf("geom shader %p\n", pFunc);
724 assert(pFunc && "Error: GeomShader = NULL");
725
726 JM()->mIsModuleFinalized = true;
727
728 return pFunc;
729 }
730
731 PFN_GS_FUNC
732 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
733 {
734 BuilderSWR builder(
735 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
736 "GS");
737 PFN_GS_FUNC func = builder.CompileGS(ctx, key);
738
739 ctx->gs->map.insert(std::make_pair(key, std::make_unique<VariantGS>(builder.gallivm, func)));
740 return func;
741 }
742
743 void
744 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
745 {
746 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
747 // interleave the simdvertex components into the dest simd16vertex
748 // slot16offset = slot8offset * 2
749 // comp16offset = comp8offset * 2 + alternateOffset
750
751 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
752 Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
753 STORE(pVal, pOut, {channel * 2});
754 #else
755 Value *pOut = GEP(pVtxOutput, {0, 0, slot});
756 STORE(pVal, pOut, {0, channel});
757 #endif
758 }
759
760 PFN_VERTEX_FUNC
761 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
762 {
763 struct swr_vertex_shader *swr_vs = ctx->vs;
764
765 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
766 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
767
768 memset(outputs, 0, sizeof(outputs));
769
770 AttrBuilder attrBuilder;
771 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
772
773 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
774 PointerType::get(mInt8Ty, 0),
775 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
776 FunctionType *vsFuncType =
777 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
778
779 // create new vertex shader function
780 auto pFunction = Function::Create(vsFuncType,
781 GlobalValue::ExternalLinkage,
782 "VS",
783 JM()->mpCurrentModule);
784 #if LLVM_VERSION_MAJOR < 5
785 AttributeSet attrSet = AttributeSet::get(
786 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
787 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
788 #else
789 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
790 #endif
791
792 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
793 IRB()->SetInsertPoint(block);
794 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
795
796 auto argitr = pFunction->arg_begin();
797 Value *hPrivateData = &*argitr++;
798 hPrivateData->setName("hPrivateData");
799 Value *pWorkerData = &*argitr++;
800 pWorkerData->setName("pWorkerData");
801 Value *pVsCtx = &*argitr++;
802 pVsCtx->setName("vsCtx");
803
804 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
805
806 consts_ptr->setName("vs_constants");
807 Value *const_sizes_ptr =
808 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
809 const_sizes_ptr->setName("num_vs_constants");
810
811 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
812 #if USE_SIMD16_VS
813 vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
814 #endif
815
816 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
817 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
818 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
819 if (mask & (1 << channel)) {
820 inputs[attrib][channel] =
821 wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
822 }
823 }
824 }
825
826 struct lp_build_sampler_soa *sampler =
827 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
828
829 struct lp_bld_tgsi_system_values system_values;
830 memset(&system_values, 0, sizeof(system_values));
831 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
832
833 #if USE_SIMD16_VS
834 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
835 #else
836 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
837 #endif
838
839 #if USE_SIMD16_VS
840 uint32_t vectorWidth = mVWidth16;
841 #else
842 uint32_t vectorWidth = mVWidth;
843 #endif
844
845 struct lp_build_tgsi_params params;
846 memset(&params, 0, sizeof(params));
847 params.type = lp_type_float_vec(32, 32 * vectorWidth);
848 params.consts_ptr = wrap(consts_ptr);
849 params.const_sizes_ptr = wrap(const_sizes_ptr);
850 params.system_values = &system_values;
851 params.inputs = inputs;
852 params.context_ptr = wrap(hPrivateData);
853 params.sampler = sampler;
854 params.info = &swr_vs->info.base;
855
856 lp_build_tgsi_soa(gallivm,
857 swr_vs->pipe.tokens,
858 &params,
859 outputs);
860
861 sampler->destroy(sampler);
862
863 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
864
865 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
866 #if USE_SIMD16_VS
867 vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
868 #endif
869
870 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
871 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
872 if (!outputs[attrib][channel])
873 continue;
874
875 Value *val;
876 uint32_t outSlot;
877
878 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
879 if (channel != VERTEX_SGV_POINT_SIZE_COMP)
880 continue;
881 val = LOAD(unwrap(outputs[attrib][0]));
882 outSlot = VERTEX_SGV_SLOT;
883 } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
884 val = LOAD(unwrap(outputs[attrib][channel]));
885 outSlot = VERTEX_POSITION_SLOT;
886 } else {
887 val = LOAD(unwrap(outputs[attrib][channel]));
888 outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
889 if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
890 outSlot--;
891 }
892
893 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
894 }
895 }
896
897 if (ctx->rasterizer->clip_plane_enable ||
898 swr_vs->info.base.culldist_writemask) {
899 unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
900
901 unsigned cv = 0;
902 if (swr_vs->info.base.writes_clipvertex) {
903 cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
904 &swr_vs->info.base);
905 } else {
906 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
907 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
908 swr_vs->info.base.output_semantic_index[i] == 0) {
909 cv = i;
910 break;
911 }
912 }
913 }
914 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
915 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
916 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
917 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
918
919 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
920 // clip distance overrides user clip planes
921 if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
922 ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
923 unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
924 &swr_vs->info.base);
925 if (val < 4) {
926 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
927 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
928 } else {
929 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
930 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
931 }
932 continue;
933 }
934
935 if (!(clip_mask & (1 << val)))
936 continue;
937
938 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
939 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
940 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
941 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
942 #if USE_SIMD16_VS
943 Value *bpx = VBROADCAST_16(px);
944 Value *bpy = VBROADCAST_16(py);
945 Value *bpz = VBROADCAST_16(pz);
946 Value *bpw = VBROADCAST_16(pw);
947 #else
948 Value *bpx = VBROADCAST(px);
949 Value *bpy = VBROADCAST(py);
950 Value *bpz = VBROADCAST(pz);
951 Value *bpw = VBROADCAST(pw);
952 #endif
953 Value *dist = FADD(FMUL(unwrap(cx), bpx),
954 FADD(FMUL(unwrap(cy), bpy),
955 FADD(FMUL(unwrap(cz), bpz),
956 FMUL(unwrap(cw), bpw))));
957
958 if (val < 4)
959 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
960 else
961 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
962 }
963 }
964
965 RET_VOID();
966
967 gallivm_verify_function(gallivm, wrap(pFunction));
968 gallivm_compile_module(gallivm);
969
970 // lp_debug_dump_value(func);
971
972 PFN_VERTEX_FUNC pFunc =
973 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
974
975 debug_printf("vert shader %p\n", pFunc);
976 assert(pFunc && "Error: VertShader = NULL");
977
978 JM()->mIsModuleFinalized = true;
979
980 return pFunc;
981 }
982
983 PFN_VERTEX_FUNC
984 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
985 {
986 if (!ctx->vs->pipe.tokens)
987 return NULL;
988
989 BuilderSWR builder(
990 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
991 "VS");
992 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
993
994 ctx->vs->map.insert(std::make_pair(key, std::make_unique<VariantVS>(builder.gallivm, func)));
995 return func;
996 }
997
998 unsigned
999 swr_so_adjust_attrib(unsigned in_attrib,
1000 swr_vertex_shader *swr_vs)
1001 {
1002 ubyte semantic_name;
1003 unsigned attrib;
1004
1005 attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
1006
1007 if (swr_vs) {
1008 semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
1009 if (semantic_name == TGSI_SEMANTIC_POSITION) {
1010 attrib = VERTEX_POSITION_SLOT;
1011 } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
1012 attrib = VERTEX_SGV_SLOT;
1013 } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
1014 attrib = VERTEX_SGV_SLOT;
1015 } else {
1016 if (swr_vs->info.base.writes_position) {
1017 attrib--;
1018 }
1019 }
1020 }
1021
1022 return attrib;
1023 }
1024
1025 static unsigned
1026 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
1027 {
1028 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1029 if ((info->output_semantic_name[i] == name)
1030 && (info->output_semantic_index[i] == index)) {
1031 return i;
1032 }
1033 }
1034
1035 return 0xFFFFFFFF;
1036 }
1037
1038 PFN_PIXEL_KERNEL
1039 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
1040 {
1041 struct swr_fragment_shader *swr_fs = ctx->fs;
1042
1043 struct tgsi_shader_info *pPrevShader;
1044 if (ctx->gs)
1045 pPrevShader = &ctx->gs->info.base;
1046 else
1047 pPrevShader = &ctx->vs->info.base;
1048
1049 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1050 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1051
1052 memset(inputs, 0, sizeof(inputs));
1053 memset(outputs, 0, sizeof(outputs));
1054
1055 struct lp_build_sampler_soa *sampler = NULL;
1056
1057 AttrBuilder attrBuilder;
1058 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1059
1060 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1061 PointerType::get(mInt8Ty, 0),
1062 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
1063 FunctionType *funcType =
1064 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
1065
1066 auto pFunction = Function::Create(funcType,
1067 GlobalValue::ExternalLinkage,
1068 "FS",
1069 JM()->mpCurrentModule);
1070 #if LLVM_VERSION_MAJOR < 5
1071 AttributeSet attrSet = AttributeSet::get(
1072 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1073 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1074 #else
1075 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1076 #endif
1077
1078 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1079 IRB()->SetInsertPoint(block);
1080 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1081
1082 auto args = pFunction->arg_begin();
1083 Value *hPrivateData = &*args++;
1084 hPrivateData->setName("hPrivateData");
1085 Value *pWorkerData = &*args++;
1086 pWorkerData->setName("pWorkerData");
1087 Value *pPS = &*args++;
1088 pPS->setName("psCtx");
1089
1090 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
1091 consts_ptr->setName("fs_constants");
1092 Value *const_sizes_ptr =
1093 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
1094 const_sizes_ptr->setName("num_fs_constants");
1095
1096 // load *pAttribs, *pPerspAttribs
1097 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
1098 Value *pPerspAttribs =
1099 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
1100
1101 swr_fs->constantMask = 0;
1102 swr_fs->flatConstantMask = 0;
1103 swr_fs->pointSpriteMask = 0;
1104
1105 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
1106 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
1107 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
1108 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
1109
1110 if (!mask)
1111 continue;
1112
1113 // load i,j
1114 Value *vi = nullptr, *vj = nullptr;
1115 switch (interpLoc) {
1116 case TGSI_INTERPOLATE_LOC_CENTER:
1117 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
1118 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
1119 break;
1120 case TGSI_INTERPOLATE_LOC_CENTROID:
1121 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
1122 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
1123 break;
1124 case TGSI_INTERPOLATE_LOC_SAMPLE:
1125 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
1126 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
1127 break;
1128 }
1129
1130 // load/compute w
1131 Value *vw = nullptr, *pAttribs;
1132 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1133 interpMode == TGSI_INTERPOLATE_COLOR) {
1134 pAttribs = pPerspAttribs;
1135 switch (interpLoc) {
1136 case TGSI_INTERPOLATE_LOC_CENTER:
1137 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
1138 break;
1139 case TGSI_INTERPOLATE_LOC_CENTROID:
1140 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
1141 break;
1142 case TGSI_INTERPOLATE_LOC_SAMPLE:
1143 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
1144 break;
1145 }
1146 } else {
1147 pAttribs = pRawAttribs;
1148 vw = VIMMED1(1.f);
1149 }
1150
1151 vw->setName("w");
1152
1153 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
1154 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
1155
1156 if (semantic_name == TGSI_SEMANTIC_FACE) {
1157 Value *ff =
1158 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
1159 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
1160 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
1161
1162 inputs[attrib][0] = wrap(ff);
1163 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1164 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1165 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1166 continue;
1167 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
1168 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
1169 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
1170 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
1171 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
1172 } else {
1173 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
1174 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
1175 }
1176 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
1177 inputs[attrib][3] =
1178 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
1179 continue;
1180 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer
1181 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex});
1182 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex");
1183 inputs[attrib][0] = wrap(ff);
1184 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1185 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1186 inputs[attrib][3] = wrap(VIMMED1(0.0f));
1187 continue;
1188 } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex
1189 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex});
1190 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex");
1191 inputs[attrib][0] = wrap(ff);
1192 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1193 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1194 inputs[attrib][3] = wrap(VIMMED1(0.0f));
1195 continue;
1196 }
1197 unsigned linkedAttrib =
1198 locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
1199
1200 uint32_t extraAttribs = 0;
1201 if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
1202 /* non-gs generated primID - need to grab from swizzleMap override */
1203 linkedAttrib = pPrevShader->num_outputs - 1;
1204 swr_fs->constantMask |= 1 << linkedAttrib;
1205 extraAttribs++;
1206 } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
1207 key.sprite_coord_enable & (1 << semantic_idx)) {
1208 /* we add an extra attrib to the backendState in swr_update_derived. */
1209 linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
1210 swr_fs->pointSpriteMask |= (1 << linkedAttrib);
1211 extraAttribs++;
1212 } else if (linkedAttrib == 0xFFFFFFFF) {
1213 inputs[attrib][0] = wrap(VIMMED1(0.0f));
1214 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1215 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1216 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1217 /* If we're reading in color and 2-sided lighting is enabled, we have
1218 * to keep going.
1219 */
1220 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
1221 continue;
1222 } else {
1223 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1224 swr_fs->constantMask |= 1 << linkedAttrib;
1225 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1226 swr_fs->flatConstantMask |= 1 << linkedAttrib;
1227 }
1228 }
1229
1230 unsigned bcolorAttrib = 0xFFFFFFFF;
1231 Value *offset = NULL;
1232 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
1233 bcolorAttrib = locate_linkage(
1234 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader) - 1;
1235 /* Neither front nor back colors were available. Nothing to load. */
1236 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
1237 continue;
1238 /* If there is no front color, just always use the back color. */
1239 if (linkedAttrib == 0xFFFFFFFF)
1240 linkedAttrib = bcolorAttrib;
1241
1242 if (bcolorAttrib != 0xFFFFFFFF) {
1243 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1244 swr_fs->constantMask |= 1 << bcolorAttrib;
1245 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1246 swr_fs->flatConstantMask |= 1 << bcolorAttrib;
1247 }
1248
1249 unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
1250
1251 if (diff) {
1252 Value *back =
1253 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
1254
1255 offset = MUL(back, C(diff));
1256 offset->setName("offset");
1257 }
1258 }
1259 }
1260
1261 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1262 if (mask & (1 << channel)) {
1263 Value *indexA = C(linkedAttrib * 12 + channel);
1264 Value *indexB = C(linkedAttrib * 12 + channel + 4);
1265 Value *indexC = C(linkedAttrib * 12 + channel + 8);
1266
1267 if (offset) {
1268 indexA = ADD(indexA, offset);
1269 indexB = ADD(indexB, offset);
1270 indexC = ADD(indexC, offset);
1271 }
1272
1273 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
1274 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
1275 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
1276
1277 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1278 inputs[attrib][channel] = wrap(va);
1279 } else {
1280 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
1281
1282 vc = FMUL(vk, vc);
1283
1284 Value *interp = FMUL(va, vi);
1285 Value *interp1 = FMUL(vb, vj);
1286 interp = FADD(interp, interp1);
1287 interp = FADD(interp, vc);
1288 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1289 interpMode == TGSI_INTERPOLATE_COLOR)
1290 interp = FMUL(interp, vw);
1291 inputs[attrib][channel] = wrap(interp);
1292 }
1293 }
1294 }
1295 }
1296
1297 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
1298
1299 struct lp_bld_tgsi_system_values system_values;
1300 memset(&system_values, 0, sizeof(system_values));
1301
1302 struct lp_build_mask_context mask;
1303 bool uses_mask = false;
1304
1305 if (swr_fs->info.base.uses_kill ||
1306 key.poly_stipple_enable) {
1307 Value *vActiveMask = NULL;
1308 if (swr_fs->info.base.uses_kill) {
1309 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
1310 }
1311 if (key.poly_stipple_enable) {
1312 // first get fragment xy coords and clip to stipple bounds
1313 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
1314 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
1315 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
1316 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
1317
1318 // stipple pattern is 32x32, which means that one line of stipple
1319 // is stored in one word:
1320 // vXstipple is bit offset inside 32-bit stipple word
1321 // vYstipple is word index is stipple array
1322 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
1323 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
1324
1325 // grab stipple pattern base address
1326 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
1327 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
1328
1329 // peform a gather to grab stipple words for each lane
1330 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
1331 VIMMED1(0xffffffff), 4);
1332
1333 // create a mask with one bit corresponding to the x stipple
1334 // and AND it with the pattern, to see if we have a bit
1335 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
1336 Value *vStippleMask = AND(vStipple, vBitMask);
1337 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
1338 vStippleMask = VMASK(vStippleMask);
1339
1340 if (swr_fs->info.base.uses_kill) {
1341 vActiveMask = AND(vActiveMask, vStippleMask);
1342 } else {
1343 vActiveMask = vStippleMask;
1344 }
1345 }
1346 lp_build_mask_begin(
1347 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
1348 uses_mask = true;
1349 }
1350
1351 struct lp_build_tgsi_params params;
1352 memset(&params, 0, sizeof(params));
1353 params.type = lp_type_float_vec(32, 32 * 8);
1354 params.mask = uses_mask ? &mask : NULL;
1355 params.consts_ptr = wrap(consts_ptr);
1356 params.const_sizes_ptr = wrap(const_sizes_ptr);
1357 params.system_values = &system_values;
1358 params.inputs = inputs;
1359 params.context_ptr = wrap(hPrivateData);
1360 params.sampler = sampler;
1361 params.info = &swr_fs->info.base;
1362
1363 lp_build_tgsi_soa(gallivm,
1364 swr_fs->pipe.tokens,
1365 &params,
1366 outputs);
1367
1368 sampler->destroy(sampler);
1369
1370 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1371
1372 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
1373 attrib++) {
1374 switch (swr_fs->info.base.output_semantic_name[attrib]) {
1375 case TGSI_SEMANTIC_POSITION: {
1376 // write z
1377 LLVMValueRef outZ =
1378 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
1379 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
1380 break;
1381 }
1382 case TGSI_SEMANTIC_COLOR: {
1383 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1384 if (!outputs[attrib][channel])
1385 continue;
1386
1387 LLVMValueRef out =
1388 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
1389 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1390 swr_fs->info.base.output_semantic_index[attrib] == 0) {
1391 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
1392 STORE(unwrap(out),
1393 pPS,
1394 {0, SWR_PS_CONTEXT_shaded, rt, channel});
1395 }
1396 } else {
1397 STORE(unwrap(out),
1398 pPS,
1399 {0,
1400 SWR_PS_CONTEXT_shaded,
1401 swr_fs->info.base.output_semantic_index[attrib],
1402 channel});
1403 }
1404 }
1405 break;
1406 }
1407 default: {
1408 fprintf(stderr,
1409 "unknown output from FS %s[%d]\n",
1410 tgsi_semantic_names[swr_fs->info.base
1411 .output_semantic_name[attrib]],
1412 swr_fs->info.base.output_semantic_index[attrib]);
1413 break;
1414 }
1415 }
1416 }
1417
1418 LLVMValueRef mask_result = 0;
1419 if (uses_mask) {
1420 mask_result = lp_build_mask_end(&mask);
1421 }
1422
1423 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1424
1425 if (uses_mask) {
1426 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
1427 }
1428
1429 RET_VOID();
1430
1431 gallivm_verify_function(gallivm, wrap(pFunction));
1432
1433 gallivm_compile_module(gallivm);
1434
1435 // after the gallivm passes, we have to lower the core's intrinsics
1436 llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
1437 lowerPass.add(createLowerX86Pass(this));
1438 lowerPass.run(*pFunction);
1439
1440 PFN_PIXEL_KERNEL kernel =
1441 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
1442 debug_printf("frag shader %p\n", kernel);
1443 assert(kernel && "Error: FragShader = NULL");
1444
1445 JM()->mIsModuleFinalized = true;
1446
1447 return kernel;
1448 }
1449
1450 PFN_PIXEL_KERNEL
1451 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
1452 {
1453 if (!ctx->fs->pipe.tokens)
1454 return NULL;
1455
1456 BuilderSWR builder(
1457 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
1458 "FS");
1459 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
1460
1461 ctx->fs->map.insert(std::make_pair(key, std::make_unique<VariantFS>(builder.gallivm, func)));
1462 return func;
1463 }