gallium/u_threaded: align batches and call slots to 16 bytes
[mesa.git] / src / gallium / drivers / swr / swr_shader.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #include "llvm-c/Core.h"
29 #include "llvm/Support/CBindingWrapping.h"
30 #pragma pop_macro("DEBUG")
31
32 #include "state.h"
33 #include "gen_state_llvm.h"
34 #include "builder.h"
35
36 #include "tgsi/tgsi_strings.h"
37 #include "util/u_format.h"
38 #include "util/u_prim.h"
39 #include "gallivm/lp_bld_init.h"
40 #include "gallivm/lp_bld_flow.h"
41 #include "gallivm/lp_bld_struct.h"
42 #include "gallivm/lp_bld_tgsi.h"
43
44 #include "swr_context.h"
45 #include "gen_swr_context_llvm.h"
46 #include "swr_resource.h"
47 #include "swr_state.h"
48 #include "swr_screen.h"
49
50 using namespace SwrJit;
51 using namespace llvm;
52
53 static unsigned
54 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
55
56 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
57 {
58 return !memcmp(&lhs, &rhs, sizeof(lhs));
59 }
60
61 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
62 {
63 return !memcmp(&lhs, &rhs, sizeof(lhs));
64 }
65
66 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
67 {
68 return !memcmp(&lhs, &rhs, sizeof(lhs));
69 }
70
71 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
72 {
73 return !memcmp(&lhs, &rhs, sizeof(lhs));
74 }
75
76 static void
77 swr_generate_sampler_key(const struct lp_tgsi_info &info,
78 struct swr_context *ctx,
79 enum pipe_shader_type shader_type,
80 struct swr_jit_sampler_key &key)
81 {
82 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
83
84 for (unsigned i = 0; i < key.nr_samplers; i++) {
85 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
86 lp_sampler_static_sampler_state(
87 &key.sampler[i].sampler_state,
88 ctx->samplers[shader_type][i]);
89 }
90 }
91
92 /*
93 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
94 * are dx10-style? Can't really have mixed opcodes, at least not
95 * if we want to skip the holes here (without rescanning tgsi).
96 */
97 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
98 key.nr_sampler_views =
99 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
100 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
101 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) {
102 const struct pipe_sampler_view *view =
103 ctx->sampler_views[shader_type][i];
104 lp_sampler_static_texture_state(
105 &key.sampler[i].texture_state, view);
106 if (view) {
107 struct swr_resource *swr_res = swr_resource(view->texture);
108 const struct util_format_description *desc =
109 util_format_description(view->format);
110 if (swr_res->has_depth && swr_res->has_stencil &&
111 !util_format_has_depth(desc))
112 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
113 }
114 }
115 }
116 } else {
117 key.nr_sampler_views = key.nr_samplers;
118 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
119 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
120 const struct pipe_sampler_view *view =
121 ctx->sampler_views[shader_type][i];
122 lp_sampler_static_texture_state(
123 &key.sampler[i].texture_state, view);
124 if (view) {
125 struct swr_resource *swr_res = swr_resource(view->texture);
126 const struct util_format_description *desc =
127 util_format_description(view->format);
128 if (swr_res->has_depth && swr_res->has_stencil &&
129 !util_format_has_depth(desc))
130 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
131 }
132 }
133 }
134 }
135 }
136
137 void
138 swr_generate_fs_key(struct swr_jit_fs_key &key,
139 struct swr_context *ctx,
140 swr_fragment_shader *swr_fs)
141 {
142 memset(&key, 0, sizeof(key));
143
144 key.nr_cbufs = ctx->framebuffer.nr_cbufs;
145 key.light_twoside = ctx->rasterizer->light_twoside;
146 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
147
148 struct tgsi_shader_info *pPrevShader;
149 if (ctx->gs)
150 pPrevShader = &ctx->gs->info.base;
151 else
152 pPrevShader = &ctx->vs->info.base;
153
154 memcpy(&key.vs_output_semantic_name,
155 &pPrevShader->output_semantic_name,
156 sizeof(key.vs_output_semantic_name));
157 memcpy(&key.vs_output_semantic_idx,
158 &pPrevShader->output_semantic_index,
159 sizeof(key.vs_output_semantic_idx));
160
161 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
162
163 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
164 ctx->poly_stipple.prim_is_poly;
165 }
166
167 void
168 swr_generate_vs_key(struct swr_jit_vs_key &key,
169 struct swr_context *ctx,
170 swr_vertex_shader *swr_vs)
171 {
172 memset(&key, 0, sizeof(key));
173
174 key.clip_plane_mask =
175 swr_vs->info.base.clipdist_writemask ?
176 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
177 ctx->rasterizer->clip_plane_enable;
178
179 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
180 }
181
182 void
183 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
184 struct swr_vertex_element_state *velems)
185 {
186 memset(&key, 0, sizeof(key));
187
188 key.fsState = velems->fsState;
189 }
190
191 void
192 swr_generate_gs_key(struct swr_jit_gs_key &key,
193 struct swr_context *ctx,
194 swr_geometry_shader *swr_gs)
195 {
196 memset(&key, 0, sizeof(key));
197
198 struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
199
200 memcpy(&key.vs_output_semantic_name,
201 &pPrevShader->output_semantic_name,
202 sizeof(key.vs_output_semantic_name));
203 memcpy(&key.vs_output_semantic_idx,
204 &pPrevShader->output_semantic_index,
205 sizeof(key.vs_output_semantic_idx));
206
207 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
208 }
209
210 struct BuilderSWR : public Builder {
211 BuilderSWR(JitManager *pJitMgr, const char *pName)
212 : Builder(pJitMgr)
213 {
214 pJitMgr->SetupNewModule();
215 gallivm = gallivm_create(pName, wrap(&JM()->mContext));
216 pJitMgr->mpCurrentModule = unwrap(gallivm->module);
217 }
218
219 ~BuilderSWR() {
220 gallivm_free_ir(gallivm);
221 }
222
223 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
224 unsigned slot, unsigned channel);
225
226 struct gallivm_state *gallivm;
227 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
228 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
229 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
230
231 LLVMValueRef
232 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
233 struct lp_build_tgsi_context * bld_base,
234 boolean is_vindex_indirect,
235 LLVMValueRef vertex_index,
236 boolean is_aindex_indirect,
237 LLVMValueRef attrib_index,
238 LLVMValueRef swizzle_index);
239 void
240 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
241 struct lp_build_tgsi_context * bld_base,
242 LLVMValueRef (*outputs)[4],
243 LLVMValueRef emitted_vertices_vec);
244
245 void
246 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
247 struct lp_build_tgsi_context * bld_base,
248 LLVMValueRef verts_per_prim_vec,
249 LLVMValueRef emitted_prims_vec);
250
251 void
252 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
253 struct lp_build_tgsi_context * bld_base,
254 LLVMValueRef total_emitted_vertices_vec,
255 LLVMValueRef emitted_prims_vec);
256
257 };
258
259 struct swr_gs_llvm_iface {
260 struct lp_build_tgsi_gs_iface base;
261 struct tgsi_shader_info *info;
262
263 BuilderSWR *pBuilder;
264
265 Value *pGsCtx;
266 SWR_GS_STATE *pGsState;
267 uint32_t num_outputs;
268 uint32_t num_verts_per_prim;
269
270 Value *pVtxAttribMap;
271 };
272
273 // trampoline functions so we can use the builder llvm construction methods
274 static LLVMValueRef
275 swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
276 struct lp_build_tgsi_context * bld_base,
277 boolean is_vindex_indirect,
278 LLVMValueRef vertex_index,
279 boolean is_aindex_indirect,
280 LLVMValueRef attrib_index,
281 LLVMValueRef swizzle_index)
282 {
283 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
284
285 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld_base,
286 is_vindex_indirect,
287 vertex_index,
288 is_aindex_indirect,
289 attrib_index,
290 swizzle_index);
291 }
292
293 static void
294 swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
295 struct lp_build_tgsi_context * bld_base,
296 LLVMValueRef (*outputs)[4],
297 LLVMValueRef emitted_vertices_vec)
298 {
299 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
300
301 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld_base,
302 outputs,
303 emitted_vertices_vec);
304 }
305
306 static void
307 swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
308 struct lp_build_tgsi_context * bld_base,
309 LLVMValueRef verts_per_prim_vec,
310 LLVMValueRef emitted_prims_vec)
311 {
312 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
313
314 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld_base,
315 verts_per_prim_vec,
316 emitted_prims_vec);
317 }
318
319 static void
320 swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
321 struct lp_build_tgsi_context * bld_base,
322 LLVMValueRef total_emitted_vertices_vec,
323 LLVMValueRef emitted_prims_vec)
324 {
325 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
326
327 iface->pBuilder->swr_gs_llvm_epilogue(gs_base, bld_base,
328 total_emitted_vertices_vec,
329 emitted_prims_vec);
330 }
331
332 LLVMValueRef
333 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
334 struct lp_build_tgsi_context * bld_base,
335 boolean is_vindex_indirect,
336 LLVMValueRef vertex_index,
337 boolean is_aindex_indirect,
338 LLVMValueRef attrib_index,
339 LLVMValueRef swizzle_index)
340 {
341 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
342
343 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
344
345 assert(is_vindex_indirect == false && is_aindex_indirect == false);
346
347 Value *attrib =
348 LOAD(GEP(iface->pVtxAttribMap, {C(0), unwrap(attrib_index)}));
349
350 Value *pInput =
351 LOAD(GEP(iface->pGsCtx,
352 {C(0),
353 C(SWR_GS_CONTEXT_vert),
354 unwrap(vertex_index),
355 C(0),
356 attrib,
357 unwrap(swizzle_index)}));
358
359 return wrap(pInput);
360 }
361
362 void
363 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
364 struct lp_build_tgsi_context * bld_base,
365 LLVMValueRef (*outputs)[4],
366 LLVMValueRef emitted_vertices_vec)
367 {
368 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
369 SWR_GS_STATE *pGS = iface->pGsState;
370
371 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
372
373 #if USE_SIMD16_FRONTEND
374 const uint32_t simdVertexStride = sizeof(simdvertex) * 2;
375 const uint32_t numSimdBatches = (pGS->maxNumVerts + (mVWidth * 2) - 1) / (mVWidth * 2);
376 #else
377 const uint32_t simdVertexStride = sizeof(simdvertex);
378 const uint32_t numSimdBatches = (pGS->maxNumVerts + mVWidth - 1) / mVWidth;
379 #endif
380 const uint32_t inputPrimStride = numSimdBatches * simdVertexStride;
381
382 Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream });
383 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
384 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
385
386 Value *vOffsets = C({
387 inputPrimStride * 0,
388 inputPrimStride * 1,
389 inputPrimStride * 2,
390 inputPrimStride * 3,
391 inputPrimStride * 4,
392 inputPrimStride * 5,
393 inputPrimStride * 6,
394 inputPrimStride * 7 } );
395
396 #if USE_SIMD16_FRONTEND
397 const uint32_t simdShift = log2(mVWidth * 2);
398 Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), (mVWidth * 2) - 1);
399 #else
400 const uint32_t simdShift = log2(mVWidth);
401 Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), mVWidth - 1);
402 #endif
403 Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), simdShift);
404
405 for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
406 uint32_t attribSlot = attrib;
407 if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
408 attribSlot = VERTEX_POINT_SIZE_SLOT;
409 else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PRIMID)
410 attribSlot = VERTEX_PRIMID_SLOT;
411 else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER)
412 attribSlot = VERTEX_RTAI_SLOT;
413
414 #if USE_SIMD16_FRONTEND
415 Value *vOffsetsAttrib =
416 ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex) * 2)));
417 vOffsetsAttrib =
418 ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector) * 2)));
419 #else
420 Value *vOffsetsAttrib =
421 ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex))));
422 vOffsetsAttrib =
423 ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector))));
424 #endif
425 vOffsetsAttrib =
426 ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float))));
427
428 for (uint32_t channel = 0; channel < 4; ++channel) {
429 Value *vData = LOAD(unwrap(outputs[attrib][channel]));
430 Value *vPtrs = GEP(pStream, vOffsetsAttrib);
431
432 vPtrs = BITCAST(vPtrs,
433 VectorType::get(PointerType::get(mFP32Ty, 0), 8));
434
435 MASKED_SCATTER(vData, vPtrs, 32, vMask1);
436
437 #if USE_SIMD16_FRONTEND
438 vOffsetsAttrib =
439 ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar) * 2));
440 #else
441 vOffsetsAttrib =
442 ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar)));
443 #endif
444 }
445 }
446 }
447
448 void
449 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
450 struct lp_build_tgsi_context * bld_base,
451 LLVMValueRef verts_per_prim_vec,
452 LLVMValueRef emitted_prims_vec)
453 {
454 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
455 SWR_GS_STATE *pGS = iface->pGsState;
456
457 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
458
459 Value *pCutBuffer =
460 LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer});
461 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
462 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
463
464 uint32_t vertsPerPrim = iface->num_verts_per_prim;
465
466 Value *vCount =
467 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
468 unwrap(verts_per_prim_vec));
469
470 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
471 vCount = LOAD(unwrap(bld->total_emitted_vertices_vec_ptr));
472
473 struct lp_exec_mask *exec_mask = &bld->exec_mask;
474 Value *mask = unwrap(lp_build_mask_value(bld->mask));
475 if (exec_mask->has_mask)
476 mask = AND(mask, unwrap(exec_mask->exec_mask));
477
478 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
479 mask = AND(mask, cmpMask);
480 vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
481
482 const uint32_t cutPrimStride =
483 (pGS->maxNumVerts + JM()->mVWidth - 1) / JM()->mVWidth;
484 Value *vOffsets = C({
485 (uint32_t)(cutPrimStride * 0),
486 (uint32_t)(cutPrimStride * 1),
487 (uint32_t)(cutPrimStride * 2),
488 (uint32_t)(cutPrimStride * 3),
489 (uint32_t)(cutPrimStride * 4),
490 (uint32_t)(cutPrimStride * 5),
491 (uint32_t)(cutPrimStride * 6),
492 (uint32_t)(cutPrimStride * 7) } );
493
494 vCount = SUB(vCount, VIMMED1(1));
495 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), vOffsets);
496 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
497
498 vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
499
500 Value *vPtrs = GEP(pCutBuffer, vOffset);
501 vPtrs =
502 BITCAST(vPtrs, VectorType::get(PointerType::get(mInt8Ty, 0), JM()->mVWidth));
503
504 Value *vGather = MASKED_GATHER(vPtrs, 32, vMask1);
505 vValue = OR(vGather, vValue);
506 MASKED_SCATTER(vValue, vPtrs, 32, vMask1);
507 }
508
509 void
510 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
511 struct lp_build_tgsi_context * bld_base,
512 LLVMValueRef total_emitted_vertices_vec,
513 LLVMValueRef emitted_prims_vec)
514 {
515 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
516
517 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
518
519 STORE(unwrap(total_emitted_vertices_vec), iface->pGsCtx, {0, SWR_GS_CONTEXT_vertexCount});
520 }
521
522 PFN_GS_FUNC
523 BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
524 {
525 SWR_GS_STATE *pGS = &ctx->gs->gsState;
526 struct tgsi_shader_info *info = &ctx->gs->info.base;
527
528 pGS->gsEnable = true;
529
530 pGS->numInputAttribs = info->num_inputs;
531 pGS->outputTopology =
532 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
533 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
534 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
535
536 pGS->emitsRenderTargetArrayIndex = info->writes_layer;
537 pGS->emitsPrimitiveID = info->writes_primid;
538 pGS->emitsViewportArrayIndex = info->writes_viewport_index;
539
540 // XXX: single stream for now...
541 pGS->isSingleStream = true;
542 pGS->singleStreamID = 0;
543
544 struct swr_geometry_shader *gs = ctx->gs;
545
546 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
547 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
548
549 memset(outputs, 0, sizeof(outputs));
550
551 AttrBuilder attrBuilder;
552 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
553
554 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
555 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
556 FunctionType *vsFuncType =
557 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
558
559 // create new vertex shader function
560 auto pFunction = Function::Create(vsFuncType,
561 GlobalValue::ExternalLinkage,
562 "GS",
563 JM()->mpCurrentModule);
564 #if HAVE_LLVM < 0x0500
565 AttributeSet attrSet = AttributeSet::get(
566 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
567 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
568 #else
569 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
570 #endif
571
572 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
573 IRB()->SetInsertPoint(block);
574 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
575
576 auto argitr = pFunction->arg_begin();
577 Value *hPrivateData = &*argitr++;
578 hPrivateData->setName("hPrivateData");
579 Value *pGsCtx = &*argitr++;
580 pGsCtx->setName("gsCtx");
581
582 Value *consts_ptr =
583 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
584 consts_ptr->setName("gs_constants");
585 Value *const_sizes_ptr =
586 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
587 const_sizes_ptr->setName("num_gs_constants");
588
589 struct lp_build_sampler_soa *sampler =
590 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
591
592 struct lp_bld_tgsi_system_values system_values;
593 memset(&system_values, 0, sizeof(system_values));
594 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
595 system_values.instance_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
596
597 std::vector<Constant*> mapConstants;
598 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
599 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
600 ubyte semantic_name = info->input_semantic_name[slot];
601 ubyte semantic_idx = info->input_semantic_index[slot];
602
603 unsigned vs_slot =
604 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base) + 1;
605
606 STORE(C(vs_slot), vtxAttribMap, {0, slot});
607 mapConstants.push_back(C(vs_slot));
608 }
609
610 struct lp_build_mask_context mask;
611 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
612 lp_build_mask_begin(&mask, gallivm,
613 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
614
615 // zero out cut buffer so we can load/modify/store bits
616 MEMSET(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer}),
617 C((char)0),
618 pGS->instanceCount * ((pGS->maxNumVerts + 7) / 8) * JM()->mVWidth,
619 sizeof(float) * KNOB_SIMD_WIDTH);
620
621 struct swr_gs_llvm_iface gs_iface;
622 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
623 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
624 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
625 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
626 gs_iface.pBuilder = this;
627 gs_iface.pGsCtx = pGsCtx;
628 gs_iface.pGsState = pGS;
629 gs_iface.num_outputs = gs->info.base.num_outputs;
630 gs_iface.num_verts_per_prim =
631 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
632 gs_iface.info = info;
633 gs_iface.pVtxAttribMap = vtxAttribMap;
634
635 lp_build_tgsi_soa(gallivm,
636 gs->pipe.tokens,
637 lp_type_float_vec(32, 32 * 8),
638 &mask,
639 wrap(consts_ptr),
640 wrap(const_sizes_ptr),
641 &system_values,
642 inputs,
643 outputs,
644 wrap(hPrivateData), // (sampler context)
645 NULL, // thread data
646 sampler,
647 &gs->info.base,
648 &gs_iface.base);
649
650 lp_build_mask_end(&mask);
651
652 sampler->destroy(sampler);
653
654 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
655
656 RET_VOID();
657
658 gallivm_verify_function(gallivm, wrap(pFunction));
659 gallivm_compile_module(gallivm);
660
661 PFN_GS_FUNC pFunc =
662 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
663
664 debug_printf("geom shader %p\n", pFunc);
665 assert(pFunc && "Error: GeomShader = NULL");
666
667 JM()->mIsModuleFinalized = true;
668
669 return pFunc;
670 }
671
672 PFN_GS_FUNC
673 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
674 {
675 BuilderSWR builder(
676 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
677 "GS");
678 PFN_GS_FUNC func = builder.CompileGS(ctx, key);
679
680 ctx->gs->map.insert(std::make_pair(key, make_unique<VariantGS>(builder.gallivm, func)));
681 return func;
682 }
683
684 void
685 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
686 {
687 #if USE_SIMD16_FRONTEND
688 // interleave the simdvertex components into the dest simd16vertex
689 // slot16offset = slot8offset * 2
690 // comp16offset = comp8offset * 2 + alternateOffset
691
692 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
693 Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
694 STORE(pVal, pOut, {channel * 2});
695 #else
696 Value *pOut = GEP(pVtxOutput, {0, 0, slot});
697 STORE(pVal, pOut, {0, channel});
698 #endif
699 }
700
701 PFN_VERTEX_FUNC
702 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
703 {
704 struct swr_vertex_shader *swr_vs = ctx->vs;
705
706 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
707 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
708
709 memset(outputs, 0, sizeof(outputs));
710
711 AttrBuilder attrBuilder;
712 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
713
714 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
715 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
716 FunctionType *vsFuncType =
717 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
718
719 // create new vertex shader function
720 auto pFunction = Function::Create(vsFuncType,
721 GlobalValue::ExternalLinkage,
722 "VS",
723 JM()->mpCurrentModule);
724 #if HAVE_LLVM < 0x0500
725 AttributeSet attrSet = AttributeSet::get(
726 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
727 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
728 #else
729 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
730 #endif
731
732 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
733 IRB()->SetInsertPoint(block);
734 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
735
736 auto argitr = pFunction->arg_begin();
737 Value *hPrivateData = &*argitr++;
738 hPrivateData->setName("hPrivateData");
739 Value *pVsCtx = &*argitr++;
740 pVsCtx->setName("vsCtx");
741
742 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
743
744 consts_ptr->setName("vs_constants");
745 Value *const_sizes_ptr =
746 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
747 const_sizes_ptr->setName("num_vs_constants");
748
749 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
750
751 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
752 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
753 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
754 if (mask & (1 << channel)) {
755 inputs[attrib][channel] =
756 wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
757 }
758 }
759 }
760
761 struct lp_build_sampler_soa *sampler =
762 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
763
764 struct lp_bld_tgsi_system_values system_values;
765 memset(&system_values, 0, sizeof(system_values));
766 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
767 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
768
769 lp_build_tgsi_soa(gallivm,
770 swr_vs->pipe.tokens,
771 lp_type_float_vec(32, 32 * 8),
772 NULL, // mask
773 wrap(consts_ptr),
774 wrap(const_sizes_ptr),
775 &system_values,
776 inputs,
777 outputs,
778 wrap(hPrivateData), // (sampler context)
779 NULL, // thread data
780 sampler, // sampler
781 &swr_vs->info.base,
782 NULL); // geometry shader face
783
784 sampler->destroy(sampler);
785
786 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
787
788 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
789
790 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
791 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
792 if (!outputs[attrib][channel])
793 continue;
794
795 Value *val = LOAD(unwrap(outputs[attrib][channel]));
796
797 uint32_t outSlot = attrib;
798 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE)
799 outSlot = VERTEX_POINT_SIZE_SLOT;
800 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
801 }
802 }
803
804 if (ctx->rasterizer->clip_plane_enable ||
805 swr_vs->info.base.culldist_writemask) {
806 unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
807
808 unsigned cv = 0;
809 if (swr_vs->info.base.writes_clipvertex) {
810 cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
811 &swr_vs->info.base);
812 } else {
813 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
814 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
815 swr_vs->info.base.output_semantic_index[i] == 0) {
816 cv = i;
817 break;
818 }
819 }
820 }
821 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
822 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
823 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
824 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
825
826 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
827 // clip distance overrides user clip planes
828 if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
829 ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
830 unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
831 &swr_vs->info.base);
832 if (val < 4) {
833 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
834 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
835 } else {
836 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
837 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
838 }
839 continue;
840 }
841
842 if (!(clip_mask & (1 << val)))
843 continue;
844
845 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
846 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
847 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
848 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
849 Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
850 FADD(FMUL(unwrap(cy), VBROADCAST(py)),
851 FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
852 FMUL(unwrap(cw), VBROADCAST(pw)))));
853
854 if (val < 4)
855 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
856 else
857 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
858 }
859 }
860
861 RET_VOID();
862
863 gallivm_verify_function(gallivm, wrap(pFunction));
864 gallivm_compile_module(gallivm);
865
866 // lp_debug_dump_value(func);
867
868 PFN_VERTEX_FUNC pFunc =
869 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
870
871 debug_printf("vert shader %p\n", pFunc);
872 assert(pFunc && "Error: VertShader = NULL");
873
874 JM()->mIsModuleFinalized = true;
875
876 return pFunc;
877 }
878
879 PFN_VERTEX_FUNC
880 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
881 {
882 if (!ctx->vs->pipe.tokens)
883 return NULL;
884
885 BuilderSWR builder(
886 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
887 "VS");
888 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
889
890 ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func)));
891 return func;
892 }
893
894 static unsigned
895 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
896 {
897 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
898 if ((info->output_semantic_name[i] == name)
899 && (info->output_semantic_index[i] == index)) {
900 return i - 1; // position is not part of the linkage
901 }
902 }
903
904 return 0xFFFFFFFF;
905 }
906
// BuilderSWR::CompileFS
//
// Emits LLVM IR for the fragment shader bound to ctx (ctx->fs) and JIT
// compiles it into a PFN_PIXEL_KERNEL.
//
// Emission order:
//   1. create a void function "FS" taking (swr_draw_context *,
//      SWR_PS_CONTEXT *);
//   2. fill inputs[][] for every used FS input, either from values in the
//      SWR_PS_CONTEXT (FACE, POSITION, PRIMID) or by interpolating the
//      attribute data of the previous stage (GS if bound, otherwise VS);
//   3. optionally open an execution mask (shader uses kill and/or polygon
//      stipple is enabled);
//   4. translate the TGSI tokens with lp_build_tgsi_soa();
//   5. store the POSITION z and COLOR outputs, plus the final mask, back
//      into the SWR_PS_CONTEXT.
//
// Side effects: rewrites swr_fs->constantMask, flatConstantMask and
// pointSpriteMask, and sets JM()->mIsModuleFinalized.
//
// ctx: context providing the bound fs / gs / vs.
// key: variant key; sprite_coord_enable, light_twoside, poly_stipple_enable,
//      sampler and nr_cbufs are read here.
// Returns the JIT-compiled kernel (asserted to be non-NULL).
907 PFN_PIXEL_KERNEL
908 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
909 {
910 struct swr_fragment_shader *swr_fs = ctx->fs;
911
// FS inputs are linked against the outputs of the previous stage: the GS
// when one is bound, otherwise the VS.
912 struct tgsi_shader_info *pPrevShader;
913 if (ctx->gs)
914 pPrevShader = &ctx->gs->info.base;
915 else
916 pPrevShader = &ctx->vs->info.base;
917
918 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
919 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
920
921 memset(inputs, 0, sizeof(inputs));
922 memset(outputs, 0, sizeof(outputs));
923
924 struct lp_build_sampler_soa *sampler = NULL;
925
// Request a stack alignment of mVWidth * sizeof(float) bytes for the
// generated function.
926 AttrBuilder attrBuilder;
927 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
928
// Signature of the generated function:
//   void FS(swr_draw_context *, SWR_PS_CONTEXT *)
929 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
930 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
931 FunctionType *funcType =
932 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
933
934 auto pFunction = Function::Create(funcType,
935 GlobalValue::ExternalLinkage,
936 "FS",
937 JM()->mpCurrentModule);
// The attribute API is selected by LLVM version: AttributeSet below 5.0,
// AttributeList otherwise.
938 #if HAVE_LLVM < 0x0500
939 AttributeSet attrSet = AttributeSet::get(
940 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
941 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
942 #else
943 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
944 #endif
945
// Point both the SWR IR builder and the gallivm builder at the entry block
// so that both APIs emit into the same place.
946 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
947 IRB()->SetInsertPoint(block);
948 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
949
// Name the two function arguments.
950 auto args = pFunction->arg_begin();
951 Value *hPrivateData = &*args++;
952 hPrivateData->setName("hPrivateData");
953 Value *pPS = &*args++;
954 pPS->setName("psCtx");
955
// Pointers into the draw context that are later handed to
// lp_build_tgsi_soa() as the constants and constant sizes.
956 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
957 consts_ptr->setName("fs_constants");
958 Value *const_sizes_ptr =
959 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
960 const_sizes_ptr->setName("num_fs_constants");
961
962 // load *pAttribs, *pPerspAttribs
963 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
964 Value *pPerspAttribs =
965 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
966
// These masks are rebuilt from scratch on every compile.
967 swr_fs->constantMask = 0;
968 swr_fs->flatConstantMask = 0;
969 swr_fs->pointSpriteMask = 0;
970
// Build inputs[][], one FS input attribute at a time.
971 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
972 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
973 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
974 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
975
// No channel of this input is used: nothing to set up.
976 if (!mask)
977 continue;
978
979 // load i,j
// vi / vj stay null if interpLoc is none of the three handled locations.
980 Value *vi = nullptr, *vj = nullptr;
981 switch (interpLoc) {
982 case TGSI_INTERPOLATE_LOC_CENTER:
983 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
984 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
985 break;
986 case TGSI_INTERPOLATE_LOC_CENTROID:
987 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
988 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
989 break;
990 case TGSI_INTERPOLATE_LOC_SAMPLE:
991 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
992 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
993 break;
994 }
995
996 // load/compute w
// Perspective and color interpolation read from pPerspAttribs and use
// w = VRCP(vOneOverW) at the chosen location; every other mode reads from
// pRawAttribs with w = 1.
997 Value *vw = nullptr, *pAttribs;
998 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
999 interpMode == TGSI_INTERPOLATE_COLOR) {
1000 pAttribs = pPerspAttribs;
1001 switch (interpLoc) {
1002 case TGSI_INTERPOLATE_LOC_CENTER:
1003 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
1004 break;
1005 case TGSI_INTERPOLATE_LOC_CENTROID:
1006 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
1007 break;
1008 case TGSI_INTERPOLATE_LOC_SAMPLE:
1009 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
1010 break;
1011 }
1012 } else {
1013 pAttribs = pRawAttribs;
1014 vw = VIMMED1(1.f);
1015 }
1016
// NOTE(review): vw is still null here if the perspective/color path saw an
// interpLoc not covered by the switch above - confirm that cannot happen.
1017 vw->setName("w");
1018
1019 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
1020 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
1021
// Inputs taken directly from the SWR_PS_CONTEXT rather than interpolated
// from the previous stage's outputs.
1022 if (semantic_name == TGSI_SEMANTIC_FACE) {
// x = frontFace * 2 - 1, splat across the vector (presumably frontFace is
// 0 or 1, giving -1 / +1 - verify); yzw = (0, 0, 1).
1023 Value *ff =
1024 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
1025 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
1026 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
1027
1028 inputs[attrib][0] = wrap(ff);
1029 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1030 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1031 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1032 continue;
1033 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
// x / y come from the center or the UL pixel positions depending on the
// shader's FS_COORD_PIXEL_CENTER property; z is vZ, w is vOneOverW.
1034 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
1035 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
1036 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
1037 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
1038 } else {
1039 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
1040 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
1041 }
1042 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
1043 inputs[attrib][3] =
1044 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
1045 continue;
1046 } else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
// Primitive id splat into x; the remaining channels are zero.
1047 Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID");
1048 inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID));
1049 inputs[attrib][1] = wrap(VIMMED1(0));
1050 inputs[attrib][2] = wrap(VIMMED1(0));
1051 inputs[attrib][3] = wrap(VIMMED1(0));
1052 continue;
1053 }
1054
// Find which output of the previous stage feeds this input
// (0xFFFFFFFF when locate_linkage reports none).
1055 unsigned linkedAttrib =
1056 locate_linkage(semantic_name, semantic_idx, pPrevShader);
1057
1058 if (semantic_name == TGSI_SEMANTIC_GENERIC &&
1059 key.sprite_coord_enable & (1 << semantic_idx)) {
1060 /* we add an extra attrib to the backendState in swr_update_derived. */
1061 linkedAttrib = pPrevShader->num_outputs - 1;
1062 swr_fs->pointSpriteMask |= (1 << linkedAttrib);
1063 } else if (linkedAttrib == 0xFFFFFFFF) {
// No matching output: default the input to (0, 0, 0, 1).
1064 inputs[attrib][0] = wrap(VIMMED1(0.0f));
1065 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1066 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1067 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1068 /* If we're reading in color and 2-sided lighting is enabled, we have
1069 * to keep going.
1070 */
1071 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
1072 continue;
1073 } else {
// Record the linked attribute in constantMask (CONSTANT interpolation) or
// flatConstantMask (COLOR interpolation).
1074 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1075 swr_fs->constantMask |= 1 << linkedAttrib;
1076 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1077 swr_fs->flatConstantMask |= 1 << linkedAttrib;
1078 }
1079 }
1080
// Two-sided lighting: when a back color output exists, 'offset' is added
// to the attribute indices below so that it is read instead of the front
// color when the frontFace value selects it.
1081 unsigned bcolorAttrib = 0xFFFFFFFF;
1082 Value *offset = NULL;
1083 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
1084 bcolorAttrib = locate_linkage(
1085 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader);
1086 /* Neither front nor back colors were available. Nothing to load. */
1087 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
1088 continue;
1089 /* If there is no front color, just always use the back color. */
1090 if (linkedAttrib == 0xFFFFFFFF)
1091 linkedAttrib = bcolorAttrib;
1092
1093 if (bcolorAttrib != 0xFFFFFFFF) {
1094 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1095 swr_fs->constantMask |= 1 << bcolorAttrib;
1096 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1097 swr_fs->flatConstantMask |= 1 << bcolorAttrib;
1098 }
1099
// Distance from the front to the back color attribute, in pAttribs
// entries (12 per attribute, matching the indexing below).
1100 unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
1101
1102 if (diff) {
// back = frontFace ^ 1, so for a 0/1 frontFace the offset is 'diff'
// when frontFace is 0 and 0 when it is 1.
1103 Value *back =
1104 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
1105
1106 offset = MUL(back, C(diff));
1107 offset->setName("offset");
1108 }
1109 }
1110 }
1111
// Each attribute occupies 12 consecutive pAttribs entries: three groups
// (A, B, C) of four channels each.
1112 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1113 if (mask & (1 << channel)) {
1114 Value *indexA = C(linkedAttrib * 12 + channel);
1115 Value *indexB = C(linkedAttrib * 12 + channel + 4);
1116 Value *indexC = C(linkedAttrib * 12 + channel + 8);
1117
1118 if (offset) {
1119 indexA = ADD(indexA, offset);
1120 indexB = ADD(indexB, offset);
1121 indexC = ADD(indexC, offset);
1122 }
1123
1124 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
1125 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
1126 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
1127
1128 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
// Constant interpolation uses the A value as is.
1129 inputs[attrib][channel] = wrap(va);
1130 } else {
// result = A * i + B * j + C * (1 - i - j), additionally multiplied by w
// for perspective / color interpolation.
1131 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
1132
1133 vc = FMUL(vk, vc);
1134
1135 Value *interp = FMUL(va, vi);
1136 Value *interp1 = FMUL(vb, vj);
1137 interp = FADD(interp, interp1);
1138 interp = FADD(interp, vc);
1139 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1140 interpMode == TGSI_INTERPOLATE_COLOR)
1141 interp = FMUL(interp, vw);
1142 inputs[attrib][channel] = wrap(interp);
1143 }
1144 }
1145 }
1146 }
1147
1148 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
1149
1150 struct lp_bld_tgsi_system_values system_values;
1151 memset(&system_values, 0, sizeof(system_values));
1152
1153 struct lp_build_mask_context mask;
1154 bool uses_mask = false;
1155
// An execution mask is only set up when the shader uses kill or polygon
// stipple is enabled.
1156 if (swr_fs->info.base.uses_kill ||
1157 key.poly_stipple_enable) {
1158 Value *vActiveMask = NULL;
1159 if (swr_fs->info.base.uses_kill) {
1160 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
1161 }
1162 if (key.poly_stipple_enable) {
1163 // first get fragment xy coords and clip to stipple bounds
1164 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
1165 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
1166 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
1167 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
1168
1169 // stipple pattern is 32x32, which means that one line of stipple
1170 // is stored in one word:
1171 // vXstipple is bit offset inside 32-bit stipple word
1172 // vYstipple is word index in stipple array
1173 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
1174 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
1175
1176 // grab stipple pattern base address
1177 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
1178 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
1179
1180 // perform a gather to grab stipple words for each lane
1181 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
1182 VIMMED1(0xffffffff), C((char)4));
1183
1184 // create a mask with one bit corresponding to the x stipple
1185 // and AND it with the pattern, to see if we have a bit
1186 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
1187 Value *vStippleMask = AND(vStipple, vBitMask);
1188 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
1189 vStippleMask = VMASK(vStippleMask);
1190
// Combine with the incoming active mask when the shader also uses kill;
// otherwise the stipple mask is the whole mask.
1191 if (swr_fs->info.base.uses_kill) {
1192 vActiveMask = AND(vActiveMask, vStippleMask);
1193 } else {
1194 vActiveMask = vStippleMask;
1195 }
1196 }
// NOTE(review): the vector type passed here (and to lp_build_tgsi_soa
// below) is hard-coded as (32, 32 * 8), whereas other code in this function
// uses JM()->mVWidth - confirm the two always agree.
1197 lp_build_mask_begin(
1198 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
1199 uses_mask = true;
1200 }
1201
// Translate the TGSI token stream. inputs[][] was populated above;
// outputs[][] entries are loaded from after this call.
1202 lp_build_tgsi_soa(gallivm,
1203 swr_fs->pipe.tokens,
1204 lp_type_float_vec(32, 32 * 8),
1205 uses_mask ? &mask : NULL, // mask
1206 wrap(consts_ptr),
1207 wrap(const_sizes_ptr),
1208 &system_values,
1209 inputs,
1210 outputs,
1211 wrap(hPrivateData),
1212 NULL, // thread data
1213 sampler, // sampler
1214 &swr_fs->info.base,
1215 NULL); // geometry shader face
1216
1217 sampler->destroy(sampler);
1218
// Re-sync the SWR builder with the block the gallivm builder is now
// positioned in.
1219 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1220
// Copy the shader outputs into the SWR_PS_CONTEXT.
1221 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
1222 attrib++) {
1223 switch (swr_fs->info.base.output_semantic_name[attrib]) {
1224 case TGSI_SEMANTIC_POSITION: {
1225 // write z
1226 LLVMValueRef outZ =
1227 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
1228 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
1229 break;
1230 }
1231 case TGSI_SEMANTIC_COLOR: {
1232 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
// Channels without an output slot are skipped.
1233 if (!outputs[attrib][channel])
1234 continue;
1235
1236 LLVMValueRef out =
1237 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
// With FS_COLOR0_WRITES_ALL_CBUFS set, color output 0 is stored to
// shaded[rt] for every rt below key.nr_cbufs; otherwise each color output
// goes to shaded[its semantic index].
1238 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1239 swr_fs->info.base.output_semantic_index[attrib] == 0) {
1240 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
1241 STORE(unwrap(out),
1242 pPS,
1243 {0, SWR_PS_CONTEXT_shaded, rt, channel});
1244 }
1245 } else {
1246 STORE(unwrap(out),
1247 pPS,
1248 {0,
1249 SWR_PS_CONTEXT_shaded,
1250 swr_fs->info.base.output_semantic_index[attrib],
1251 channel});
1252 }
1253 }
1254 break;
1255 }
1256 default: {
// Any other output semantic is only reported on stderr, not stored.
1257 fprintf(stderr,
1258 "unknown output from FS %s[%d]\n",
1259 tgsi_semantic_names[swr_fs->info.base
1260 .output_semantic_name[attrib]],
1261 swr_fs->info.base.output_semantic_index[attrib]);
1262 break;
1263 }
1264 }
1265 }
1266
// Close the mask scope (if one was opened) and write its result back as
// the new activeMask.
1267 LLVMValueRef mask_result = 0;
1268 if (uses_mask) {
1269 mask_result = lp_build_mask_end(&mask);
1270 }
1271
1272 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1273
1274 if (uses_mask) {
1275 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
1276 }
1277
1278 RET_VOID();
1279
// Verify and compile the module, then fetch the native entry point.
1280 gallivm_verify_function(gallivm, wrap(pFunction));
1281
1282 gallivm_compile_module(gallivm);
1283
1284 PFN_PIXEL_KERNEL kernel =
1285 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
1286 debug_printf("frag shader %p\n", kernel);
1287 assert(kernel && "Error: FragShader = NULL");
1288
1289 JM()->mIsModuleFinalized = true;
1290
1291 return kernel;
1292 }
1293
1294 PFN_PIXEL_KERNEL
1295 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
1296 {
1297 if (!ctx->fs->pipe.tokens)
1298 return NULL;
1299
1300 BuilderSWR builder(
1301 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
1302 "FS");
1303 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
1304
1305 ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func)));
1306 return func;
1307 }