1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 #include <llvm/Config/llvm-config.h>
25
26 #if LLVM_VERSION_MAJOR < 7
27 // llvm redefines DEBUG
28 #pragma push_macro("DEBUG")
29 #undef DEBUG
30 #endif
31
32 #include "JitManager.h"
33 #include "llvm-c/Core.h"
34 #include "llvm/Support/CBindingWrapping.h"
35 #include "llvm/IR/LegacyPassManager.h"
36
37 #if LLVM_VERSION_MAJOR < 7
38 #pragma pop_macro("DEBUG")
39 #endif
40
41 #include "state.h"
42 #include "gen_state_llvm.h"
43 #include "builder.h"
44 #include "functionpasses/passes.h"
45
46 #include "tgsi/tgsi_strings.h"
47 #include "util/format/u_format.h"
48 #include "util/u_prim.h"
49 #include "gallivm/lp_bld_init.h"
50 #include "gallivm/lp_bld_flow.h"
51 #include "gallivm/lp_bld_struct.h"
52 #include "gallivm/lp_bld_tgsi.h"
53
54 #include "swr_context.h"
55 #include "gen_surf_state_llvm.h"
56 #include "gen_swr_context_llvm.h"
57 #include "swr_resource.h"
58 #include "swr_state.h"
59 #include "swr_screen.h"
60
61 using namespace SwrJit;
62 using namespace llvm;
63
64 static unsigned
65 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
66
67 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
68 {
69 return !memcmp(&lhs, &rhs, sizeof(lhs));
70 }
71
72 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs)
73 {
74 return !memcmp(&lhs, &rhs, sizeof(lhs));
75 }
76
77 bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs)
78 {
79 return !memcmp(&lhs, &rhs, sizeof(lhs));
80 }
81
82 bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs)
83 {
84 return !memcmp(&lhs, &rhs, sizeof(lhs));
85 }
86
87 static void
88 swr_generate_sampler_key(const struct lp_tgsi_info &info,
89 struct swr_context *ctx,
90 enum pipe_shader_type shader_type,
91 struct swr_jit_sampler_key &key)
92 {
93 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1;
94
95 for (unsigned i = 0; i < key.nr_samplers; i++) {
96 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
97 lp_sampler_static_sampler_state(
98 &key.sampler[i].sampler_state,
99 ctx->samplers[shader_type][i]);
100 }
101 }
102
103 /*
104 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
105 * are dx10-style? Can't really have mixed opcodes, at least not
106 * if we want to skip the holes here (without rescanning tgsi).
107 */
108 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
109 key.nr_sampler_views =
110 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
111 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
112 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1u << (i & 31))) {
113 const struct pipe_sampler_view *view =
114 ctx->sampler_views[shader_type][i];
115 lp_sampler_static_texture_state(
116 &key.sampler[i].texture_state, view);
117 if (view) {
118 struct swr_resource *swr_res = swr_resource(view->texture);
119 const struct util_format_description *desc =
120 util_format_description(view->format);
121 if (swr_res->has_depth && swr_res->has_stencil &&
122 !util_format_has_depth(desc))
123 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
124 }
125 }
126 }
127 } else {
128 key.nr_sampler_views = key.nr_samplers;
129 for (unsigned i = 0; i < key.nr_sampler_views; i++) {
130 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) {
131 const struct pipe_sampler_view *view =
132 ctx->sampler_views[shader_type][i];
133 lp_sampler_static_texture_state(
134 &key.sampler[i].texture_state, view);
135 if (view) {
136 struct swr_resource *swr_res = swr_resource(view->texture);
137 const struct util_format_description *desc =
138 util_format_description(view->format);
139 if (swr_res->has_depth && swr_res->has_stencil &&
140 !util_format_has_depth(desc))
141 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT;
142 }
143 }
144 }
145 }
146 }
147
148 void
149 swr_generate_fs_key(struct swr_jit_fs_key &key,
150 struct swr_context *ctx,
151 swr_fragment_shader *swr_fs)
152 {
153 memset(&key, 0, sizeof(key));
154
155 key.nr_cbufs = ctx->framebuffer.nr_cbufs;
156 key.light_twoside = ctx->rasterizer->light_twoside;
157 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
158
159 struct tgsi_shader_info *pPrevShader;
160 if (ctx->gs)
161 pPrevShader = &ctx->gs->info.base;
162 else
163 pPrevShader = &ctx->vs->info.base;
164
165 memcpy(&key.vs_output_semantic_name,
166 &pPrevShader->output_semantic_name,
167 sizeof(key.vs_output_semantic_name));
168 memcpy(&key.vs_output_semantic_idx,
169 &pPrevShader->output_semantic_index,
170 sizeof(key.vs_output_semantic_idx));
171
172 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key);
173
174 key.poly_stipple_enable = ctx->rasterizer->poly_stipple_enable &&
175 ctx->poly_stipple.prim_is_poly;
176 }
177
178 void
179 swr_generate_vs_key(struct swr_jit_vs_key &key,
180 struct swr_context *ctx,
181 swr_vertex_shader *swr_vs)
182 {
183 memset(&key, 0, sizeof(key));
184
185 key.clip_plane_mask =
186 swr_vs->info.base.clipdist_writemask ?
187 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
188 ctx->rasterizer->clip_plane_enable;
189
190 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
191 }
192
193 void
194 swr_generate_fetch_key(struct swr_jit_fetch_key &key,
195 struct swr_vertex_element_state *velems)
196 {
197 memset(&key, 0, sizeof(key));
198
199 key.fsState = velems->fsState;
200 }
201
202 void
203 swr_generate_gs_key(struct swr_jit_gs_key &key,
204 struct swr_context *ctx,
205 swr_geometry_shader *swr_gs)
206 {
207 memset(&key, 0, sizeof(key));
208
209 struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base;
210
211 memcpy(&key.vs_output_semantic_name,
212 &pPrevShader->output_semantic_name,
213 sizeof(key.vs_output_semantic_name));
214 memcpy(&key.vs_output_semantic_idx,
215 &pPrevShader->output_semantic_index,
216 sizeof(key.vs_output_semantic_idx));
217
218 swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key);
219 }
220
221 struct BuilderSWR : public Builder {
222 BuilderSWR(JitManager *pJitMgr, const char *pName)
223 : Builder(pJitMgr)
224 {
225 pJitMgr->SetupNewModule();
226 gallivm = gallivm_create(pName, wrap(&JM()->mContext));
227 pJitMgr->mpCurrentModule = unwrap(gallivm->module);
228 }
229
230 ~BuilderSWR() {
231 gallivm_free_ir(gallivm);
232 }
233
234 void WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput,
235 unsigned slot, unsigned channel);
236
237 struct gallivm_state *gallivm;
238 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key);
239 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key);
240 PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key);
241
242 LLVMValueRef
243 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
244 struct lp_build_context * bld,
245 boolean is_vindex_indirect,
246 LLVMValueRef vertex_index,
247 boolean is_aindex_indirect,
248 LLVMValueRef attrib_index,
249 LLVMValueRef swizzle_index);
250 void
251 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
252 struct lp_build_context * bld,
253 LLVMValueRef (*outputs)[4],
254 LLVMValueRef emitted_vertices_vec,
255 LLVMValueRef stream_id);
256
257 void
258 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
259 struct lp_build_context * bld,
260 LLVMValueRef total_emitted_vertices_vec_ptr,
261 LLVMValueRef verts_per_prim_vec,
262 LLVMValueRef emitted_prims_vec,
263 LLVMValueRef mask_vec);
264
265 void
266 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
267 LLVMValueRef total_emitted_vertices_vec,
268 LLVMValueRef emitted_prims_vec);
269
270 };
271
272 struct swr_gs_llvm_iface {
273 struct lp_build_gs_iface base;
274 struct tgsi_shader_info *info;
275
276 BuilderSWR *pBuilder;
277
278 Value *pGsCtx;
279 SWR_GS_STATE *pGsState;
280 uint32_t num_outputs;
281 uint32_t num_verts_per_prim;
282
283 Value *pVtxAttribMap;
284 };
285
286 // trampoline functions so we can use the builder's LLVM construction methods
287 static LLVMValueRef
288 swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
289 struct lp_build_context * bld,
290 boolean is_vindex_indirect,
291 LLVMValueRef vertex_index,
292 boolean is_aindex_indirect,
293 LLVMValueRef attrib_index,
294 LLVMValueRef swizzle_index)
295 {
296 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
297
298 return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld,
299 is_vindex_indirect,
300 vertex_index,
301 is_aindex_indirect,
302 attrib_index,
303 swizzle_index);
304 }
305
306 static void
307 swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
308 struct lp_build_context * bld,
309 LLVMValueRef (*outputs)[4],
310 LLVMValueRef emitted_vertices_vec,
311 LLVMValueRef stream_id)
312 {
313 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
314
315 iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld,
316 outputs,
317 emitted_vertices_vec,
318 stream_id);
319 }
320
321 static void
322 swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
323 struct lp_build_context * bld,
324 LLVMValueRef total_emitted_vertices_vec_ptr,
325 LLVMValueRef verts_per_prim_vec,
326 LLVMValueRef emitted_prims_vec,
327 LLVMValueRef mask_vec)
328 {
329 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
330
331 iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld,
332 total_emitted_vertices_vec_ptr,
333 verts_per_prim_vec,
334 emitted_prims_vec,
335 mask_vec);
336 }
337
338 static void
339 swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
340 LLVMValueRef total_emitted_vertices_vec,
341 LLVMValueRef emitted_prims_vec)
342 {
343 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
344
345 iface->pBuilder->swr_gs_llvm_epilogue(gs_base,
346 total_emitted_vertices_vec,
347 emitted_prims_vec);
348 }
349
350 LLVMValueRef
351 BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_gs_iface *gs_iface,
352 struct lp_build_context * bld,
353 boolean is_vindex_indirect,
354 LLVMValueRef vertex_index,
355 boolean is_aindex_indirect,
356 LLVMValueRef attrib_index,
357 LLVMValueRef swizzle_index)
358 {
359 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface;
360 Value *vert_index = unwrap(vertex_index);
361 Value *attr_index = unwrap(attrib_index);
362
363 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
364
365 if (is_vindex_indirect || is_aindex_indirect) {
366 int i;
367 Value *res = unwrap(bld->zero);
368 struct lp_type type = bld->type;
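// Indirect case: the vertex and/or attribute index differs per SIMD lane,
// so scalarize - extract each lane's indices, load that lane's component,
// and reassemble the result vector one lane at a time.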
369
370 for (i = 0; i < type.length; i++) {
371 Value *vert_chan_index = vert_index;
372 Value *attr_chan_index = attr_index;
373
374 if (is_vindex_indirect) {
375 vert_chan_index = VEXTRACT(vert_index, C(i));
376 }
377 if (is_aindex_indirect) {
378 attr_chan_index = VEXTRACT(attr_index, C(i));
379 }
380
381 Value *attrib =
382 LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_chan_index}));
383
384 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
385 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
386
387 Value *pVector = ADD(MUL(vert_chan_index, pInputVertStride), attrib);
388 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
389
390 Value *value = VEXTRACT(pInput, C(i));
391 res = VINSERT(res, value, C(i));
392 }
393
394 return wrap(res);
395 } else {
396 Value *attrib = LOAD(GEP(iface->pVtxAttribMap, {C(0), attr_index}));
397
398 Value *pVertex = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pVerts});
399 Value *pInputVertStride = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_inputVertStride});
400
401 Value *pVector = ADD(MUL(vert_index, pInputVertStride), attrib);
402
403 Value *pInput = LOAD(GEP(pVertex, {pVector, unwrap(swizzle_index)}));
404
405 return wrap(pInput);
406 }
407 }
408
409 // GS output stream layout
410 #define VERTEX_COUNT_SIZE 32
411 #define CONTROL_HEADER_SIZE (8*32)
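// Each per-lane output stream begins with a vertex count header
// (VERTEX_COUNT_SIZE bytes), followed by a control header holding the
// per-vertex cut / stream-id bits (CONTROL_HEADER_SIZE bytes), followed by
// the emitted vertices themselves (SWR_VTX_NUM_SLOTS slots of 4 floats each).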
412
413 void
414 BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_gs_iface *gs_base,
415 struct lp_build_context * bld,
416 LLVMValueRef (*outputs)[4],
417 LLVMValueRef emitted_vertices_vec,
418 LLVMValueRef stream_id)
419 {
420 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
421
422 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
423 const uint32_t headerSize = VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE;
424 const uint32_t attribSize = 4 * sizeof(float);
425 const uint32_t vertSize = attribSize * SWR_VTX_NUM_SLOTS;
426 Value *pVertexOffset = MUL(unwrap(emitted_vertices_vec), VIMMED1(vertSize));
427
428 Value *vMask = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_mask});
429 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, mVWidth));
430
431 Value *pStack = STACKSAVE();
432 Value *pTmpPtr = ALLOCA(mFP32Ty, C(4)); // used for dummy write for lane masking
433
434 for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) {
435 uint32_t attribSlot = attrib;
436 uint32_t sgvChannel = 0;
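// Map the TGSI output semantic to its SWR vertex slot: point size, layer and
// viewport index each land in a single channel of the SGV slot, position gets
// its own slot, and everything else goes to the generic attribute slots
// (shifted down by one when the shader also writes position).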
437 if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
438 attribSlot = VERTEX_SGV_SLOT;
439 sgvChannel = VERTEX_SGV_POINT_SIZE_COMP;
440 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_LAYER) {
441 attribSlot = VERTEX_SGV_SLOT;
442 sgvChannel = VERTEX_SGV_RTAI_COMP;
443 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_VIEWPORT_INDEX) {
444 attribSlot = VERTEX_SGV_SLOT;
445 sgvChannel = VERTEX_SGV_VAI_COMP;
446 } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
447 attribSlot = VERTEX_POSITION_SLOT;
448 } else {
449 attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
450 if (iface->info->writes_position) {
451 attribSlot--;
452 }
453 }
454
455 Value *pOutputOffset = ADD(pVertexOffset, VIMMED1(headerSize + attribSize * attribSlot)); // + sgvChannel ?
456
457 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
458 Value *pLaneOffset = VEXTRACT(pOutputOffset, C(lane));
459 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
460 Value *pStreamOffset = GEP(pStream, pLaneOffset);
461 pStreamOffset = BITCAST(pStreamOffset, mFP32PtrTy);
462
463 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
464 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
465
466 for (uint32_t channel = 0; channel < 4; ++channel) {
467 Value *vData;
468
469 if (attribSlot == VERTEX_SGV_SLOT)
470 vData = LOAD(unwrap(outputs[attrib][0]));
471 else
472 vData = LOAD(unwrap(outputs[attrib][channel]));
473
474 if (attribSlot != VERTEX_SGV_SLOT ||
475 sgvChannel == channel) {
476 vData = VEXTRACT(vData, C(lane));
477 STORE(vData, pStreamOffset);
478 }
479 pStreamOffset = GEP(pStreamOffset, C(1));
480 }
481 }
482 }
483
484 /* When the output type is not points, the geometry shader may not
485 * output data to multiple streams, so exit early here.
486 */
487 if(iface->pGsState->outputTopology != TOP_POINT_LIST) {
488 STACKRESTORE(pStack);
489 return;
490 }
491
492 // The stream id for each vertex is encoded
493 // in 2 bits (4 verts per byte "box"):
494 // ----------------- ----------------- ----
495 // |d|d|c|c|b|b|a|a| |h|h|g|g|f|f|e|e| |...
496 // ----------------- ----------------- ----
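// e.g. the stream id of vertex 3 lands in byte 0 of the control data
// (3 >> 2 selects the byte) shifted left by 6 bits (3 * 2 gives the shift).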
497
498 // Calculate the bit position of the current vert's stream id
499 // within its 1-byte "box".
500 Value *pShiftControl = MUL(unwrap(emitted_vertices_vec), VIMMED1(2));
501
502 // Calculate which byte "box" holds the stream id for the current vert.
503 Value *pOffsetControl = LSHR(unwrap(emitted_vertices_vec), VIMMED1(2));
504
505 // Skip count header
506 Value *pStreamIdOffset = ADD(pOffsetControl, VIMMED1(VERTEX_COUNT_SIZE));
507
508 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
509 Value *pShift = TRUNC(VEXTRACT(pShiftControl, C(lane)), mInt8Ty);
510 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
511
512 Value *pStreamOffset = GEP(pStream, VEXTRACT(pStreamIdOffset, C(lane)));
513
514 // Make sure we don't overflow the max - stream id is in (0,1,2,3)
515 Value *vVal = TRUNC(AND(VEXTRACT(unwrap(stream_id), C(0)), C(0x3)), mInt8Ty);
516
517 // Shift it to the correct position in the byte "box"
518 vVal = SHL(vVal, pShift);
519
520 // Info about other vertices may already be stored,
521 // so read-modify-write to OR in the current vert's bits.
522 Value *storedValue = LOAD(pStreamOffset);
523 vVal = OR(storedValue, vVal);
524 STORE(vVal, pStreamOffset);
525 }
526
527 STACKRESTORE(pStack);
528 }
529
530 void
531 BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_gs_iface *gs_base,
532 struct lp_build_context * bld,
533 LLVMValueRef total_emitted_vertices_vec,
534 LLVMValueRef verts_per_prim_vec,
535 LLVMValueRef emitted_prims_vec,
536 LLVMValueRef mask_vec)
537 {
538 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
539
540 /* When the output type is points, the geometry shader may output data
541 * to multiple streams, and end_primitive has no effect. The stream id
542 * info for vertices is stored in the same place in memory as the
543 * end-primitive info, so exit early in this case.
544 */
545 if (iface->pGsState->outputTopology == TOP_POINT_LIST) {
546 return;
547 }
548
549 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
550
551 Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask });
552 Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8));
553
554 uint32_t vertsPerPrim = iface->num_verts_per_prim;
555
556 Value *vCount =
557 ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)),
558 unwrap(verts_per_prim_vec));
559
560 vCount = unwrap(total_emitted_vertices_vec);
561
562 Value *mask = unwrap(mask_vec);
563 Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0)));
564 mask = AND(mask, cmpMask);
565 vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8));
566
567 vCount = SUB(vCount, VIMMED1(1));
568 Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), VIMMED1(VERTEX_COUNT_SIZE));
569 Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8)));
570
571 vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8));
572
573 Value *pStack = STACKSAVE();
574 Value *pTmpPtr = ALLOCA(mInt8Ty, C(4)); // used for dummy read/write for lane masking
575
576 for (uint32_t lane = 0; lane < mVWidth; ++lane) {
577 Value *vLaneOffset = VEXTRACT(vOffset, C(lane));
578 Value *pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
579 Value *pStreamOffset = GEP(pStream, vLaneOffset);
580
581 Value *pLaneMask = VEXTRACT(vMask1, C(lane));
582 pStreamOffset = SELECT(pLaneMask, pStreamOffset, pTmpPtr);
583
584 Value *vVal = LOAD(pStreamOffset);
585 vVal = OR(vVal, VEXTRACT(vValue, C(lane)));
586 STORE(vVal, pStreamOffset);
587 }
588
589 STACKRESTORE(pStack);
590 }
591
592 void
593 BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_gs_iface *gs_base,
594 LLVMValueRef total_emitted_vertices_vec,
595 LLVMValueRef emitted_prims_vec)
596 {
597 swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base;
598
599 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
600
601 // Store emit count to each output stream in the first DWORD
602 for (uint32_t lane = 0; lane < mVWidth; ++lane)
603 {
604 Value* pStream = LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
605 pStream = BITCAST(pStream, mInt32PtrTy);
606 Value* pLaneCount = VEXTRACT(unwrap(total_emitted_vertices_vec), C(lane));
607 STORE(pLaneCount, pStream);
608 }
609 }
610
611 PFN_GS_FUNC
612 BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key)
613 {
614 SWR_GS_STATE *pGS = &ctx->gs->gsState;
615 struct tgsi_shader_info *info = &ctx->gs->info.base;
616
617 memset(pGS, 0, sizeof(*pGS));
618
619 pGS->gsEnable = true;
620
621 pGS->numInputAttribs = (VERTEX_ATTRIB_START_SLOT - VERTEX_POSITION_SLOT) + info->num_inputs;
622 pGS->outputTopology =
623 swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
624
625 /* It's +1 because emit_vertex in swr is always called exactly one more time
626 * than the max_vertices declared by the geometry shader. We need to allocate
627 * the extra memory to avoid a crash or memory being overwritten.
628 */
629 pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] + 1;
630 pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS];
631
632 // If the output is a point primitive, assume multiple streams may be used
633 if(pGS->outputTopology == TOP_POINT_LIST) {
634 pGS->isSingleStream = false;
635 } else {
636 pGS->isSingleStream = true;
637 pGS->singleStreamID = 0;
638 }
639
640 pGS->vertexAttribOffset = VERTEX_POSITION_SLOT;
641 pGS->inputVertStride = pGS->numInputAttribs + pGS->vertexAttribOffset;
642 pGS->outputVertexSize = SWR_VTX_NUM_SLOTS;
643 pGS->controlDataSize = 8; // GS outputs a max of 8 32B units
644 pGS->controlDataOffset = VERTEX_COUNT_SIZE;
645 pGS->outputVertexOffset = pGS->controlDataOffset + CONTROL_HEADER_SIZE;
646
647 pGS->allocationSize =
648 VERTEX_COUNT_SIZE + // vertex count
649 CONTROL_HEADER_SIZE + // control header
650 (SWR_VTX_NUM_SLOTS * 16) * // sizeof vertex
651 pGS->maxNumVerts; // num verts
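// 16 bytes per slot = 4 float channels, matching the vertSize used when
// writing vertices in swr_gs_llvm_emit_vertex above.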
652
653 struct swr_geometry_shader *gs = ctx->gs;
654
655 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
656 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
657
658 memset(outputs, 0, sizeof(outputs));
659
660 AttrBuilder attrBuilder;
661 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
662
663 std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
664 PointerType::get(mInt8Ty, 0),
665 PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)};
666 FunctionType *vsFuncType =
667 FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false);
668
669 // create new geometry shader function
670 auto pFunction = Function::Create(vsFuncType,
671 GlobalValue::ExternalLinkage,
672 "GS",
673 JM()->mpCurrentModule);
674 #if LLVM_VERSION_MAJOR < 5
675 AttributeSet attrSet = AttributeSet::get(
676 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
677 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
678 #else
679 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
680 #endif
681
682 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
683 IRB()->SetInsertPoint(block);
684 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
685
686 auto argitr = pFunction->arg_begin();
687 Value *hPrivateData = &*argitr++;
688 hPrivateData->setName("hPrivateData");
689 Value *pWorkerData = &*argitr++;
690 pWorkerData->setName("pWorkerData");
691 Value *pGsCtx = &*argitr++;
692 pGsCtx->setName("gsCtx");
693
694 Value *consts_ptr =
695 GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)});
696 consts_ptr->setName("gs_constants");
697 Value *const_sizes_ptr =
698 GEP(hPrivateData, {0, swr_draw_context_num_constantsGS});
699 const_sizes_ptr->setName("num_gs_constants");
700
701 struct lp_build_sampler_soa *sampler =
702 swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY);
703
704 struct lp_bld_tgsi_system_values system_values;
705 memset(&system_values, 0, sizeof(system_values));
706 system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID}));
707 system_values.invocation_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID}));
708
709 std::vector<Constant*> mapConstants;
710 Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, PIPE_MAX_SHADER_INPUTS));
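// vtxAttribMap translates each GS input slot to the SWR vertex slot the
// vertex shader wrote it to; swr_gs_llvm_fetch_input indexes this table
// to locate its inputs.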
711 for (unsigned slot = 0; slot < info->num_inputs; slot++) {
712 ubyte semantic_name = info->input_semantic_name[slot];
713 ubyte semantic_idx = info->input_semantic_index[slot];
714
715 unsigned vs_slot = locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base);
716
717 vs_slot += VERTEX_ATTRIB_START_SLOT;
718
719 if (ctx->vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
720 vs_slot--;
721
722 if (semantic_name == TGSI_SEMANTIC_POSITION)
723 vs_slot = VERTEX_POSITION_SLOT;
724
725 STORE(C(vs_slot), vtxAttribMap, {0, slot});
726 mapConstants.push_back(C(vs_slot));
727 }
728
729 struct lp_build_mask_context mask;
730 Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask");
731 lp_build_mask_begin(&mask, gallivm,
732 lp_type_float_vec(32, 32 * 8), wrap(mask_val));
733
734 // zero out cut buffer so we can load/modify/store bits
735 for (uint32_t lane = 0; lane < mVWidth; ++lane)
736 {
737 Value* pStream = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pStreams, lane});
738 MEMSET(pStream, C((char)0), VERTEX_COUNT_SIZE + CONTROL_HEADER_SIZE, sizeof(float) * KNOB_SIMD_WIDTH);
739 }
740
741 struct swr_gs_llvm_iface gs_iface;
742 gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input;
743 gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex;
744 gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive;
745 gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue;
746 gs_iface.pBuilder = this;
747 gs_iface.pGsCtx = pGsCtx;
748 gs_iface.pGsState = pGS;
749 gs_iface.num_outputs = gs->info.base.num_outputs;
750 gs_iface.num_verts_per_prim =
751 u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]);
752 gs_iface.info = info;
753 gs_iface.pVtxAttribMap = vtxAttribMap;
754
755 struct lp_build_tgsi_params params;
756 memset(&params, 0, sizeof(params));
757 params.type = lp_type_float_vec(32, 32 * 8);
758 params.mask = & mask;
759 params.consts_ptr = wrap(consts_ptr);
760 params.const_sizes_ptr = wrap(const_sizes_ptr);
761 params.system_values = &system_values;
762 params.inputs = inputs;
763 params.context_ptr = wrap(hPrivateData);
764 params.sampler = sampler;
765 params.info = &gs->info.base;
766 params.gs_iface = &gs_iface.base;
767
768 lp_build_tgsi_soa(gallivm,
769 gs->pipe.tokens,
770 &params,
771 outputs);
772
773 lp_build_mask_end(&mask);
774
775 sampler->destroy(sampler);
776
777 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
778
779 RET_VOID();
780
781 gallivm_verify_function(gallivm, wrap(pFunction));
782 gallivm_compile_module(gallivm);
783
784 PFN_GS_FUNC pFunc =
785 (PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
786
787 debug_printf("geom shader %p\n", pFunc);
788 assert(pFunc && "Error: GeomShader = NULL");
789
790 JM()->mIsModuleFinalized = true;
791
792 return pFunc;
793 }
794
795 PFN_GS_FUNC
796 swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key)
797 {
798 BuilderSWR builder(
799 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
800 "GS");
801 PFN_GS_FUNC func = builder.CompileGS(ctx, key);
802
803 ctx->gs->map.insert(std::make_pair(key, std::make_unique<VariantGS>(builder.gallivm, func)));
804 return func;
805 }
806
807 void
808 BuilderSWR::WriteVS(Value *pVal, Value *pVsContext, Value *pVtxOutput, unsigned slot, unsigned channel)
809 {
810 #if USE_SIMD16_FRONTEND && !USE_SIMD16_VS
811 // interleave the simdvertex components into the dest simd16vertex
812 // slot16offset = slot8offset * 2
813 // comp16offset = comp8offset * 2 + alternateOffset
814
815 Value *offset = LOAD(pVsContext, { 0, SWR_VS_CONTEXT_AlternateOffset });
816 Value *pOut = GEP(pVtxOutput, { C(0), C(0), C(slot * 2), offset } );
817 STORE(pVal, pOut, {channel * 2});
818 #else
819 Value *pOut = GEP(pVtxOutput, {0, 0, slot});
820 STORE(pVal, pOut, {0, channel});
821 #endif
822 }
823
824 PFN_VERTEX_FUNC
825 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
826 {
827 struct swr_vertex_shader *swr_vs = ctx->vs;
828
829 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
830 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
831
832 memset(outputs, 0, sizeof(outputs));
833
834 AttrBuilder attrBuilder;
835 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
836
837 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
838 PointerType::get(mInt8Ty, 0),
839 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
840 FunctionType *vsFuncType =
841 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false);
842
843 // create new vertex shader function
844 auto pFunction = Function::Create(vsFuncType,
845 GlobalValue::ExternalLinkage,
846 "VS",
847 JM()->mpCurrentModule);
848 #if LLVM_VERSION_MAJOR < 5
849 AttributeSet attrSet = AttributeSet::get(
850 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
851 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
852 #else
853 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
854 #endif
855
856 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
857 IRB()->SetInsertPoint(block);
858 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
859
860 auto argitr = pFunction->arg_begin();
861 Value *hPrivateData = &*argitr++;
862 hPrivateData->setName("hPrivateData");
863 Value *pWorkerData = &*argitr++;
864 pWorkerData->setName("pWorkerData");
865 Value *pVsCtx = &*argitr++;
866 pVsCtx->setName("vsCtx");
867
868 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)});
869
870 consts_ptr->setName("vs_constants");
871 Value *const_sizes_ptr =
872 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS});
873 const_sizes_ptr->setName("num_vs_constants");
874
875 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin});
876 #if USE_SIMD16_VS
877 vtxInput = BITCAST(vtxInput, PointerType::get(Gen_simd16vertex(JM()), 0));
878 #endif
879
880 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
881 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib];
882 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
883 if (mask & (1 << channel)) {
884 inputs[attrib][channel] =
885 wrap(LOAD(vtxInput, {0, 0, attrib, channel}));
886 }
887 }
888 }
889
890 struct lp_build_sampler_soa *sampler =
891 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX);
892
893 struct lp_bld_tgsi_system_values system_values;
894 memset(&system_values, 0, sizeof(system_values));
895 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID}));
896
897 #if USE_SIMD16_VS
898 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID16}));
899 #else
900 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID}));
901 #endif
902
903 #if USE_SIMD16_VS
904 uint32_t vectorWidth = mVWidth16;
905 #else
906 uint32_t vectorWidth = mVWidth;
907 #endif
908
909 struct lp_build_tgsi_params params;
910 memset(&params, 0, sizeof(params));
911 params.type = lp_type_float_vec(32, 32 * vectorWidth);
912 params.consts_ptr = wrap(consts_ptr);
913 params.const_sizes_ptr = wrap(const_sizes_ptr);
914 params.system_values = &system_values;
915 params.inputs = inputs;
916 params.context_ptr = wrap(hPrivateData);
917 params.sampler = sampler;
918 params.info = &swr_vs->info.base;
919
920 lp_build_tgsi_soa(gallivm,
921 swr_vs->pipe.tokens,
922 &params,
923 outputs);
924
925 sampler->destroy(sampler);
926
927 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
928
929 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout});
930 #if USE_SIMD16_VS
931 vtxOutput = BITCAST(vtxOutput, PointerType::get(Gen_simd16vertex(JM()), 0));
932 #endif
933
934 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
935 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) {
936 if (!outputs[attrib][channel])
937 continue;
938
939 Value *val;
940 uint32_t outSlot;
941
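// Route each TGSI output to its SWR vertex slot: point size goes to a single
// channel of the SGV slot, position to the position slot, and generic
// outputs to the attribute slots (shifted down by one when output 0 is
// position).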
942 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) {
943 if (channel != VERTEX_SGV_POINT_SIZE_COMP)
944 continue;
945 val = LOAD(unwrap(outputs[attrib][0]));
946 outSlot = VERTEX_SGV_SLOT;
947 } else if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
948 val = LOAD(unwrap(outputs[attrib][channel]));
949 outSlot = VERTEX_POSITION_SLOT;
950 } else {
951 val = LOAD(unwrap(outputs[attrib][channel]));
952 outSlot = VERTEX_ATTRIB_START_SLOT + attrib;
953 if (swr_vs->info.base.output_semantic_name[0] == TGSI_SEMANTIC_POSITION)
954 outSlot--;
955 }
956
957 WriteVS(val, pVsCtx, vtxOutput, outSlot, channel);
958 }
959 }
960
961 if (ctx->rasterizer->clip_plane_enable ||
962 swr_vs->info.base.culldist_writemask) {
963 unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
964
965 unsigned cv = 0;
966 if (swr_vs->info.base.writes_clipvertex) {
967 cv = locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
968 &swr_vs->info.base);
969 } else {
970 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
971 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
972 swr_vs->info.base.output_semantic_index[i] == 0) {
973 cv = i;
974 break;
975 }
976 }
977 }
978 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
979 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
980 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
981 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
982
983 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
984 // clip distance overrides user clip planes
985 if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
986 ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) {
987 unsigned cv = locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1,
988 &swr_vs->info.base);
989 if (val < 4) {
990 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], "");
991 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
992 } else {
993 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], "");
994 WriteVS(unwrap(dist), pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
995 }
996 continue;
997 }
998
999 if (!(clip_mask & (1 << val)))
1000 continue;
1001
1002 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
1003 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
1004 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
1005 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
1006 #if USE_SIMD16_VS
1007 Value *bpx = VBROADCAST_16(px);
1008 Value *bpy = VBROADCAST_16(py);
1009 Value *bpz = VBROADCAST_16(pz);
1010 Value *bpw = VBROADCAST_16(pw);
1011 #else
1012 Value *bpx = VBROADCAST(px);
1013 Value *bpy = VBROADCAST(py);
1014 Value *bpz = VBROADCAST(pz);
1015 Value *bpw = VBROADCAST(pw);
1016 #endif
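// Clip distance against a user clip plane is the dot product of the clip
// vertex (or position) with the plane equation, broadcast across the SIMD
// lanes.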
1017 Value *dist = FADD(FMUL(unwrap(cx), bpx),
1018 FADD(FMUL(unwrap(cy), bpy),
1019 FADD(FMUL(unwrap(cz), bpz),
1020 FMUL(unwrap(cw), bpw))));
1021
1022 if (val < 4)
1023 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_LO_SLOT, val);
1024 else
1025 WriteVS(dist, pVsCtx, vtxOutput, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4);
1026 }
1027 }
1028
1029 RET_VOID();
1030
1031 gallivm_verify_function(gallivm, wrap(pFunction));
1032 gallivm_compile_module(gallivm);
1033
1034 // lp_debug_dump_value(func);
1035
1036 PFN_VERTEX_FUNC pFunc =
1037 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction));
1038
1039 debug_printf("vert shader %p\n", pFunc);
1040 assert(pFunc && "Error: VertShader = NULL");
1041
1042 JM()->mIsModuleFinalized = true;
1043
1044 return pFunc;
1045 }
1046
1047 PFN_VERTEX_FUNC
1048 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
1049 {
1050 if (!ctx->vs->pipe.tokens)
1051 return NULL;
1052
1053 BuilderSWR builder(
1054 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
1055 "VS");
1056 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key);
1057
1058 ctx->vs->map.insert(std::make_pair(key, std::make_unique<VariantVS>(builder.gallivm, func)));
1059 return func;
1060 }
1061
1062 unsigned
1063 swr_so_adjust_attrib(unsigned in_attrib,
1064 swr_vertex_shader *swr_vs)
1065 {
1066 ubyte semantic_name;
1067 unsigned attrib;
1068
1069 attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
1070
1071 if (swr_vs) {
1072 semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
1073 if (semantic_name == TGSI_SEMANTIC_POSITION) {
1074 attrib = VERTEX_POSITION_SLOT;
1075 } else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
1076 attrib = VERTEX_SGV_SLOT;
1077 } else if (semantic_name == TGSI_SEMANTIC_LAYER) {
1078 attrib = VERTEX_SGV_SLOT;
1079 } else {
1080 if (swr_vs->info.base.writes_position) {
1081 attrib--;
1082 }
1083 }
1084 }
1085
1086 return attrib;
1087 }
1088
1089 static unsigned
1090 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
1091 {
1092 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
1093 if ((info->output_semantic_name[i] == name)
1094 && (info->output_semantic_index[i] == index)) {
1095 return i;
1096 }
1097 }
1098
1099 return 0xFFFFFFFF;
1100 }
1101
1102 PFN_PIXEL_KERNEL
1103 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key)
1104 {
1105 struct swr_fragment_shader *swr_fs = ctx->fs;
1106
1107 struct tgsi_shader_info *pPrevShader;
1108 if (ctx->gs)
1109 pPrevShader = &ctx->gs->info.base;
1110 else
1111 pPrevShader = &ctx->vs->info.base;
1112
1113 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
1114 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
1115
1116 memset(inputs, 0, sizeof(inputs));
1117 memset(outputs, 0, sizeof(outputs));
1118
1119 struct lp_build_sampler_soa *sampler = NULL;
1120
1121 AttrBuilder attrBuilder;
1122 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float));
1123
1124 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0),
1125 PointerType::get(mInt8Ty, 0),
1126 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
1127 FunctionType *funcType =
1128 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false);
1129
1130 auto pFunction = Function::Create(funcType,
1131 GlobalValue::ExternalLinkage,
1132 "FS",
1133 JM()->mpCurrentModule);
1134 #if LLVM_VERSION_MAJOR < 5
1135 AttributeSet attrSet = AttributeSet::get(
1136 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder);
1137 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet);
1138 #else
1139 pFunction->addAttributes(AttributeList::FunctionIndex, attrBuilder);
1140 #endif
1141
1142 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction);
1143 IRB()->SetInsertPoint(block);
1144 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block));
1145
1146 auto args = pFunction->arg_begin();
1147 Value *hPrivateData = &*args++;
1148 hPrivateData->setName("hPrivateData");
1149 Value *pWorkerData = &*args++;
1150 pWorkerData->setName("pWorkerData");
1151 Value *pPS = &*args++;
1152 pPS->setName("psCtx");
1153
1154 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS});
1155 consts_ptr->setName("fs_constants");
1156 Value *const_sizes_ptr =
1157 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS});
1158 const_sizes_ptr->setName("num_fs_constants");
1159
1160 // load *pAttribs, *pPerspAttribs
1161 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs");
1162 Value *pPerspAttribs =
1163 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs");
1164
1165 swr_fs->constantMask = 0;
1166 swr_fs->flatConstantMask = 0;
1167 swr_fs->pointSpriteMask = 0;
1168
1169 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) {
1170 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib];
1171 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib];
1172 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib];
1173
1174 if (!mask)
1175 continue;
1176
1177 // load i,j
1178 Value *vi = nullptr, *vj = nullptr;
1179 switch (interpLoc) {
1180 case TGSI_INTERPOLATE_LOC_CENTER:
1181 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i");
1182 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j");
1183 break;
1184 case TGSI_INTERPOLATE_LOC_CENTROID:
1185 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i");
1186 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j");
1187 break;
1188 case TGSI_INTERPOLATE_LOC_SAMPLE:
1189 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i");
1190 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j");
1191 break;
1192 }
1193
1194 // load/compute w
1195 Value *vw = nullptr, *pAttribs;
1196 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1197 interpMode == TGSI_INTERPOLATE_COLOR) {
1198 pAttribs = pPerspAttribs;
1199 switch (interpLoc) {
1200 case TGSI_INTERPOLATE_LOC_CENTER:
1201 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}));
1202 break;
1203 case TGSI_INTERPOLATE_LOC_CENTROID:
1204 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid}));
1205 break;
1206 case TGSI_INTERPOLATE_LOC_SAMPLE:
1207 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample}));
1208 break;
1209 }
1210 } else {
1211 pAttribs = pRawAttribs;
1212 vw = VIMMED1(1.f);
1213 }
1214
1215 vw->setName("w");
1216
1217 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib];
1218 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib];
1219
1220 if (semantic_name == TGSI_SEMANTIC_FACE) {
1221 Value *ff =
1222 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty);
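// scale/bias frontFace from {1, 0} to {+1.0, -1.0} so front-facing
// fragments read a positive face value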
1223 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f));
1224 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace");
1225
1226 inputs[attrib][0] = wrap(ff);
1227 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1228 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1229 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1230 continue;
1231 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord
1232 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] ==
1233 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) {
1234 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX"));
1235 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY"));
1236 } else {
1237 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX"));
1238 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY"));
1239 }
1240 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ"));
1241 inputs[attrib][3] =
1242 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW"));
1243 continue;
1244 } else if (semantic_name == TGSI_SEMANTIC_LAYER) { // gl_Layer
1245 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_renderTargetArrayIndex});
1246 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vRenderTargetArrayIndex");
1247 inputs[attrib][0] = wrap(ff);
1248 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1249 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1250 inputs[attrib][3] = wrap(VIMMED1(0.0f));
1251 continue;
1252 } else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { // gl_ViewportIndex
1253 Value *ff = LOAD(pPS, {0, SWR_PS_CONTEXT_viewportIndex});
1254 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vViewportIndex");
1255 inputs[attrib][0] = wrap(ff);
1256 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1257 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1258 inputs[attrib][3] = wrap(VIMMED1(0.0f));
1259 continue;
1260 }
1261 unsigned linkedAttrib =
1262 locate_linkage(semantic_name, semantic_idx, pPrevShader) - 1;
1263
1264 uint32_t extraAttribs = 0;
1265 if (semantic_name == TGSI_SEMANTIC_PRIMID && !ctx->gs) {
1266 /* non-gs generated primID - need to grab from swizzleMap override */
1267 linkedAttrib = pPrevShader->num_outputs - 1;
1268 swr_fs->constantMask |= 1 << linkedAttrib;
1269 extraAttribs++;
1270 } else if (semantic_name == TGSI_SEMANTIC_GENERIC &&
1271 key.sprite_coord_enable & (1 << semantic_idx)) {
1272 /* we add an extra attrib to the backendState in swr_update_derived. */
1273 linkedAttrib = pPrevShader->num_outputs + extraAttribs - 1;
1274 swr_fs->pointSpriteMask |= (1 << linkedAttrib);
1275 extraAttribs++;
1276 } else if (linkedAttrib == 0xFFFFFFFF) {
1277 inputs[attrib][0] = wrap(VIMMED1(0.0f));
1278 inputs[attrib][1] = wrap(VIMMED1(0.0f));
1279 inputs[attrib][2] = wrap(VIMMED1(0.0f));
1280 inputs[attrib][3] = wrap(VIMMED1(1.0f));
1281 /* If we're reading in color and 2-sided lighting is enabled, we have
1282 * to keep going.
1283 */
1284 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside)
1285 continue;
1286 } else {
1287 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1288 swr_fs->constantMask |= 1 << linkedAttrib;
1289 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1290 swr_fs->flatConstantMask |= 1 << linkedAttrib;
1291 }
1292 }
1293
1294 unsigned bcolorAttrib = 0xFFFFFFFF;
1295 Value *offset = NULL;
1296 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) {
1297 bcolorAttrib = locate_linkage(
1298 TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader) - 1;
1299 /* Neither front nor back colors were available. Nothing to load. */
1300 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF)
1301 continue;
1302 /* If there is no front color, just always use the back color. */
1303 if (linkedAttrib == 0xFFFFFFFF)
1304 linkedAttrib = bcolorAttrib;
1305
1306 if (bcolorAttrib != 0xFFFFFFFF) {
1307 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1308 swr_fs->constantMask |= 1 << bcolorAttrib;
1309 } else if (interpMode == TGSI_INTERPOLATE_COLOR) {
1310 swr_fs->flatConstantMask |= 1 << bcolorAttrib;
1311 }
1312
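// Each attribute occupies 12 floats of interpolation coefficients, so this
// is the offset from the front color to the back color; it is applied to
// the coefficient indices below only on back-facing fragments
// (back = !frontFace).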
1313 unsigned diff = 12 * (bcolorAttrib - linkedAttrib);
1314
1315 if (diff) {
1316 Value *back =
1317 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace");
1318
1319 offset = MUL(back, C(diff));
1320 offset->setName("offset");
1321 }
1322 }
1323 }
1324
1325 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1326 if (mask & (1 << channel)) {
1327 Value *indexA = C(linkedAttrib * 12 + channel);
1328 Value *indexB = C(linkedAttrib * 12 + channel + 4);
1329 Value *indexC = C(linkedAttrib * 12 + channel + 8);
1330
1331 if (offset) {
1332 indexA = ADD(indexA, offset);
1333 indexB = ADD(indexB, offset);
1334 indexC = ADD(indexC, offset);
1335 }
1336
1337 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA)));
1338 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB)));
1339 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC)));
1340
1341 if (interpMode == TGSI_INTERPOLATE_CONSTANT) {
1342 inputs[attrib][channel] = wrap(va);
1343 } else {
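// Barycentric evaluation: value = a*i + b*j + c*k with k = 1 - i - j;
// perspective and color attributes are then multiplied by w for
// perspective correction.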
1344 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj);
1345
1346 vc = FMUL(vk, vc);
1347
1348 Value *interp = FMUL(va, vi);
1349 Value *interp1 = FMUL(vb, vj);
1350 interp = FADD(interp, interp1);
1351 interp = FADD(interp, vc);
1352 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE ||
1353 interpMode == TGSI_INTERPOLATE_COLOR)
1354 interp = FMUL(interp, vw);
1355 inputs[attrib][channel] = wrap(interp);
1356 }
1357 }
1358 }
1359 }
1360
1361 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT);
1362
1363 struct lp_bld_tgsi_system_values system_values;
1364 memset(&system_values, 0, sizeof(system_values));
1365
1366 struct lp_build_mask_context mask;
1367 bool uses_mask = false;
1368
1369 if (swr_fs->info.base.uses_kill ||
1370 key.poly_stipple_enable) {
1371 Value *vActiveMask = NULL;
1372 if (swr_fs->info.base.uses_kill) {
1373 vActiveMask = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask");
1374 }
1375 if (key.poly_stipple_enable) {
1376 // first get fragment xy coords and clip to stipple bounds
1377 Value *vXf = LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL});
1378 Value *vYf = LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL});
1379 Value *vXu = FP_TO_UI(vXf, mSimdInt32Ty);
1380 Value *vYu = FP_TO_UI(vYf, mSimdInt32Ty);
1381
1382 // stipple pattern is 32x32, which means that one line of stipple
1383 // is stored in one word:
1384 // vXstipple is the bit offset inside a 32-bit stipple word
1385 // vYstipple is the word index in the stipple array
1386 Value *vXstipple = AND(vXu, VIMMED1(0x1f)); // & (32-1)
1387 Value *vYstipple = AND(vYu, VIMMED1(0x1f)); // & (32-1)
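// e.g. a fragment at x = 35, y = 2 wraps to stipple word 2 and tests
// bit (0x80000000 >> 3) of that word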
1388
1389 // grab stipple pattern base address
1390 Value *stipplePtr = GEP(hPrivateData, {0, swr_draw_context_polyStipple, 0});
1391 stipplePtr = BITCAST(stipplePtr, mInt8PtrTy);
1392
1393 // perform a gather to grab the stipple words for each lane
1394 Value *vStipple = GATHERDD(VUNDEF_I(), stipplePtr, vYstipple,
1395 VIMMED1(0xffffffff), 4);
1396
1397 // create a mask with one bit corresponding to the x stipple position
1398 // and AND it with the pattern to see if that bit is set
1399 Value *vBitMask = LSHR(VIMMED1(0x80000000), vXstipple);
1400 Value *vStippleMask = AND(vStipple, vBitMask);
1401 vStippleMask = ICMP_NE(vStippleMask, VIMMED1(0));
1402 vStippleMask = VMASK(vStippleMask);
1403
1404 if (swr_fs->info.base.uses_kill) {
1405 vActiveMask = AND(vActiveMask, vStippleMask);
1406 } else {
1407 vActiveMask = vStippleMask;
1408 }
1409 }
1410 lp_build_mask_begin(
1411 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(vActiveMask));
1412 uses_mask = true;
1413 }
1414
1415 struct lp_build_tgsi_params params;
1416 memset(&params, 0, sizeof(params));
1417 params.type = lp_type_float_vec(32, 32 * 8);
1418 params.mask = uses_mask ? &mask : NULL;
1419 params.consts_ptr = wrap(consts_ptr);
1420 params.const_sizes_ptr = wrap(const_sizes_ptr);
1421 params.system_values = &system_values;
1422 params.inputs = inputs;
1423 params.context_ptr = wrap(hPrivateData);
1424 params.sampler = sampler;
1425 params.info = &swr_fs->info.base;
1426
1427 lp_build_tgsi_soa(gallivm,
1428 swr_fs->pipe.tokens,
1429 &params,
1430 outputs);
1431
1432 sampler->destroy(sampler);
1433
1434 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1435
1436 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs;
1437 attrib++) {
1438 switch (swr_fs->info.base.output_semantic_name[attrib]) {
1439 case TGSI_SEMANTIC_POSITION: {
1440 // write z
1441 LLVMValueRef outZ =
1442 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], "");
1443 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ});
1444 break;
1445 }
1446 case TGSI_SEMANTIC_COLOR: {
1447 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) {
1448 if (!outputs[attrib][channel])
1449 continue;
1450
1451 LLVMValueRef out =
1452 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], "");
1453 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1454 swr_fs->info.base.output_semantic_index[attrib] == 0) {
1455 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) {
1456 STORE(unwrap(out),
1457 pPS,
1458 {0, SWR_PS_CONTEXT_shaded, rt, channel});
1459 }
1460 } else {
1461 STORE(unwrap(out),
1462 pPS,
1463 {0,
1464 SWR_PS_CONTEXT_shaded,
1465 swr_fs->info.base.output_semantic_index[attrib],
1466 channel});
1467 }
1468 }
1469 break;
1470 }
1471 default: {
1472 fprintf(stderr,
1473 "unknown output from FS %s[%d]\n",
1474 tgsi_semantic_names[swr_fs->info.base
1475 .output_semantic_name[attrib]],
1476 swr_fs->info.base.output_semantic_index[attrib]);
1477 break;
1478 }
1479 }
1480 }
1481
1482 LLVMValueRef mask_result = 0;
1483 if (uses_mask) {
1484 mask_result = lp_build_mask_end(&mask);
1485 }
1486
1487 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder)));
1488
1489 if (uses_mask) {
1490 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask});
1491 }
1492
1493 RET_VOID();
1494
1495 gallivm_verify_function(gallivm, wrap(pFunction));
1496
1497 gallivm_compile_module(gallivm);
1498
1499 // after the gallivm passes, we have to lower the core's intrinsics
1500 llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
1501 lowerPass.add(createLowerX86Pass(this));
1502 lowerPass.run(*pFunction);
1503
1504 PFN_PIXEL_KERNEL kernel =
1505 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
1506 debug_printf("frag shader %p\n", kernel);
1507 assert(kernel && "Error: FragShader = NULL");
1508
1509 JM()->mIsModuleFinalized = true;
1510
1511 return kernel;
1512 }
1513
1514 PFN_PIXEL_KERNEL
1515 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key)
1516 {
1517 if (!ctx->fs->pipe.tokens)
1518 return NULL;
1519
1520 BuilderSWR builder(
1521 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr),
1522 "FS");
1523 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key);
1524
1525 ctx->fs->map.insert(std::make_pair(key, std::make_unique<VariantFS>(builder.gallivm, func)));
1526 return func;
1527 }