1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ***************************************************************************/
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
27 #include "JitManager.h"
28 #include "llvm-c/Core.h"
29 #include "llvm/Support/CBindingWrapping.h"
30 #pragma pop_macro("DEBUG")
33 #include "state_llvm.h"
36 #include "tgsi/tgsi_strings.h"
37 #include "util/u_format.h"
38 #include "gallivm/lp_bld_init.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_struct.h"
41 #include "gallivm/lp_bld_tgsi.h"
43 #include "swr_context.h"
44 #include "swr_context_llvm.h"
45 #include "swr_resource.h"
46 #include "swr_state.h"
47 #include "swr_screen.h"
49 using namespace SwrJit
;
// Forward declaration of locate_linkage. The definition appears further down
// in this file; the shader compile code calls it before that point.
// NOTE(review): the return-type line of this declaration is not visible in
// this listing.
52 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
);
54 bool operator==(const swr_jit_fs_key
&lhs
, const swr_jit_fs_key
&rhs
)
56 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
59 bool operator==(const swr_jit_vs_key
&lhs
, const swr_jit_vs_key
&rhs
)
61 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
64 bool operator==(const swr_jit_fetch_key
&lhs
, const swr_jit_fetch_key
&rhs
)
66 return !memcmp(&lhs
, &rhs
, sizeof(lhs
));
// Fill the sampler part of a JIT key from the TGSI info of a shader.
//
// Reads info.base.file_max and info.base.file_mask for TGSI_FILE_SAMPLER and
// TGSI_FILE_SAMPLER_VIEW, plus the samplers and sampler views bound on ctx
// for shader_type. Writes key.nr_samplers, key.nr_sampler_views and, for each
// slot whose mask bit is set, key.sampler[i].sampler_state and
// key.sampler[i].texture_state.
//
// Two view loops are visible. The first runs when file_max for
// TGSI_FILE_SAMPLER_VIEW is not -1 and walks the declared views. The second
// sets nr_sampler_views = nr_samplers and walks the TGSI_FILE_SAMPLER mask.
// In both, a resource that has depth and stencil but is viewed through a
// format without depth gets texture_state.format forced to
// PIPE_FORMAT_S8_UINT.
//
// NOTE(review): shader_type is used in the body but its parameter declaration
// is not visible in this listing. The return-type line, the braces, the
// opener of the XXX comment and the branch keyword between the two view loops
// are absent as well. view is dereferenced (view->texture, view->format)
// with no visible null check -- confirm against the full file.
70 swr_generate_sampler_key(const struct lp_tgsi_info
&info
,
71 struct swr_context
*ctx
,
73 struct swr_jit_sampler_key
&key
)
// Highest declared sampler index + 1.
75 key
.nr_samplers
= info
.base
.file_max
[TGSI_FILE_SAMPLER
] + 1;
// Capture static sampler state only for slots the shader actually declares.
77 for (unsigned i
= 0; i
< key
.nr_samplers
; i
++) {
78 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
79 lp_sampler_static_sampler_state(
80 &key
.sampler
[i
].sampler_state
,
81 ctx
->samplers
[shader_type
][i
]);
86 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes
87 * are dx10-style? Can't really have mixed opcodes, at least not
88 * if we want to skip the holes here (without rescanning tgsi).
90 if (info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] != -1) {
91 key
.nr_sampler_views
=
92 info
.base
.file_max
[TGSI_FILE_SAMPLER_VIEW
] + 1;
93 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
94 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER_VIEW
] & (1 << i
)) {
95 const struct pipe_sampler_view
*view
=
96 ctx
->sampler_views
[shader_type
][i
];
97 lp_sampler_static_texture_state(
98 &key
.sampler
[i
].texture_state
, view
);
100 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
101 const struct util_format_description
*desc
=
102 util_format_description(view
->format
);
103 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
104 !util_format_has_depth(desc
))
105 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
110 key
.nr_sampler_views
= key
.nr_samplers
;
111 for (unsigned i
= 0; i
< key
.nr_sampler_views
; i
++) {
112 if (info
.base
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
)) {
113 const struct pipe_sampler_view
*view
=
114 ctx
->sampler_views
[shader_type
][i
];
115 lp_sampler_static_texture_state(
116 &key
.sampler
[i
].texture_state
, view
);
118 struct swr_resource
*swr_res
= swr_resource(view
->texture
);
119 const struct util_format_description
*desc
=
120 util_format_description(view
->format
);
121 if (swr_res
->has_depth
&& swr_res
->has_stencil
&&
122 !util_format_has_depth(desc
))
123 key
.sampler
[i
].texture_state
.format
= PIPE_FORMAT_S8_UINT
;
131 swr_generate_fs_key(struct swr_jit_fs_key
&key
,
132 struct swr_context
*ctx
,
133 swr_fragment_shader
*swr_fs
)
135 memset(&key
, 0, sizeof(key
));
137 key
.nr_cbufs
= ctx
->framebuffer
.nr_cbufs
;
138 key
.light_twoside
= ctx
->rasterizer
->light_twoside
;
139 key
.sprite_coord_enable
= ctx
->rasterizer
->sprite_coord_enable
;
140 memcpy(&key
.vs_output_semantic_name
,
141 &ctx
->vs
->info
.base
.output_semantic_name
,
142 sizeof(key
.vs_output_semantic_name
));
143 memcpy(&key
.vs_output_semantic_idx
,
144 &ctx
->vs
->info
.base
.output_semantic_index
,
145 sizeof(key
.vs_output_semantic_idx
));
147 swr_generate_sampler_key(swr_fs
->info
, ctx
, PIPE_SHADER_FRAGMENT
, key
);
151 swr_generate_vs_key(struct swr_jit_vs_key
&key
,
152 struct swr_context
*ctx
,
153 swr_vertex_shader
*swr_vs
)
155 memset(&key
, 0, sizeof(key
));
157 key
.clip_plane_mask
=
158 swr_vs
->info
.base
.clipdist_writemask
?
159 swr_vs
->info
.base
.clipdist_writemask
& ctx
->rasterizer
->clip_plane_enable
:
160 ctx
->rasterizer
->clip_plane_enable
;
162 swr_generate_sampler_key(swr_vs
->info
, ctx
, PIPE_SHADER_VERTEX
, key
);
166 swr_generate_fetch_key(struct swr_jit_fetch_key
&key
,
167 struct swr_vertex_element_state
*velems
)
169 memset(&key
, 0, sizeof(key
));
171 key
.fsState
= velems
->fsState
;
// Builder subclass that couples the SWR JIT builder with a gallivm state so
// that gallivm (TGSI translation) and the SWR builder emit into one module.
//
// The visible constructor code asks the JitManager to set up a new module,
// creates a gallivm state named pName on the JitManager LLVM context, and
// then points pJitMgr->mpCurrentModule at the module owned by gallivm.
//
// NOTE(review): the base-class initialiser, the braces and the header of the
// function that calls gallivm_free_ir (presumably the destructor) are not
// visible in this listing.
174 struct BuilderSWR
: public Builder
{
175 BuilderSWR(JitManager
*pJitMgr
, const char *pName
)
178 pJitMgr
->SetupNewModule();
179 gallivm
= gallivm_create(pName
, wrap(&JM()->mContext
));
// Redirect the JitManager to the module created by gallivm.
180 pJitMgr
->mpCurrentModule
= unwrap(gallivm
->module
);
184 gallivm_free_ir(gallivm
);
// gallivm state used for TGSI translation and JIT compilation; also handed
// to the shader variant objects by swr_compile_vs / swr_compile_fs.
187 struct gallivm_state
*gallivm
;
// Compile entry points; both take the context and the variant key.
188 PFN_VERTEX_FUNC
CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
);
189 PFN_PIXEL_KERNEL
CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
);
// Emit, verify and JIT-compile the LLVM function for the vertex shader bound
// on ctx (ctx->vs). BuilderSWR declares this member as returning
// PFN_VERTEX_FUNC; the pointer is obtained from gallivm_jit_function below.
//
// What the visible code emits, in order:
//   - a void function taking (swr_draw_context*, SWR_VS_CONTEXT*) that
//     carries a stack-alignment attribute of mVWidth * sizeof(float);
//   - a load of every input channel selected by input_usage_mask from the
//     pVin block of the VS context;
//   - the TGSI shader body through lp_build_tgsi_soa;
//   - a store of every written output channel to the pVout block, with
//     TGSI_SEMANTIC_PSIZE redirected to VERTEX_POINT_SIZE_SLOT;
//   - clip/cull distance stores when clip planes are enabled or the shader
//     writes cull distances.
//
// NOTE(review): this listing is missing lines (the return type, braces, most
// lp_build_tgsi_soa arguments, some branches and the return statement).
// Comments below describe only what is visible.
193 BuilderSWR::CompileVS(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
195 struct swr_vertex_shader
*swr_vs
= ctx
->vs
;
197 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
198 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
// Only outputs is zeroed here; a null entry later means the channel was never
// written. NOTE(review): unlike CompileFS, inputs is not zeroed, so channels
// not selected by input_usage_mask stay uninitialised.
200 memset(outputs
, 0, sizeof(outputs
));
// Function attribute: stack alignment of one SIMD vector of floats.
202 AttrBuilder attrBuilder
;
203 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
204 AttributeSet attrSet
= AttributeSet::get(
205 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
// Signature: void (swr_draw_context *, SWR_VS_CONTEXT *).
207 std::vector
<Type
*> vsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
208 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)};
209 FunctionType
*vsFuncType
=
210 FunctionType::get(Type::getVoidTy(JM()->mContext
), vsArgs
, false);
212 // create new vertex shader function
213 auto pFunction
= Function::Create(vsFuncType
,
214 GlobalValue::ExternalLinkage
,
216 JM()->mpCurrentModule
);
217 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
// Position both the SWR IR builder and the gallivm builder at the entry block
// so either API can append instructions.
219 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
220 IRB()->SetInsertPoint(block
);
221 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the two incoming arguments.
223 auto argitr
= pFunction
->arg_begin();
224 Value
*hPrivateData
= &*argitr
++;
225 hPrivateData
->setName("hPrivateData");
226 Value
*pVsCtx
= &*argitr
++;
227 pVsCtx
->setName("vsCtx");
// Addresses of the VS constant buffers and their sizes inside the draw
// context.
229 Value
*consts_ptr
= GEP(hPrivateData
, {C(0), C(swr_draw_context_constantVS
)});
231 consts_ptr
->setName("vs_constants");
232 Value
*const_sizes_ptr
=
233 GEP(hPrivateData
, {0, swr_draw_context_num_constantsVS
});
234 const_sizes_ptr
->setName("num_vs_constants");
// Pointer to the input vertex block.
236 Value
*vtxInput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVin
});
// Load only the input channels the shader reads.
238 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
239 const unsigned mask
= swr_vs
->info
.base
.input_usage_mask
[attrib
];
240 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
241 if (mask
& (1 << channel
)) {
242 inputs
[attrib
][channel
] =
243 wrap(LOAD(vtxInput
, {0, 0, attrib
, channel
}));
// Sampler code generator built from the sampler part of the key.
248 struct lp_build_sampler_soa
*sampler
=
249 swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_VERTEX
);
// System values: instance id and vertex id come from the VS context.
251 struct lp_bld_tgsi_system_values system_values
;
252 memset(&system_values
, 0, sizeof(system_values
));
253 system_values
.instance_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_InstanceID
}));
254 system_values
.vertex_id
= wrap(LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_VertexID
}));
// Translate the TGSI body. NOTE(review): most arguments of this call are not
// visible in this listing. The vector type is fixed at 32 * 8 bits of 32-bit
// floats while the stack alignment above uses JM()->mVWidth -- confirm they
// agree.
256 lp_build_tgsi_soa(gallivm
,
258 lp_type_float_vec(32, 32 * 8),
261 wrap(const_sizes_ptr
),
265 wrap(hPrivateData
), // (sampler context)
269 NULL
); // geometry shader face
271 sampler
->destroy(sampler
);
// gallivm may have moved on to a new block; resync the SWR builder.
273 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Pointer to the output vertex block.
275 Value
*vtxOutput
= LOAD(pVsCtx
, {0, SWR_VS_CONTEXT_pVout
});
// Copy every written output channel to the output vertex; entries that are
// still null were never written by the shader.
277 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
278 for (uint32_t attrib
= 0; attrib
< PIPE_MAX_SHADER_OUTPUTS
; attrib
++) {
279 if (!outputs
[attrib
][channel
])
282 Value
*val
= LOAD(unwrap(outputs
[attrib
][channel
]));
// Point size goes to its dedicated slot instead of the attribute index.
284 uint32_t outSlot
= attrib
;
285 if (swr_vs
->info
.base
.output_semantic_name
[attrib
] == TGSI_SEMANTIC_PSIZE
)
286 outSlot
= VERTEX_POINT_SIZE_SLOT
;
287 STORE(val
, vtxOutput
, {0, 0, outSlot
, channel
});
// Clip/cull distance generation.
291 if (ctx
->rasterizer
->clip_plane_enable
||
292 swr_vs
->info
.base
.culldist_writemask
) {
293 unsigned clip_mask
= ctx
->rasterizer
->clip_plane_enable
;
// cv selects the output used as the clip vertex: the CLIPVERTEX output when
// the shader writes one, otherwise a search for POSITION index 0.
// NOTE(review): the declaration of cv and the body of the POSITION search are
// not visible in this listing.
296 if (swr_vs
->info
.base
.writes_clipvertex
) {
297 cv
= 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX
, 0,
300 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
301 if (swr_vs
->info
.base
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
&&
302 swr_vs
->info
.base
.output_semantic_index
[i
] == 0) {
// Load the four components of the clip vertex.
308 LLVMValueRef cx
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][0], "");
309 LLVMValueRef cy
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][1], "");
310 LLVMValueRef cz
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][2], "");
311 LLVMValueRef cw
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][3], "");
313 for (unsigned val
= 0; val
< PIPE_MAX_CLIP_PLANES
; val
++) {
314 // clip distance overrides user clip planes
315 if ((swr_vs
->info
.base
.clipdist_writemask
& clip_mask
& (1 << val
)) ||
316 ((swr_vs
->info
.base
.culldist_writemask
<< swr_vs
->info
.base
.num_written_clipdistance
) & (1 << val
))) {
// Shader-written distance: CLIPDIST index 0 holds planes 0..3, index 1 holds
// planes 4..7. This cv is a new local that shadows the clip-vertex cv above.
317 unsigned cv
= 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST
, val
< 4 ? 0 : 1,
// Two stores are visible, one to the LO slot (component val) and one to the
// HI slot (component val - 4). NOTE(review): the branch selecting between
// them is not visible here.
320 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
], "");
321 STORE(unwrap(dist
), vtxOutput
, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
});
323 LLVMValueRef dist
= LLVMBuildLoad(gallivm
->builder
, outputs
[cv
][val
- 4], "");
324 STORE(unwrap(dist
), vtxOutput
, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4});
// Test for planes that are not enabled in clip_mask.
329 if (!(clip_mask
& (1 << val
)))
// User clip plane: load the four plane coefficients from the draw context.
332 Value
*px
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 0}));
333 Value
*py
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 1}));
334 Value
*pz
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 2}));
335 Value
*pw
= LOAD(GEP(hPrivateData
, {0, swr_draw_context_userClipPlanes
, val
, 3}));
// dist = cx*px + cy*py + cz*pz + cw*pw, with each coefficient broadcast
// across the vector.
336 Value
*dist
= FADD(FMUL(unwrap(cx
), VBROADCAST(px
)),
337 FADD(FMUL(unwrap(cy
), VBROADCAST(py
)),
338 FADD(FMUL(unwrap(cz
), VBROADCAST(pz
)),
339 FMUL(unwrap(cw
), VBROADCAST(pw
)))));
// Same LO/HI slot split as above.
342 STORE(dist
, vtxOutput
, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT
, val
});
344 STORE(dist
, vtxOutput
, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT
, val
- 4});
// Verify the function and compile the module.
350 gallivm_verify_function(gallivm
, wrap(pFunction
));
351 gallivm_compile_module(gallivm
);
353 // lp_debug_dump_value(func);
// Fetch the JIT-compiled entry point.
355 PFN_VERTEX_FUNC pFunc
=
356 (PFN_VERTEX_FUNC
)gallivm_jit_function(gallivm
, wrap(pFunction
));
358 debug_printf("vert shader %p\n", pFunc
);
359 assert(pFunc
&& "Error: VertShader = NULL");
// Mark the current JitManager module as finalized.
361 JM()->mIsModuleFinalized
= true;
// Compile a vertex-shader variant for key and cache it on the bound shader.
//
// The JitManager is taken from the screen that owns ctx (hJitMgr, cast to
// JitManager*). A BuilderSWR named builder runs CompileVS, and the resulting
// function is inserted into ctx->vs->map under key, wrapped in a VariantVS
// together with builder.gallivm.
//
// NOTE(review): the return-type line, the builder declaration, its name
// argument and the return statement are not visible in this listing.
367 swr_compile_vs(struct swr_context
*ctx
, swr_jit_vs_key
&key
)
370 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
372 PFN_VERTEX_FUNC func
= builder
.CompileVS(ctx
, key
);
// Cache the variant under its key.
374 ctx
->vs
->map
.insert(std::make_pair(key
, make_unique
<VariantVS
>(builder
.gallivm
, func
)));
// Find the output of a shader that carries a given semantic.
//
// Scans info->output_semantic_name / output_semantic_index over
// PIPE_MAX_SHADER_OUTPUTS entries for the first one that matches both name
// and index, and returns that output index minus one.
//
// NOTE(review): the return-type line and the not-found path are not visible
// in this listing. Callers in this file compare the result against
// 0xFFFFFFFF, so that is presumably the not-found value -- confirm. A match
// at output 0 yields 0 - 1, which is the same value once stored in an
// unsigned.
379 locate_linkage(ubyte name
, ubyte index
, struct tgsi_shader_info
*info
)
381 for (int i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; i
++) {
382 if ((info
->output_semantic_name
[i
] == name
)
383 && (info
->output_semantic_index
[i
] == index
)) {
384 return i
- 1; // position is not part of the linkage
// Emit, verify and JIT-compile the LLVM function for the fragment shader
// bound on ctx (ctx->fs). BuilderSWR declares this member as returning
// PFN_PIXEL_KERNEL; the pointer is obtained from gallivm_jit_function below.
//
// What the visible code does, in order:
//   - creates a void function taking (swr_draw_context*, SWR_PS_CONTEXT*)
//     with a stack-alignment attribute of mVWidth * sizeof(float);
//   - resets swr_fs constantMask, flatConstantMask and pointSpriteMask and
//     rebuilds them while walking the inputs;
//   - builds each shader input: FACE, POSITION and PRIMID come from the PS
//     context, everything else is looked up in the bound vertex shader
//     outputs via locate_linkage and interpolated;
//   - translates the TGSI body through lp_build_tgsi_soa, with a kill mask
//     when the shader uses kill;
//   - stores the POSITION z output and the COLOR outputs back into the PS
//     context, plus the final active mask when kill is used.
//
// NOTE(review): this listing is missing lines (the return type, braces,
// switch headers, break / continue statements, several call arguments and
// the return statement). Comments below describe only what is visible.
392 BuilderSWR::CompileFS(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
394 struct swr_fragment_shader
*swr_fs
= ctx
->fs
;
396 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][TGSI_NUM_CHANNELS
];
397 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][TGSI_NUM_CHANNELS
];
// Both arrays start zeroed; a null output entry means it was never written.
399 memset(inputs
, 0, sizeof(inputs
));
400 memset(outputs
, 0, sizeof(outputs
));
402 struct lp_build_sampler_soa
*sampler
= NULL
;
// Function attribute: stack alignment of one SIMD vector of floats.
404 AttrBuilder attrBuilder
;
405 attrBuilder
.addStackAlignmentAttr(JM()->mVWidth
* sizeof(float));
406 AttributeSet attrSet
= AttributeSet::get(
407 JM()->mContext
, AttributeSet::FunctionIndex
, attrBuilder
);
// Signature: void (swr_draw_context *, SWR_PS_CONTEXT *).
409 std::vector
<Type
*> fsArgs
{PointerType::get(Gen_swr_draw_context(JM()), 0),
410 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)};
411 FunctionType
*funcType
=
412 FunctionType::get(Type::getVoidTy(JM()->mContext
), fsArgs
, false);
414 auto pFunction
= Function::Create(funcType
,
415 GlobalValue::ExternalLinkage
,
417 JM()->mpCurrentModule
);
418 pFunction
->addAttributes(AttributeSet::FunctionIndex
, attrSet
);
// Position both the SWR IR builder and the gallivm builder at the entry
// block.
420 BasicBlock
*block
= BasicBlock::Create(JM()->mContext
, "entry", pFunction
);
421 IRB()->SetInsertPoint(block
);
422 LLVMPositionBuilderAtEnd(gallivm
->builder
, wrap(block
));
// Name the two incoming arguments.
424 auto args
= pFunction
->arg_begin();
425 Value
*hPrivateData
= &*args
++;
426 hPrivateData
->setName("hPrivateData");
427 Value
*pPS
= &*args
++;
428 pPS
->setName("psCtx");
// Addresses of the FS constant buffers and their sizes inside the draw
// context.
430 Value
*consts_ptr
= GEP(hPrivateData
, {0, swr_draw_context_constantFS
});
431 consts_ptr
->setName("fs_constants");
432 Value
*const_sizes_ptr
=
433 GEP(hPrivateData
, {0, swr_draw_context_num_constantsFS
});
434 const_sizes_ptr
->setName("num_fs_constants");
436 // load *pAttribs, *pPerspAttribs
437 Value
*pRawAttribs
= LOAD(pPS
, {0, SWR_PS_CONTEXT_pAttribs
}, "pRawAttribs");
438 Value
*pPerspAttribs
=
439 LOAD(pPS
, {0, SWR_PS_CONTEXT_pPerspAttribs
}, "pPerspAttribs");
// These masks live on the shader object and are recomputed on every compile.
441 swr_fs
->constantMask
= 0;
442 swr_fs
->flatConstantMask
= 0;
443 swr_fs
->pointSpriteMask
= 0;
// Build every fragment shader input.
445 for (int attrib
= 0; attrib
< PIPE_MAX_SHADER_INPUTS
; attrib
++) {
446 const unsigned mask
= swr_fs
->info
.base
.input_usage_mask
[attrib
];
447 const unsigned interpMode
= swr_fs
->info
.base
.input_interpolate
[attrib
];
448 const unsigned interpLoc
= swr_fs
->info
.base
.input_interpolate_loc
[attrib
];
// Pick the i/j pair matching the interpolation location (center, centroid or
// sample). NOTE(review): the switch header and the statements between cases
// are not visible in this listing.
454 Value
*vi
= nullptr, *vj
= nullptr;
456 case TGSI_INTERPOLATE_LOC_CENTER
:
457 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_center
}, "i");
458 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_center
}, "j");
460 case TGSI_INTERPOLATE_LOC_CENTROID
:
461 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_centroid
}, "i");
462 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_centroid
}, "j");
464 case TGSI_INTERPOLATE_LOC_SAMPLE
:
465 vi
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vI
, PixelPositions_sample
}, "i");
466 vj
= LOAD(pPS
, {0, SWR_PS_CONTEXT_vJ
, PixelPositions_sample
}, "j");
// PERSPECTIVE and COLOR interpolation read pPerspAttribs and need
// vw = VRCP(vOneOverW) at the matching location; other modes read
// pRawAttribs.
471 Value
*vw
= nullptr, *pAttribs
;
472 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
473 interpMode
== TGSI_INTERPOLATE_COLOR
) {
474 pAttribs
= pPerspAttribs
;
476 case TGSI_INTERPOLATE_LOC_CENTER
:
477 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}));
479 case TGSI_INTERPOLATE_LOC_CENTROID
:
480 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_centroid
}));
482 case TGSI_INTERPOLATE_LOC_SAMPLE
:
483 vw
= VRCP(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_sample
}));
487 pAttribs
= pRawAttribs
;
493 ubyte semantic_name
= swr_fs
->info
.base
.input_semantic_name
[attrib
];
494 ubyte semantic_idx
= swr_fs
->info
.base
.input_semantic_index
[attrib
];
// FACE: frontFace is converted to float, remapped as ff * 2 - 1 and splatted
// across the vector; the input becomes (ff, 0, 0, 1).
// NOTE(review): the declaration of ff is not visible in this listing.
496 if (semantic_name
== TGSI_SEMANTIC_FACE
) {
498 UI_TO_FP(LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), mFP32Ty
);
499 ff
= FSUB(FMUL(ff
, C(2.0f
)), C(1.0f
));
500 ff
= VECTOR_SPLAT(JM()->mVWidth
, ff
, "vFrontFace");
502 inputs
[attrib
][0] = wrap(ff
);
503 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
504 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
505 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
// POSITION: x/y come from the pixel-center positions when the shader asks for
// half-integer pixel centers; the other pair of loads uses the upper-left
// positions. z comes from vZ.
507 } else if (semantic_name
== TGSI_SEMANTIC_POSITION
) { // gl_FragCoord
508 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER
] ==
509 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER
) {
510 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_center
}, "vX"));
511 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_center
}, "vY"));
513 inputs
[attrib
][0] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vX
, PixelPositions_UL
}, "vX"));
514 inputs
[attrib
][1] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vY
, PixelPositions_UL
}, "vY"));
516 inputs
[attrib
][2] = wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vZ
}, "vZ"));
// NOTE(review): the left-hand side of this vOneOverW load is not visible.
518 wrap(LOAD(pPS
, {0, SWR_PS_CONTEXT_vOneOverW
, PixelPositions_center
}, "vOneOverW"));
// PRIMID: splat the primitive id into x, zero the other channels.
520 } else if (semantic_name
== TGSI_SEMANTIC_PRIMID
) {
521 Value
*primID
= LOAD(pPS
, {0, SWR_PS_CONTEXT_primID
}, "primID");
522 inputs
[attrib
][0] = wrap(VECTOR_SPLAT(JM()->mVWidth
, primID
));
523 inputs
[attrib
][1] = wrap(VIMMED1(0));
524 inputs
[attrib
][2] = wrap(VIMMED1(0));
525 inputs
[attrib
][3] = wrap(VIMMED1(0));
// Everything else is matched against the outputs of the bound vertex shader.
529 unsigned linkedAttrib
=
530 locate_linkage(semantic_name
, semantic_idx
, &ctx
->vs
->info
.base
);
// A GENERIC input with sprite coords enabled is redirected to the attribute
// at num_outputs - 1 and recorded in pointSpriteMask.
531 if (semantic_name
== TGSI_SEMANTIC_GENERIC
&&
532 key
.sprite_coord_enable
& (1 << semantic_idx
)) {
533 /* we add an extra attrib to the backendState in swr_update_derived. */
534 linkedAttrib
= ctx
->vs
->info
.base
.num_outputs
- 1;
535 swr_fs
->pointSpriteMask
|= (1 << linkedAttrib
);
// No matching vertex shader output: the input defaults to (0, 0, 0, 1).
536 } else if (linkedAttrib
== 0xFFFFFFFF) {
537 inputs
[attrib
][0] = wrap(VIMMED1(0.0f
));
538 inputs
[attrib
][1] = wrap(VIMMED1(0.0f
));
539 inputs
[attrib
][2] = wrap(VIMMED1(0.0f
));
540 inputs
[attrib
][3] = wrap(VIMMED1(1.0f
));
541 /* If we're reading in color and 2-sided lighting is enabled, we have
544 if (semantic_name
!= TGSI_SEMANTIC_COLOR
|| !key
.light_twoside
)
547 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
548 swr_fs
->constantMask
|= 1 << linkedAttrib
;
549 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
550 swr_fs
->flatConstantMask
|= 1 << linkedAttrib
;
554 unsigned bcolorAttrib
= 0xFFFFFFFF;
555 Value
*offset
= NULL
;
556 if (semantic_name
== TGSI_SEMANTIC_COLOR
&& key
.light_twoside
) {
557 bcolorAttrib
= locate_linkage(
558 TGSI_SEMANTIC_BCOLOR
, semantic_idx
, &ctx
->vs
->info
.base
);
559 /* Neither front nor back colors were available. Nothing to load. */
560 if (bcolorAttrib
== 0xFFFFFFFF && linkedAttrib
== 0xFFFFFFFF)
562 /* If there is no front color, just always use the back color. */
563 if (linkedAttrib
== 0xFFFFFFFF)
564 linkedAttrib
= bcolorAttrib
;
// Record the back color attribute in the same masks as the front color.
566 if (bcolorAttrib
!= 0xFFFFFFFF) {
567 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
568 swr_fs
->constantMask
|= 1 << bcolorAttrib
;
569 } else if (interpMode
== TGSI_INTERPOLATE_COLOR
) {
570 swr_fs
->flatConstantMask
|= 1 << bcolorAttrib
;
// Each attribute occupies 12 floats in the attribute buffer, so diff is the
// float distance from the front color to the back color. offset becomes
// back * diff, where back is frontFace XOR 1.
// NOTE(review): the declaration of back is not visible in this listing.
573 unsigned diff
= 12 * (bcolorAttrib
- linkedAttrib
);
577 XOR(C(1), LOAD(pPS
, {0, SWR_PS_CONTEXT_frontFace
}), "backFace");
579 offset
= MUL(back
, C(diff
));
580 offset
->setName("offset");
// Per channel: three values are read at linkedAttrib * 12 + channel, +4 and
// +8.
585 for (int channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
586 if (mask
& (1 << channel
)) {
587 Value
*indexA
= C(linkedAttrib
* 12 + channel
);
588 Value
*indexB
= C(linkedAttrib
* 12 + channel
+ 4);
589 Value
*indexC
= C(linkedAttrib
* 12 + channel
+ 8);
// Apply the two-sided color offset to all three indices.
// NOTE(review): the condition guarding these adds is not visible here.
592 indexA
= ADD(indexA
, offset
);
593 indexB
= ADD(indexB
, offset
);
594 indexC
= ADD(indexC
, offset
);
597 Value
*va
= VBROADCAST(LOAD(GEP(pAttribs
, indexA
)));
598 Value
*vb
= VBROADCAST(LOAD(GEP(pAttribs
, indexB
)));
599 Value
*vc
= VBROADCAST(LOAD(GEP(pAttribs
, indexC
)));
// CONSTANT interpolation takes the first value as is.
601 if (interpMode
== TGSI_INTERPOLATE_CONSTANT
) {
602 inputs
[attrib
][channel
] = wrap(va
);
// Otherwise combine the three values: vk = 1 - vi - vj, and the visible sum
// is va * vi + vb * vj + vc. NOTE(review): the lines between the vk
// computation and the sum are not visible, so how vk is applied cannot be
// confirmed from here.
604 Value
*vk
= FSUB(FSUB(VIMMED1(1.0f
), vi
), vj
);
608 Value
*interp
= FMUL(va
, vi
);
609 Value
*interp1
= FMUL(vb
, vj
);
610 interp
= FADD(interp
, interp1
);
611 interp
= FADD(interp
, vc
);
// Perspective-style modes multiply the result by vw.
612 if (interpMode
== TGSI_INTERPOLATE_PERSPECTIVE
||
613 interpMode
== TGSI_INTERPOLATE_COLOR
)
614 interp
= FMUL(interp
, vw
);
615 inputs
[attrib
][channel
] = wrap(interp
);
// Sampler code generator built from the sampler part of the key.
621 sampler
= swr_sampler_soa_create(key
.sampler
, PIPE_SHADER_FRAGMENT
);
623 struct lp_bld_tgsi_system_values system_values
;
624 memset(&system_values
, 0, sizeof(system_values
));
// The execution mask is only set up when the shader can kill fragments; it is
// seeded from the activeMask of the PS context.
// NOTE(review): the name of the call taking these arguments is not visible.
626 struct lp_build_mask_context mask
;
628 if (swr_fs
->info
.base
.uses_kill
) {
629 Value
*mask_val
= LOAD(pPS
, {0, SWR_PS_CONTEXT_activeMask
}, "activeMask");
631 &mask
, gallivm
, lp_type_float_vec(32, 32 * 8), wrap(mask_val
));
// Translate the TGSI body. NOTE(review): most arguments of this call are not
// visible in this listing.
634 lp_build_tgsi_soa(gallivm
,
636 lp_type_float_vec(32, 32 * 8),
637 swr_fs
->info
.base
.uses_kill
? &mask
: NULL
, // mask
639 wrap(const_sizes_ptr
),
647 NULL
); // geometry shader face
649 sampler
->destroy(sampler
);
// gallivm may have moved on to a new block; resync the SWR builder.
651 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Write shader outputs back into the PS context.
653 for (uint32_t attrib
= 0; attrib
< swr_fs
->info
.base
.num_outputs
;
655 switch (swr_fs
->info
.base
.output_semantic_name
[attrib
]) {
// POSITION: only channel 2 (z) is loaded and stored to vZ.
656 case TGSI_SEMANTIC_POSITION
: {
659 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][2], "");
660 STORE(unwrap(outZ
), pPS
, {0, SWR_PS_CONTEXT_vZ
});
// COLOR: written channels go to the shaded array. When the shader sets
// FS_COLOR0_WRITES_ALL_CBUFS and this is color index 0, the value is
// replicated over key.nr_cbufs render targets; otherwise the visible store
// arguments use the output semantic index as the render target.
663 case TGSI_SEMANTIC_COLOR
: {
664 for (uint32_t channel
= 0; channel
< TGSI_NUM_CHANNELS
; channel
++) {
665 if (!outputs
[attrib
][channel
])
669 LLVMBuildLoad(gallivm
->builder
, outputs
[attrib
][channel
], "");
670 if (swr_fs
->info
.base
.properties
[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
] &&
671 swr_fs
->info
.base
.output_semantic_index
[attrib
] == 0) {
672 for (uint32_t rt
= 0; rt
< key
.nr_cbufs
; rt
++) {
675 {0, SWR_PS_CONTEXT_shaded
, rt
, channel
});
681 SWR_PS_CONTEXT_shaded
,
682 swr_fs
->info
.base
.output_semantic_index
[attrib
],
// Diagnostic arguments for an output semantic with no case above.
// NOTE(review): the call these arguments belong to is not visible here.
690 "unknown output from FS %s[%d]\n",
691 tgsi_semantic_names
[swr_fs
->info
.base
692 .output_semantic_name
[attrib
]],
693 swr_fs
->info
.base
.output_semantic_index
[attrib
]);
// Close the kill mask, if one was opened.
699 LLVMValueRef mask_result
= 0;
700 if (swr_fs
->info
.base
.uses_kill
) {
701 mask_result
= lp_build_mask_end(&mask
);
704 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm
->builder
)));
// Store the resulting mask back to activeMask.
706 if (swr_fs
->info
.base
.uses_kill
) {
707 STORE(unwrap(mask_result
), pPS
, {0, SWR_PS_CONTEXT_activeMask
});
// Verify the function, compile the module and fetch the JIT-compiled entry
// point.
712 gallivm_verify_function(gallivm
, wrap(pFunction
));
714 gallivm_compile_module(gallivm
);
716 PFN_PIXEL_KERNEL kernel
=
717 (PFN_PIXEL_KERNEL
)gallivm_jit_function(gallivm
, wrap(pFunction
));
718 debug_printf("frag shader %p\n", kernel
);
719 assert(kernel
&& "Error: FragShader = NULL");
// Mark the current JitManager module as finalized.
721 JM()->mIsModuleFinalized
= true;
// Compile a fragment-shader variant for key and cache it on the bound shader.
//
// The JitManager is taken from the screen that owns ctx (hJitMgr, cast to
// JitManager*). A BuilderSWR named builder runs CompileFS, and the resulting
// kernel is inserted into ctx->fs->map under key, wrapped in a VariantFS
// together with builder.gallivm.
//
// NOTE(review): the return-type line, the builder declaration, its name
// argument and the return statement are not visible in this listing.
727 swr_compile_fs(struct swr_context
*ctx
, swr_jit_fs_key
&key
)
730 reinterpret_cast<JitManager
*>(swr_screen(ctx
->pipe
.screen
)->hJitMgr
),
732 PFN_PIXEL_KERNEL func
= builder
.CompileFS(ctx
, key
);
// Cache the variant under its key.
734 ctx
->fs
->map
.insert(std::make_pair(key
, make_unique
<VariantFS
>(builder
.gallivm
, func
)));