/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "si_shader_internal.h"
29 LLVMValueRef
si_get_sample_id(struct si_shader_context
*ctx
)
31 return si_unpack_param(ctx
, ctx
->args
.ancillary
, 8, 4);
34 static LLVMValueRef
load_sample_mask_in(struct ac_shader_abi
*abi
)
36 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
37 return ac_to_integer(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.sample_coverage
));
40 static LLVMValueRef
load_sample_position(struct ac_shader_abi
*abi
, LLVMValueRef sample_id
)
42 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
43 LLVMValueRef desc
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
44 LLVMValueRef buf_index
= LLVMConstInt(ctx
->ac
.i32
, SI_PS_CONST_SAMPLE_POSITIONS
, 0);
45 LLVMValueRef resource
= ac_build_load_to_sgpr(&ctx
->ac
, desc
, buf_index
);
47 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
48 LLVMValueRef offset0
=
49 LLVMBuildMul(ctx
->ac
.builder
, sample_id
, LLVMConstInt(ctx
->ac
.i32
, 8, 0), "");
50 LLVMValueRef offset1
=
51 LLVMBuildAdd(ctx
->ac
.builder
, offset0
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
53 LLVMValueRef pos
[4] = {si_buffer_load_const(ctx
, resource
, offset0
),
54 si_buffer_load_const(ctx
, resource
, offset1
),
55 LLVMConstReal(ctx
->ac
.f32
, 0), LLVMConstReal(ctx
->ac
.f32
, 0)};
57 return ac_build_gather_values(&ctx
->ac
, pos
, 4);
60 static LLVMValueRef
si_nir_emit_fbfetch(struct ac_shader_abi
*abi
)
62 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
63 struct ac_image_args args
= {};
64 LLVMValueRef ptr
, image
, fmask
;
66 /* Ignore src0, because KHR_blend_func_extended disallows multiple render
70 /* Load the image descriptor. */
71 STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0
% 2 == 0);
72 ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
74 LLVMBuildPointerCast(ctx
->ac
.builder
, ptr
, ac_array_in_const32_addr_space(ctx
->ac
.v8i32
), "");
76 ac_build_load_to_sgpr(&ctx
->ac
, ptr
, LLVMConstInt(ctx
->ac
.i32
, SI_PS_IMAGE_COLORBUF0
/ 2, 0));
80 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 0, 16);
82 if (!ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
83 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 16, 16);
85 /* Get the current render target layer index. */
86 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
)
87 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->args
.ancillary
, 16, 11);
89 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
90 args
.coords
[chan
++] = si_get_sample_id(ctx
);
92 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
&& !(ctx
->screen
->debug_flags
& DBG(NO_FMASK
))) {
93 fmask
= ac_build_load_to_sgpr(&ctx
->ac
, ptr
,
94 LLVMConstInt(ctx
->ac
.i32
, SI_PS_IMAGE_COLORBUF0_FMASK
/ 2, 0));
96 ac_apply_fmask_to_sample(&ctx
->ac
, fmask
, args
.coords
,
97 ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
);
100 args
.opcode
= ac_image_load
;
101 args
.resource
= image
;
103 args
.attributes
= AC_FUNC_ATTR_READNONE
;
105 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
107 ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_2darraymsaa
: ac_image_2dmsaa
;
108 else if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
109 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_1darray
: ac_image_1d
;
111 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_2darray
: ac_image_2d
;
113 return ac_build_image_opcode(&ctx
->ac
, &args
);
116 static LLVMValueRef
si_build_fs_interp(struct si_shader_context
*ctx
, unsigned attr_index
,
117 unsigned chan
, LLVMValueRef prim_mask
, LLVMValueRef i
,
121 return ac_build_fs_interp(&ctx
->ac
, LLVMConstInt(ctx
->ac
.i32
, chan
, 0),
122 LLVMConstInt(ctx
->ac
.i32
, attr_index
, 0), prim_mask
, i
, j
);
124 return ac_build_fs_interp_mov(&ctx
->ac
, LLVMConstInt(ctx
->ac
.i32
, 2, 0), /* P0 */
125 LLVMConstInt(ctx
->ac
.i32
, chan
, 0),
126 LLVMConstInt(ctx
->ac
.i32
, attr_index
, 0), prim_mask
);
130 * Interpolate a fragment shader input.
133 * @param input_index index of the input in hardware
134 * @param semantic_name TGSI_SEMANTIC_*
135 * @param semantic_index semantic index
136 * @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset)
137 * @param colors_read_mask color components read (4 bits for each color, 8 bits in total)
138 * @param interp_param interpolation weights (i,j)
139 * @param prim_mask SI_PARAM_PRIM_MASK
140 * @param face SI_PARAM_FRONT_FACE
141 * @param result the return value (4 components)
143 static void interp_fs_color(struct si_shader_context
*ctx
, unsigned input_index
,
144 unsigned semantic_index
, unsigned num_interp_inputs
,
145 unsigned colors_read_mask
, LLVMValueRef interp_param
,
146 LLVMValueRef prim_mask
, LLVMValueRef face
, LLVMValueRef result
[4])
148 LLVMValueRef i
= NULL
, j
= NULL
;
151 /* fs.constant returns the param from the middle vertex, so it's not
152 * really useful for flat shading. It's meant to be used for custom
153 * interpolation (but the intrinsic can't fetch from the other two
156 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
157 * to do the right thing. The only reason we use fs.constant is that
158 * fs.interp cannot be used on integers, because they can be equal
161 * When interp is false we will use fs.constant or for newer llvm,
164 bool interp
= interp_param
!= NULL
;
168 LLVMBuildBitCast(ctx
->ac
.builder
, interp_param
, LLVMVectorType(ctx
->ac
.f32
, 2), "");
170 i
= LLVMBuildExtractElement(ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_0
, "");
171 j
= LLVMBuildExtractElement(ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_1
, "");
174 if (ctx
->shader
->key
.part
.ps
.prolog
.color_two_side
) {
175 LLVMValueRef is_face_positive
;
177 /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
178 * otherwise it's at offset "num_inputs".
180 unsigned back_attr_offset
= num_interp_inputs
;
181 if (semantic_index
== 1 && colors_read_mask
& 0xf)
182 back_attr_offset
+= 1;
184 is_face_positive
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntNE
, face
, ctx
->ac
.i32_0
, "");
186 for (chan
= 0; chan
< 4; chan
++) {
187 LLVMValueRef front
, back
;
189 front
= si_build_fs_interp(ctx
, input_index
, chan
, prim_mask
, i
, j
);
190 back
= si_build_fs_interp(ctx
, back_attr_offset
, chan
, prim_mask
, i
, j
);
192 result
[chan
] = LLVMBuildSelect(ctx
->ac
.builder
, is_face_positive
, front
, back
, "");
195 for (chan
= 0; chan
< 4; chan
++) {
196 result
[chan
] = si_build_fs_interp(ctx
, input_index
, chan
, prim_mask
, i
, j
);
201 static void si_alpha_test(struct si_shader_context
*ctx
, LLVMValueRef alpha
)
203 if (ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
!= PIPE_FUNC_NEVER
) {
204 static LLVMRealPredicate cond_map
[PIPE_FUNC_ALWAYS
+ 1] = {
205 [PIPE_FUNC_LESS
] = LLVMRealOLT
, [PIPE_FUNC_EQUAL
] = LLVMRealOEQ
,
206 [PIPE_FUNC_LEQUAL
] = LLVMRealOLE
, [PIPE_FUNC_GREATER
] = LLVMRealOGT
,
207 [PIPE_FUNC_NOTEQUAL
] = LLVMRealONE
, [PIPE_FUNC_GEQUAL
] = LLVMRealOGE
,
209 LLVMRealPredicate cond
= cond_map
[ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
];
212 LLVMValueRef alpha_ref
= LLVMGetParam(ctx
->main_fn
, SI_PARAM_ALPHA_REF
);
213 LLVMValueRef alpha_pass
= LLVMBuildFCmp(ctx
->ac
.builder
, cond
, alpha
, alpha_ref
, "");
214 ac_build_kill_if_false(&ctx
->ac
, alpha_pass
);
216 ac_build_kill_if_false(&ctx
->ac
, ctx
->ac
.i1false
);
220 static LLVMValueRef
si_scale_alpha_by_sample_mask(struct si_shader_context
*ctx
, LLVMValueRef alpha
,
221 unsigned samplemask_param
)
223 LLVMValueRef coverage
;
225 /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
226 coverage
= LLVMGetParam(ctx
->main_fn
, samplemask_param
);
227 coverage
= ac_to_integer(&ctx
->ac
, coverage
);
229 coverage
= ac_build_intrinsic(&ctx
->ac
, "llvm.ctpop.i32", ctx
->ac
.i32
, &coverage
, 1,
230 AC_FUNC_ATTR_READNONE
);
232 coverage
= LLVMBuildUIToFP(ctx
->ac
.builder
, coverage
, ctx
->ac
.f32
, "");
234 coverage
= LLVMBuildFMul(ctx
->ac
.builder
, coverage
,
235 LLVMConstReal(ctx
->ac
.f32
, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES
), "");
237 return LLVMBuildFMul(ctx
->ac
.builder
, alpha
, coverage
, "");
240 struct si_ps_exports
{
242 struct ac_export_args args
[10];
245 static void si_export_mrt_z(struct si_shader_context
*ctx
, LLVMValueRef depth
, LLVMValueRef stencil
,
246 LLVMValueRef samplemask
, struct si_ps_exports
*exp
)
248 struct ac_export_args args
;
250 ac_export_mrt_z(&ctx
->ac
, depth
, stencil
, samplemask
, &args
);
252 memcpy(&exp
->args
[exp
->num
++], &args
, sizeof(args
));
255 /* Initialize arguments for the shader export intrinsic */
256 static void si_llvm_init_ps_export_args(struct si_shader_context
*ctx
, LLVMValueRef
*values
,
257 unsigned target
, struct ac_export_args
*args
)
259 const struct si_shader_key
*key
= &ctx
->shader
->key
;
260 unsigned col_formats
= key
->part
.ps
.epilog
.spi_shader_col_format
;
261 LLVMValueRef f32undef
= LLVMGetUndef(ctx
->ac
.f32
);
262 unsigned spi_shader_col_format
;
264 bool is_int8
, is_int10
;
265 int cbuf
= target
- V_008DFC_SQ_EXP_MRT
;
267 assert(cbuf
>= 0 && cbuf
< 8);
269 spi_shader_col_format
= (col_formats
>> (cbuf
* 4)) & 0xf;
270 is_int8
= (key
->part
.ps
.epilog
.color_is_int8
>> cbuf
) & 0x1;
271 is_int10
= (key
->part
.ps
.epilog
.color_is_int10
>> cbuf
) & 0x1;
273 /* Default is 0xf. Adjusted below depending on the format. */
274 args
->enabled_channels
= 0xf; /* writemask */
276 /* Specify whether the EXEC mask represents the valid mask */
277 args
->valid_mask
= 0;
279 /* Specify whether this is the last export */
282 /* Specify the target we are exporting */
283 args
->target
= target
;
286 args
->out
[0] = f32undef
;
287 args
->out
[1] = f32undef
;
288 args
->out
[2] = f32undef
;
289 args
->out
[3] = f32undef
;
291 LLVMValueRef (*packf
)(struct ac_llvm_context
* ctx
, LLVMValueRef args
[2]) = NULL
;
292 LLVMValueRef (*packi
)(struct ac_llvm_context
* ctx
, LLVMValueRef args
[2], unsigned bits
,
295 switch (spi_shader_col_format
) {
296 case V_028714_SPI_SHADER_ZERO
:
297 args
->enabled_channels
= 0; /* writemask */
298 args
->target
= V_008DFC_SQ_EXP_NULL
;
301 case V_028714_SPI_SHADER_32_R
:
302 args
->enabled_channels
= 1; /* writemask */
303 args
->out
[0] = values
[0];
306 case V_028714_SPI_SHADER_32_GR
:
307 args
->enabled_channels
= 0x3; /* writemask */
308 args
->out
[0] = values
[0];
309 args
->out
[1] = values
[1];
312 case V_028714_SPI_SHADER_32_AR
:
313 if (ctx
->screen
->info
.chip_class
>= GFX10
) {
314 args
->enabled_channels
= 0x3; /* writemask */
315 args
->out
[0] = values
[0];
316 args
->out
[1] = values
[3];
318 args
->enabled_channels
= 0x9; /* writemask */
319 args
->out
[0] = values
[0];
320 args
->out
[3] = values
[3];
324 case V_028714_SPI_SHADER_FP16_ABGR
:
325 packf
= ac_build_cvt_pkrtz_f16
;
328 case V_028714_SPI_SHADER_UNORM16_ABGR
:
329 packf
= ac_build_cvt_pknorm_u16
;
332 case V_028714_SPI_SHADER_SNORM16_ABGR
:
333 packf
= ac_build_cvt_pknorm_i16
;
336 case V_028714_SPI_SHADER_UINT16_ABGR
:
337 packi
= ac_build_cvt_pk_u16
;
340 case V_028714_SPI_SHADER_SINT16_ABGR
:
341 packi
= ac_build_cvt_pk_i16
;
344 case V_028714_SPI_SHADER_32_ABGR
:
345 memcpy(&args
->out
[0], values
, sizeof(values
[0]) * 4);
349 /* Pack f16 or norm_i16/u16. */
351 for (chan
= 0; chan
< 2; chan
++) {
352 LLVMValueRef pack_args
[2] = {values
[2 * chan
], values
[2 * chan
+ 1]};
355 packed
= packf(&ctx
->ac
, pack_args
);
356 args
->out
[chan
] = ac_to_float(&ctx
->ac
, packed
);
358 args
->compr
= 1; /* COMPR flag */
362 for (chan
= 0; chan
< 2; chan
++) {
363 LLVMValueRef pack_args
[2] = {ac_to_integer(&ctx
->ac
, values
[2 * chan
]),
364 ac_to_integer(&ctx
->ac
, values
[2 * chan
+ 1])};
367 packed
= packi(&ctx
->ac
, pack_args
, is_int8
? 8 : is_int10
? 10 : 16, chan
== 1);
368 args
->out
[chan
] = ac_to_float(&ctx
->ac
, packed
);
370 args
->compr
= 1; /* COMPR flag */
374 static void si_export_mrt_color(struct si_shader_context
*ctx
, LLVMValueRef
*color
, unsigned index
,
375 unsigned samplemask_param
, bool is_last
, struct si_ps_exports
*exp
)
380 if (ctx
->shader
->key
.part
.ps
.epilog
.clamp_color
)
381 for (i
= 0; i
< 4; i
++)
382 color
[i
] = ac_build_clamp(&ctx
->ac
, color
[i
]);
385 if (ctx
->shader
->key
.part
.ps
.epilog
.alpha_to_one
)
386 color
[3] = ctx
->ac
.f32_1
;
389 if (index
== 0 && ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
!= PIPE_FUNC_ALWAYS
)
390 si_alpha_test(ctx
, color
[3]);
392 /* Line & polygon smoothing */
393 if (ctx
->shader
->key
.part
.ps
.epilog
.poly_line_smoothing
)
394 color
[3] = si_scale_alpha_by_sample_mask(ctx
, color
[3], samplemask_param
);
396 /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
397 if (ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
> 0) {
398 struct ac_export_args args
[8];
401 /* Get the export arguments, also find out what the last one is. */
402 for (c
= 0; c
<= ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
; c
++) {
403 si_llvm_init_ps_export_args(ctx
, color
, V_008DFC_SQ_EXP_MRT
+ c
, &args
[c
]);
404 if (args
[c
].enabled_channels
)
408 /* Emit all exports. */
409 for (c
= 0; c
<= ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
; c
++) {
410 if (is_last
&& last
== c
) {
411 args
[c
].valid_mask
= 1; /* whether the EXEC mask is valid */
412 args
[c
].done
= 1; /* DONE bit */
413 } else if (!args
[c
].enabled_channels
)
414 continue; /* unnecessary NULL export */
416 memcpy(&exp
->args
[exp
->num
++], &args
[c
], sizeof(args
[c
]));
419 struct ac_export_args args
;
422 si_llvm_init_ps_export_args(ctx
, color
, V_008DFC_SQ_EXP_MRT
+ index
, &args
);
424 args
.valid_mask
= 1; /* whether the EXEC mask is valid */
425 args
.done
= 1; /* DONE bit */
426 } else if (!args
.enabled_channels
)
427 return; /* unnecessary NULL export */
429 memcpy(&exp
->args
[exp
->num
++], &args
, sizeof(args
));
433 static void si_emit_ps_exports(struct si_shader_context
*ctx
, struct si_ps_exports
*exp
)
435 for (unsigned i
= 0; i
< exp
->num
; i
++)
436 ac_build_export(&ctx
->ac
, &exp
->args
[i
]);
440 * Return PS outputs in this order:
442 * v[0:3] = color0.xyzw
443 * v[4:7] = color1.xyzw
448 * vN+3 = SampleMaskIn (used for OpenGL smoothing)
450 * The alpha-ref SGPR is returned via its original location.
452 static void si_llvm_return_fs_outputs(struct ac_shader_abi
*abi
, unsigned max_outputs
,
455 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
456 struct si_shader
*shader
= ctx
->shader
;
457 struct si_shader_info
*info
= &shader
->selector
->info
;
458 LLVMBuilderRef builder
= ctx
->ac
.builder
;
459 unsigned i
, j
, first_vgpr
, vgpr
;
461 LLVMValueRef color
[8][4] = {};
462 LLVMValueRef depth
= NULL
, stencil
= NULL
, samplemask
= NULL
;
465 if (ctx
->postponed_kill
)
466 ac_build_kill_if_false(&ctx
->ac
, LLVMBuildLoad(builder
, ctx
->postponed_kill
, ""));
468 /* Read the output values. */
469 for (i
= 0; i
< info
->num_outputs
; i
++) {
470 unsigned semantic_name
= info
->output_semantic_name
[i
];
471 unsigned semantic_index
= info
->output_semantic_index
[i
];
473 switch (semantic_name
) {
474 case TGSI_SEMANTIC_COLOR
:
475 assert(semantic_index
< 8);
476 for (j
= 0; j
< 4; j
++) {
477 LLVMValueRef ptr
= addrs
[4 * i
+ j
];
478 LLVMValueRef result
= LLVMBuildLoad(builder
, ptr
, "");
479 color
[semantic_index
][j
] = result
;
482 case TGSI_SEMANTIC_POSITION
:
483 depth
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
485 case TGSI_SEMANTIC_STENCIL
:
486 stencil
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
488 case TGSI_SEMANTIC_SAMPLEMASK
:
489 samplemask
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
492 fprintf(stderr
, "Warning: GFX6 unhandled fs output type:%d\n", semantic_name
);
496 /* Fill the return structure. */
497 ret
= ctx
->return_value
;
500 ret
= LLVMBuildInsertValue(
501 builder
, ret
, ac_to_integer(&ctx
->ac
, LLVMGetParam(ctx
->main_fn
, SI_PARAM_ALPHA_REF
)),
502 SI_SGPR_ALPHA_REF
, "");
505 first_vgpr
= vgpr
= SI_SGPR_ALPHA_REF
+ 1;
506 for (i
= 0; i
< ARRAY_SIZE(color
); i
++) {
510 for (j
= 0; j
< 4; j
++)
511 ret
= LLVMBuildInsertValue(builder
, ret
, color
[i
][j
], vgpr
++, "");
514 ret
= LLVMBuildInsertValue(builder
, ret
, depth
, vgpr
++, "");
516 ret
= LLVMBuildInsertValue(builder
, ret
, stencil
, vgpr
++, "");
518 ret
= LLVMBuildInsertValue(builder
, ret
, samplemask
, vgpr
++, "");
520 /* Add the input sample mask for smoothing at the end. */
521 if (vgpr
< first_vgpr
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
)
522 vgpr
= first_vgpr
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
;
523 ret
= LLVMBuildInsertValue(builder
, ret
, LLVMGetParam(ctx
->main_fn
, SI_PARAM_SAMPLE_COVERAGE
),
526 ctx
->return_value
= ret
;
529 static void si_llvm_emit_polygon_stipple(struct si_shader_context
*ctx
,
530 LLVMValueRef param_rw_buffers
,
531 struct ac_arg param_pos_fixed_pt
)
533 LLVMBuilderRef builder
= ctx
->ac
.builder
;
534 LLVMValueRef slot
, desc
, offset
, row
, bit
, address
[2];
536 /* Use the fixed-point gl_FragCoord input.
537 * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
538 * per coordinate to get the repeating effect.
540 address
[0] = si_unpack_param(ctx
, param_pos_fixed_pt
, 0, 5);
541 address
[1] = si_unpack_param(ctx
, param_pos_fixed_pt
, 16, 5);
543 /* Load the buffer descriptor. */
544 slot
= LLVMConstInt(ctx
->ac
.i32
, SI_PS_CONST_POLY_STIPPLE
, 0);
545 desc
= ac_build_load_to_sgpr(&ctx
->ac
, param_rw_buffers
, slot
);
547 /* The stipple pattern is 32x32, each row has 32 bits. */
548 offset
= LLVMBuildMul(builder
, address
[1], LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
549 row
= si_buffer_load_const(ctx
, desc
, offset
);
550 row
= ac_to_integer(&ctx
->ac
, row
);
551 bit
= LLVMBuildLShr(builder
, row
, address
[0], "");
552 bit
= LLVMBuildTrunc(builder
, bit
, ctx
->ac
.i1
, "");
553 ac_build_kill_if_false(&ctx
->ac
, bit
);
557 * Build the pixel shader prolog function. This handles:
558 * - two-side color selection and interpolation
559 * - overriding interpolation parameters for the API PS
560 * - polygon stippling
562 * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
563 * overriden by other states. (e.g. per-sample interpolation)
564 * Interpolated colors are stored after the preloaded VGPRs.
566 void si_llvm_build_ps_prolog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
568 LLVMValueRef ret
, func
;
569 int num_returns
, i
, num_color_channels
;
571 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
573 /* Declare inputs. */
574 LLVMTypeRef return_types
[AC_MAX_ARGS
];
576 num_color_channels
= util_bitcount(key
->ps_prolog
.colors_read
);
577 assert(key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.num_input_vgprs
+ num_color_channels
<=
579 for (i
= 0; i
< key
->ps_prolog
.num_input_sgprs
; i
++) {
580 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
581 return_types
[num_returns
++] = ctx
->ac
.i32
;
584 struct ac_arg pos_fixed_pt
;
585 struct ac_arg ancillary
;
586 struct ac_arg param_sample_mask
;
587 for (i
= 0; i
< key
->ps_prolog
.num_input_vgprs
; i
++) {
588 struct ac_arg
*arg
= NULL
;
589 if (i
== key
->ps_prolog
.ancillary_vgpr_index
) {
591 } else if (i
== key
->ps_prolog
.ancillary_vgpr_index
+ 1) {
592 arg
= ¶m_sample_mask
;
593 } else if (i
== key
->ps_prolog
.num_input_vgprs
- 1) {
594 /* POS_FIXED_PT is always last. */
597 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_FLOAT
, arg
);
598 return_types
[num_returns
++] = ctx
->ac
.f32
;
601 /* Declare outputs (same as inputs + add colors if needed) */
602 for (i
= 0; i
< num_color_channels
; i
++)
603 return_types
[num_returns
++] = ctx
->ac
.f32
;
605 /* Create the function. */
606 si_llvm_create_func(ctx
, "ps_prolog", return_types
, num_returns
, 0);
609 /* Copy inputs to outputs. This should be no-op, as the registers match,
610 * but it will prevent the compiler from overwriting them unintentionally.
612 ret
= ctx
->return_value
;
613 for (i
= 0; i
< ctx
->args
.arg_count
; i
++) {
614 LLVMValueRef p
= LLVMGetParam(func
, i
);
615 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, p
, i
, "");
618 /* Polygon stippling. */
619 if (key
->ps_prolog
.states
.poly_stipple
) {
620 LLVMValueRef list
= si_prolog_get_rw_buffers(ctx
);
622 si_llvm_emit_polygon_stipple(ctx
, list
, pos_fixed_pt
);
625 if (key
->ps_prolog
.states
.bc_optimize_for_persp
||
626 key
->ps_prolog
.states
.bc_optimize_for_linear
) {
627 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
628 LLVMValueRef center
[2], centroid
[2], tmp
, bc_optimize
;
630 /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
631 * The hw doesn't compute CENTROID if the whole wave only
632 * contains fully-covered quads.
634 * PRIM_MASK is after user SGPRs.
636 bc_optimize
= LLVMGetParam(func
, SI_PS_NUM_USER_SGPR
);
638 LLVMBuildLShr(ctx
->ac
.builder
, bc_optimize
, LLVMConstInt(ctx
->ac
.i32
, 31, 0), "");
639 bc_optimize
= LLVMBuildTrunc(ctx
->ac
.builder
, bc_optimize
, ctx
->ac
.i1
, "");
641 if (key
->ps_prolog
.states
.bc_optimize_for_persp
) {
642 /* Read PERSP_CENTER. */
643 for (i
= 0; i
< 2; i
++)
644 center
[i
] = LLVMGetParam(func
, base
+ 2 + i
);
645 /* Read PERSP_CENTROID. */
646 for (i
= 0; i
< 2; i
++)
647 centroid
[i
] = LLVMGetParam(func
, base
+ 4 + i
);
648 /* Select PERSP_CENTROID. */
649 for (i
= 0; i
< 2; i
++) {
650 tmp
= LLVMBuildSelect(ctx
->ac
.builder
, bc_optimize
, center
[i
], centroid
[i
], "");
651 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, tmp
, base
+ 4 + i
, "");
654 if (key
->ps_prolog
.states
.bc_optimize_for_linear
) {
655 /* Read LINEAR_CENTER. */
656 for (i
= 0; i
< 2; i
++)
657 center
[i
] = LLVMGetParam(func
, base
+ 8 + i
);
658 /* Read LINEAR_CENTROID. */
659 for (i
= 0; i
< 2; i
++)
660 centroid
[i
] = LLVMGetParam(func
, base
+ 10 + i
);
661 /* Select LINEAR_CENTROID. */
662 for (i
= 0; i
< 2; i
++) {
663 tmp
= LLVMBuildSelect(ctx
->ac
.builder
, bc_optimize
, center
[i
], centroid
[i
], "");
664 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, tmp
, base
+ 10 + i
, "");
669 /* Force per-sample interpolation. */
670 if (key
->ps_prolog
.states
.force_persp_sample_interp
) {
671 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
672 LLVMValueRef persp_sample
[2];
674 /* Read PERSP_SAMPLE. */
675 for (i
= 0; i
< 2; i
++)
676 persp_sample
[i
] = LLVMGetParam(func
, base
+ i
);
677 /* Overwrite PERSP_CENTER. */
678 for (i
= 0; i
< 2; i
++)
679 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_sample
[i
], base
+ 2 + i
, "");
680 /* Overwrite PERSP_CENTROID. */
681 for (i
= 0; i
< 2; i
++)
682 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_sample
[i
], base
+ 4 + i
, "");
684 if (key
->ps_prolog
.states
.force_linear_sample_interp
) {
685 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
686 LLVMValueRef linear_sample
[2];
688 /* Read LINEAR_SAMPLE. */
689 for (i
= 0; i
< 2; i
++)
690 linear_sample
[i
] = LLVMGetParam(func
, base
+ 6 + i
);
691 /* Overwrite LINEAR_CENTER. */
692 for (i
= 0; i
< 2; i
++)
693 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_sample
[i
], base
+ 8 + i
, "");
694 /* Overwrite LINEAR_CENTROID. */
695 for (i
= 0; i
< 2; i
++)
696 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_sample
[i
], base
+ 10 + i
, "");
699 /* Force center interpolation. */
700 if (key
->ps_prolog
.states
.force_persp_center_interp
) {
701 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
702 LLVMValueRef persp_center
[2];
704 /* Read PERSP_CENTER. */
705 for (i
= 0; i
< 2; i
++)
706 persp_center
[i
] = LLVMGetParam(func
, base
+ 2 + i
);
707 /* Overwrite PERSP_SAMPLE. */
708 for (i
= 0; i
< 2; i
++)
709 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_center
[i
], base
+ i
, "");
710 /* Overwrite PERSP_CENTROID. */
711 for (i
= 0; i
< 2; i
++)
712 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_center
[i
], base
+ 4 + i
, "");
714 if (key
->ps_prolog
.states
.force_linear_center_interp
) {
715 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
716 LLVMValueRef linear_center
[2];
718 /* Read LINEAR_CENTER. */
719 for (i
= 0; i
< 2; i
++)
720 linear_center
[i
] = LLVMGetParam(func
, base
+ 8 + i
);
721 /* Overwrite LINEAR_SAMPLE. */
722 for (i
= 0; i
< 2; i
++)
723 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_center
[i
], base
+ 6 + i
, "");
724 /* Overwrite LINEAR_CENTROID. */
725 for (i
= 0; i
< 2; i
++)
726 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_center
[i
], base
+ 10 + i
, "");
729 /* Interpolate colors. */
730 unsigned color_out_idx
= 0;
731 for (i
= 0; i
< 2; i
++) {
732 unsigned writemask
= (key
->ps_prolog
.colors_read
>> (i
* 4)) & 0xf;
733 unsigned face_vgpr
= key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.face_vgpr_index
;
734 LLVMValueRef interp
[2], color
[4];
735 LLVMValueRef interp_ij
= NULL
, prim_mask
= NULL
, face
= NULL
;
740 /* If the interpolation qualifier is not CONSTANT (-1). */
741 if (key
->ps_prolog
.color_interp_vgpr_index
[i
] != -1) {
742 unsigned interp_vgpr
=
743 key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.color_interp_vgpr_index
[i
];
745 /* Get the (i,j) updated by bc_optimize handling. */
746 interp
[0] = LLVMBuildExtractValue(ctx
->ac
.builder
, ret
, interp_vgpr
, "");
747 interp
[1] = LLVMBuildExtractValue(ctx
->ac
.builder
, ret
, interp_vgpr
+ 1, "");
748 interp_ij
= ac_build_gather_values(&ctx
->ac
, interp
, 2);
751 /* Use the absolute location of the input. */
752 prim_mask
= LLVMGetParam(func
, SI_PS_NUM_USER_SGPR
);
754 if (key
->ps_prolog
.states
.color_two_side
) {
755 face
= LLVMGetParam(func
, face_vgpr
);
756 face
= ac_to_integer(&ctx
->ac
, face
);
759 interp_fs_color(ctx
, key
->ps_prolog
.color_attr_index
[i
], i
, key
->ps_prolog
.num_interp_inputs
,
760 key
->ps_prolog
.colors_read
, interp_ij
, prim_mask
, face
, color
);
763 unsigned chan
= u_bit_scan(&writemask
);
764 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, color
[chan
],
765 ctx
->args
.arg_count
+ color_out_idx
++, "");
769 /* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
772 * "When per-sample shading is active due to the use of a fragment
773 * input qualified by sample or due to the use of the gl_SampleID
774 * or gl_SamplePosition variables, only the bit for the current
775 * sample is set in gl_SampleMaskIn. When state specifies multiple
776 * fragment shader invocations for a given fragment, the sample
777 * mask for any single fragment shader invocation may specify a
778 * subset of the covered samples for the fragment. In this case,
779 * the bit corresponding to each covered sample will be set in
780 * exactly one fragment shader invocation."
782 * The samplemask loaded by hardware is always the coverage of the
783 * entire pixel/fragment, so mask bits out based on the sample ID.
785 if (key
->ps_prolog
.states
.samplemask_log_ps_iter
) {
786 /* The bit pattern matches that used by fixed function fragment
788 static const uint16_t ps_iter_masks
[] = {
789 0xffff, /* not used */
790 0x5555, 0x1111, 0x0101, 0x0001,
792 assert(key
->ps_prolog
.states
.samplemask_log_ps_iter
< ARRAY_SIZE(ps_iter_masks
));
794 uint32_t ps_iter_mask
= ps_iter_masks
[key
->ps_prolog
.states
.samplemask_log_ps_iter
];
795 LLVMValueRef sampleid
= si_unpack_param(ctx
, ancillary
, 8, 4);
796 LLVMValueRef samplemask
= ac_get_arg(&ctx
->ac
, param_sample_mask
);
798 samplemask
= ac_to_integer(&ctx
->ac
, samplemask
);
800 LLVMBuildAnd(ctx
->ac
.builder
, samplemask
,
801 LLVMBuildShl(ctx
->ac
.builder
, LLVMConstInt(ctx
->ac
.i32
, ps_iter_mask
, false),
804 samplemask
= ac_to_float(&ctx
->ac
, samplemask
);
806 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, samplemask
, param_sample_mask
.arg_index
, "");
809 /* Tell LLVM to insert WQM instruction sequence when needed. */
810 if (key
->ps_prolog
.wqm
) {
811 LLVMAddTargetDependentFunctionAttr(func
, "amdgpu-ps-wqm-outputs", "");
814 si_llvm_build_ret(ctx
, ret
);
818 * Build the pixel shader epilog function. This handles everything that must be
819 * emulated for pixel shader exports. (alpha-test, format conversions, etc)
821 void si_llvm_build_ps_epilog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
823 LLVMValueRef depth
= NULL
, stencil
= NULL
, samplemask
= NULL
;
825 struct si_ps_exports exp
= {};
827 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
829 /* Declare input SGPRs. */
830 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->rw_buffers
);
831 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->bindless_samplers_and_images
);
832 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->const_and_shader_buffers
);
833 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->samplers_and_images
);
834 si_add_arg_checked(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_FLOAT
, NULL
, SI_PARAM_ALPHA_REF
);
836 /* Declare input VGPRs. */
837 unsigned required_num_params
=
838 ctx
->args
.num_sgprs_used
+ util_bitcount(key
->ps_epilog
.colors_written
) * 4 +
839 key
->ps_epilog
.writes_z
+ key
->ps_epilog
.writes_stencil
+ key
->ps_epilog
.writes_samplemask
;
841 required_num_params
=
842 MAX2(required_num_params
, ctx
->args
.num_sgprs_used
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
+ 1);
844 while (ctx
->args
.arg_count
< required_num_params
)
845 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_FLOAT
, NULL
);
847 /* Create the function. */
848 si_llvm_create_func(ctx
, "ps_epilog", NULL
, 0, 0);
849 /* Disable elimination of unused inputs. */
850 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
, "InitialPSInputAddr", 0xffffff);
852 /* Process colors. */
853 unsigned vgpr
= ctx
->args
.num_sgprs_used
;
854 unsigned colors_written
= key
->ps_epilog
.colors_written
;
855 int last_color_export
= -1;
857 /* Find the last color export. */
858 if (!key
->ps_epilog
.writes_z
&& !key
->ps_epilog
.writes_stencil
&&
859 !key
->ps_epilog
.writes_samplemask
) {
860 unsigned spi_format
= key
->ps_epilog
.states
.spi_shader_col_format
;
862 /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
863 if (colors_written
== 0x1 && key
->ps_epilog
.states
.last_cbuf
> 0) {
864 /* Just set this if any of the colorbuffers are enabled. */
865 if (spi_format
& ((1ull << (4 * (key
->ps_epilog
.states
.last_cbuf
+ 1))) - 1))
866 last_color_export
= 0;
868 for (i
= 0; i
< 8; i
++)
869 if (colors_written
& (1 << i
) && (spi_format
>> (i
* 4)) & 0xf)
870 last_color_export
= i
;
874 while (colors_written
) {
875 LLVMValueRef color
[4];
876 int mrt
= u_bit_scan(&colors_written
);
878 for (i
= 0; i
< 4; i
++)
879 color
[i
] = LLVMGetParam(ctx
->main_fn
, vgpr
++);
881 si_export_mrt_color(ctx
, color
, mrt
, ctx
->args
.arg_count
- 1, mrt
== last_color_export
, &exp
);
884 /* Process depth, stencil, samplemask. */
885 if (key
->ps_epilog
.writes_z
)
886 depth
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
887 if (key
->ps_epilog
.writes_stencil
)
888 stencil
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
889 if (key
->ps_epilog
.writes_samplemask
)
890 samplemask
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
892 if (depth
|| stencil
|| samplemask
)
893 si_export_mrt_z(ctx
, depth
, stencil
, samplemask
, &exp
);
894 else if (last_color_export
== -1)
895 ac_build_export_null(&ctx
->ac
);
898 si_emit_ps_exports(ctx
, &exp
);
901 LLVMBuildRetVoid(ctx
->ac
.builder
);
904 void si_llvm_build_monolithic_ps(struct si_shader_context
*ctx
, struct si_shader
*shader
)
906 LLVMValueRef parts
[3];
907 unsigned num_parts
= 0, main_index
;
909 union si_shader_part_key prolog_key
;
910 si_get_ps_prolog_key(shader
, &prolog_key
, false);
912 if (si_need_ps_prolog(&prolog_key
)) {
913 si_llvm_build_ps_prolog(ctx
, &prolog_key
);
914 parts
[num_parts
++] = ctx
->main_fn
;
917 main_index
= num_parts
;
918 parts
[num_parts
++] = ctx
->main_fn
;
920 union si_shader_part_key epilog_key
;
921 si_get_ps_epilog_key(shader
, &epilog_key
);
922 si_llvm_build_ps_epilog(ctx
, &epilog_key
);
923 parts
[num_parts
++] = ctx
->main_fn
;
925 si_build_wrapper_function(ctx
, parts
, num_parts
, main_index
, 0);
928 void si_llvm_init_ps_callbacks(struct si_shader_context
*ctx
)
930 ctx
->abi
.emit_outputs
= si_llvm_return_fs_outputs
;
931 ctx
->abi
.load_sample_position
= load_sample_position
;
932 ctx
->abi
.load_sample_mask_in
= load_sample_mask_in
;
933 ctx
->abi
.emit_fbfetch
= si_nir_emit_fbfetch
;