2 * Copyright 2020 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "si_shader_internal.h"
29 LLVMValueRef
si_get_sample_id(struct si_shader_context
*ctx
)
31 return si_unpack_param(ctx
, ctx
->args
.ancillary
, 8, 4);
34 static LLVMValueRef
load_sample_mask_in(struct ac_shader_abi
*abi
)
36 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
37 return ac_to_integer(&ctx
->ac
, ac_get_arg(&ctx
->ac
, ctx
->args
.sample_coverage
));
40 static LLVMValueRef
load_sample_position(struct ac_shader_abi
*abi
, LLVMValueRef sample_id
)
42 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
43 LLVMValueRef desc
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
44 LLVMValueRef buf_index
= LLVMConstInt(ctx
->ac
.i32
, SI_PS_CONST_SAMPLE_POSITIONS
, 0);
45 LLVMValueRef resource
= ac_build_load_to_sgpr(&ctx
->ac
, desc
, buf_index
);
47 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
48 LLVMValueRef offset0
=
49 LLVMBuildMul(ctx
->ac
.builder
, sample_id
, LLVMConstInt(ctx
->ac
.i32
, 8, 0), "");
50 LLVMValueRef offset1
=
51 LLVMBuildAdd(ctx
->ac
.builder
, offset0
, LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
53 LLVMValueRef pos
[4] = {si_buffer_load_const(ctx
, resource
, offset0
),
54 si_buffer_load_const(ctx
, resource
, offset1
),
55 LLVMConstReal(ctx
->ac
.f32
, 0), LLVMConstReal(ctx
->ac
.f32
, 0)};
57 return ac_build_gather_values(&ctx
->ac
, pos
, 4);
60 static LLVMValueRef
si_nir_emit_fbfetch(struct ac_shader_abi
*abi
)
62 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
63 struct ac_image_args args
= {};
64 LLVMValueRef ptr
, image
, fmask
;
66 /* Ignore src0, because KHR_blend_func_extended disallows multiple render
70 /* Load the image descriptor. */
71 STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0
% 2 == 0);
72 ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
74 LLVMBuildPointerCast(ctx
->ac
.builder
, ptr
, ac_array_in_const32_addr_space(ctx
->ac
.v8i32
), "");
76 ac_build_load_to_sgpr(&ctx
->ac
, ptr
, LLVMConstInt(ctx
->ac
.i32
, SI_PS_IMAGE_COLORBUF0
/ 2, 0));
80 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 0, 16);
82 if (!ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
83 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 16, 16);
85 /* Get the current render target layer index. */
86 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
)
87 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->args
.ancillary
, 16, 11);
89 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
90 args
.coords
[chan
++] = si_get_sample_id(ctx
);
92 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
&& !(ctx
->screen
->debug_flags
& DBG(NO_FMASK
))) {
93 fmask
= ac_build_load_to_sgpr(&ctx
->ac
, ptr
,
94 LLVMConstInt(ctx
->ac
.i32
, SI_PS_IMAGE_COLORBUF0_FMASK
/ 2, 0));
96 ac_apply_fmask_to_sample(&ctx
->ac
, fmask
, args
.coords
,
97 ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
);
100 args
.opcode
= ac_image_load
;
101 args
.resource
= image
;
103 args
.attributes
= AC_FUNC_ATTR_READNONE
;
105 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
107 ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_2darraymsaa
: ac_image_2dmsaa
;
108 else if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
109 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_1darray
: ac_image_1d
;
111 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
? ac_image_2darray
: ac_image_2d
;
113 return ac_build_image_opcode(&ctx
->ac
, &args
);
116 static LLVMValueRef
si_build_fs_interp(struct si_shader_context
*ctx
, unsigned attr_index
,
117 unsigned chan
, LLVMValueRef prim_mask
, LLVMValueRef i
,
121 return ac_build_fs_interp(&ctx
->ac
, LLVMConstInt(ctx
->ac
.i32
, chan
, 0),
122 LLVMConstInt(ctx
->ac
.i32
, attr_index
, 0), prim_mask
, i
, j
);
124 return ac_build_fs_interp_mov(&ctx
->ac
, LLVMConstInt(ctx
->ac
.i32
, 2, 0), /* P0 */
125 LLVMConstInt(ctx
->ac
.i32
, chan
, 0),
126 LLVMConstInt(ctx
->ac
.i32
, attr_index
, 0), prim_mask
);
130 * Interpolate a fragment shader input.
133 * @param input_index index of the input in hardware
134 * @param semantic_name TGSI_SEMANTIC_*
135 * @param semantic_index semantic index
136 * @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset)
137 * @param colors_read_mask color components read (4 bits for each color, 8 bits in total)
138 * @param interp_param interpolation weights (i,j)
139 * @param prim_mask SI_PARAM_PRIM_MASK
140 * @param face SI_PARAM_FRONT_FACE
141 * @param result the return value (4 components)
143 static void interp_fs_color(struct si_shader_context
*ctx
, unsigned input_index
,
144 unsigned semantic_index
, unsigned num_interp_inputs
,
145 unsigned colors_read_mask
, LLVMValueRef interp_param
,
146 LLVMValueRef prim_mask
, LLVMValueRef face
, LLVMValueRef result
[4])
148 LLVMValueRef i
= NULL
, j
= NULL
;
151 /* fs.constant returns the param from the middle vertex, so it's not
152 * really useful for flat shading. It's meant to be used for custom
153 * interpolation (but the intrinsic can't fetch from the other two
156 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
157 * to do the right thing. The only reason we use fs.constant is that
158 * fs.interp cannot be used on integers, because they can be equal
161 * When interp is false we will use fs.constant or for newer llvm,
164 bool interp
= interp_param
!= NULL
;
168 LLVMBuildBitCast(ctx
->ac
.builder
, interp_param
, ctx
->ac
.v2f32
, "");
170 i
= LLVMBuildExtractElement(ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_0
, "");
171 j
= LLVMBuildExtractElement(ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_1
, "");
174 if (ctx
->shader
->key
.part
.ps
.prolog
.color_two_side
) {
175 LLVMValueRef is_face_positive
;
177 /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
178 * otherwise it's at offset "num_inputs".
180 unsigned back_attr_offset
= num_interp_inputs
;
181 if (semantic_index
== 1 && colors_read_mask
& 0xf)
182 back_attr_offset
+= 1;
184 is_face_positive
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntNE
, face
, ctx
->ac
.i32_0
, "");
186 for (chan
= 0; chan
< 4; chan
++) {
187 LLVMValueRef front
, back
;
189 front
= si_build_fs_interp(ctx
, input_index
, chan
, prim_mask
, i
, j
);
190 back
= si_build_fs_interp(ctx
, back_attr_offset
, chan
, prim_mask
, i
, j
);
192 result
[chan
] = LLVMBuildSelect(ctx
->ac
.builder
, is_face_positive
, front
, back
, "");
195 for (chan
= 0; chan
< 4; chan
++) {
196 result
[chan
] = si_build_fs_interp(ctx
, input_index
, chan
, prim_mask
, i
, j
);
201 static void si_alpha_test(struct si_shader_context
*ctx
, LLVMValueRef alpha
)
203 if (ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
!= PIPE_FUNC_NEVER
) {
204 static LLVMRealPredicate cond_map
[PIPE_FUNC_ALWAYS
+ 1] = {
205 [PIPE_FUNC_LESS
] = LLVMRealOLT
, [PIPE_FUNC_EQUAL
] = LLVMRealOEQ
,
206 [PIPE_FUNC_LEQUAL
] = LLVMRealOLE
, [PIPE_FUNC_GREATER
] = LLVMRealOGT
,
207 [PIPE_FUNC_NOTEQUAL
] = LLVMRealONE
, [PIPE_FUNC_GEQUAL
] = LLVMRealOGE
,
209 LLVMRealPredicate cond
= cond_map
[ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
];
212 LLVMValueRef alpha_ref
= LLVMGetParam(ctx
->main_fn
, SI_PARAM_ALPHA_REF
);
213 LLVMValueRef alpha_pass
= LLVMBuildFCmp(ctx
->ac
.builder
, cond
, alpha
, alpha_ref
, "");
214 ac_build_kill_if_false(&ctx
->ac
, alpha_pass
);
216 ac_build_kill_if_false(&ctx
->ac
, ctx
->ac
.i1false
);
220 static LLVMValueRef
si_scale_alpha_by_sample_mask(struct si_shader_context
*ctx
, LLVMValueRef alpha
,
221 unsigned samplemask_param
)
223 LLVMValueRef coverage
;
225 /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
226 coverage
= LLVMGetParam(ctx
->main_fn
, samplemask_param
);
227 coverage
= ac_to_integer(&ctx
->ac
, coverage
);
229 coverage
= ac_build_intrinsic(&ctx
->ac
, "llvm.ctpop.i32", ctx
->ac
.i32
, &coverage
, 1,
230 AC_FUNC_ATTR_READNONE
);
232 coverage
= LLVMBuildUIToFP(ctx
->ac
.builder
, coverage
, ctx
->ac
.f32
, "");
234 coverage
= LLVMBuildFMul(ctx
->ac
.builder
, coverage
,
235 LLVMConstReal(ctx
->ac
.f32
, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES
), "");
237 return LLVMBuildFMul(ctx
->ac
.builder
, alpha
, coverage
, "");
240 struct si_ps_exports
{
242 struct ac_export_args args
[10];
245 static void si_export_mrt_z(struct si_shader_context
*ctx
, LLVMValueRef depth
, LLVMValueRef stencil
,
246 LLVMValueRef samplemask
, struct si_ps_exports
*exp
)
248 struct ac_export_args args
;
250 ac_export_mrt_z(&ctx
->ac
, depth
, stencil
, samplemask
, &args
);
252 memcpy(&exp
->args
[exp
->num
++], &args
, sizeof(args
));
255 /* Initialize arguments for the shader export intrinsic */
256 static void si_llvm_init_ps_export_args(struct si_shader_context
*ctx
, LLVMValueRef
*values
,
257 unsigned cbuf
, unsigned compacted_mrt_index
,
258 struct ac_export_args
*args
)
260 const struct si_shader_key
*key
= &ctx
->shader
->key
;
261 unsigned col_formats
= key
->part
.ps
.epilog
.spi_shader_col_format
;
262 LLVMValueRef f32undef
= LLVMGetUndef(ctx
->ac
.f32
);
263 unsigned spi_shader_col_format
;
265 bool is_int8
, is_int10
;
267 assert(cbuf
>= 0 && cbuf
< 8);
269 spi_shader_col_format
= (col_formats
>> (cbuf
* 4)) & 0xf;
270 is_int8
= (key
->part
.ps
.epilog
.color_is_int8
>> cbuf
) & 0x1;
271 is_int10
= (key
->part
.ps
.epilog
.color_is_int10
>> cbuf
) & 0x1;
273 /* Default is 0xf. Adjusted below depending on the format. */
274 args
->enabled_channels
= 0xf; /* writemask */
276 /* Specify whether the EXEC mask represents the valid mask */
277 args
->valid_mask
= 0;
279 /* Specify whether this is the last export */
282 /* Specify the target we are exporting */
283 args
->target
= V_008DFC_SQ_EXP_MRT
+ compacted_mrt_index
;
286 args
->out
[0] = f32undef
;
287 args
->out
[1] = f32undef
;
288 args
->out
[2] = f32undef
;
289 args
->out
[3] = f32undef
;
291 LLVMValueRef (*packf
)(struct ac_llvm_context
* ctx
, LLVMValueRef args
[2]) = NULL
;
292 LLVMValueRef (*packi
)(struct ac_llvm_context
* ctx
, LLVMValueRef args
[2], unsigned bits
,
295 switch (spi_shader_col_format
) {
296 case V_028714_SPI_SHADER_ZERO
:
297 args
->enabled_channels
= 0; /* writemask */
298 args
->target
= V_008DFC_SQ_EXP_NULL
;
301 case V_028714_SPI_SHADER_32_R
:
302 args
->enabled_channels
= 1; /* writemask */
303 args
->out
[0] = values
[0];
306 case V_028714_SPI_SHADER_32_GR
:
307 args
->enabled_channels
= 0x3; /* writemask */
308 args
->out
[0] = values
[0];
309 args
->out
[1] = values
[1];
312 case V_028714_SPI_SHADER_32_AR
:
313 if (ctx
->screen
->info
.chip_class
>= GFX10
) {
314 args
->enabled_channels
= 0x3; /* writemask */
315 args
->out
[0] = values
[0];
316 args
->out
[1] = values
[3];
318 args
->enabled_channels
= 0x9; /* writemask */
319 args
->out
[0] = values
[0];
320 args
->out
[3] = values
[3];
324 case V_028714_SPI_SHADER_FP16_ABGR
:
325 packf
= ac_build_cvt_pkrtz_f16
;
328 case V_028714_SPI_SHADER_UNORM16_ABGR
:
329 packf
= ac_build_cvt_pknorm_u16
;
332 case V_028714_SPI_SHADER_SNORM16_ABGR
:
333 packf
= ac_build_cvt_pknorm_i16
;
336 case V_028714_SPI_SHADER_UINT16_ABGR
:
337 packi
= ac_build_cvt_pk_u16
;
340 case V_028714_SPI_SHADER_SINT16_ABGR
:
341 packi
= ac_build_cvt_pk_i16
;
344 case V_028714_SPI_SHADER_32_ABGR
:
345 memcpy(&args
->out
[0], values
, sizeof(values
[0]) * 4);
349 /* Pack f16 or norm_i16/u16. */
351 for (chan
= 0; chan
< 2; chan
++) {
352 LLVMValueRef pack_args
[2] = {values
[2 * chan
], values
[2 * chan
+ 1]};
355 packed
= packf(&ctx
->ac
, pack_args
);
356 args
->out
[chan
] = ac_to_float(&ctx
->ac
, packed
);
358 args
->compr
= 1; /* COMPR flag */
362 for (chan
= 0; chan
< 2; chan
++) {
363 LLVMValueRef pack_args
[2] = {ac_to_integer(&ctx
->ac
, values
[2 * chan
]),
364 ac_to_integer(&ctx
->ac
, values
[2 * chan
+ 1])};
367 packed
= packi(&ctx
->ac
, pack_args
, is_int8
? 8 : is_int10
? 10 : 16, chan
== 1);
368 args
->out
[chan
] = ac_to_float(&ctx
->ac
, packed
);
370 args
->compr
= 1; /* COMPR flag */
374 static bool si_export_mrt_color(struct si_shader_context
*ctx
, LLVMValueRef
*color
, unsigned index
,
375 unsigned compacted_mrt_index
, unsigned samplemask_param
,
376 bool is_last
, struct si_ps_exports
*exp
)
381 if (ctx
->shader
->key
.part
.ps
.epilog
.clamp_color
)
382 for (i
= 0; i
< 4; i
++)
383 color
[i
] = ac_build_clamp(&ctx
->ac
, color
[i
]);
386 if (ctx
->shader
->key
.part
.ps
.epilog
.alpha_to_one
)
387 color
[3] = ctx
->ac
.f32_1
;
390 if (index
== 0 && ctx
->shader
->key
.part
.ps
.epilog
.alpha_func
!= PIPE_FUNC_ALWAYS
)
391 si_alpha_test(ctx
, color
[3]);
393 /* Line & polygon smoothing */
394 if (ctx
->shader
->key
.part
.ps
.epilog
.poly_line_smoothing
)
395 color
[3] = si_scale_alpha_by_sample_mask(ctx
, color
[3], samplemask_param
);
397 /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
398 if (ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
> 0) {
399 struct ac_export_args args
[8];
402 assert(compacted_mrt_index
== 0);
404 /* Get the export arguments, also find out what the last one is. */
405 for (c
= 0; c
<= ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
; c
++) {
406 si_llvm_init_ps_export_args(ctx
, color
, c
, compacted_mrt_index
, &args
[c
]);
407 if (args
[c
].enabled_channels
) {
408 compacted_mrt_index
++;
415 /* Emit all exports. */
416 for (c
= 0; c
<= ctx
->shader
->key
.part
.ps
.epilog
.last_cbuf
; c
++) {
417 if (is_last
&& last
== c
) {
418 args
[c
].valid_mask
= 1; /* whether the EXEC mask is valid */
419 args
[c
].done
= 1; /* DONE bit */
420 } else if (!args
[c
].enabled_channels
)
421 continue; /* unnecessary NULL export */
423 memcpy(&exp
->args
[exp
->num
++], &args
[c
], sizeof(args
[c
]));
426 struct ac_export_args args
;
429 si_llvm_init_ps_export_args(ctx
, color
, index
, compacted_mrt_index
, &args
);
431 args
.valid_mask
= 1; /* whether the EXEC mask is valid */
432 args
.done
= 1; /* DONE bit */
433 } else if (!args
.enabled_channels
)
434 return false; /* unnecessary NULL export */
436 memcpy(&exp
->args
[exp
->num
++], &args
, sizeof(args
));
441 static void si_emit_ps_exports(struct si_shader_context
*ctx
, struct si_ps_exports
*exp
)
443 for (unsigned i
= 0; i
< exp
->num
; i
++)
444 ac_build_export(&ctx
->ac
, &exp
->args
[i
]);
448 * Return PS outputs in this order:
450 * v[0:3] = color0.xyzw
451 * v[4:7] = color1.xyzw
456 * vN+3 = SampleMaskIn (used for OpenGL smoothing)
458 * The alpha-ref SGPR is returned via its original location.
460 static void si_llvm_return_fs_outputs(struct ac_shader_abi
*abi
, unsigned max_outputs
,
463 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
464 struct si_shader
*shader
= ctx
->shader
;
465 struct si_shader_info
*info
= &shader
->selector
->info
;
466 LLVMBuilderRef builder
= ctx
->ac
.builder
;
467 unsigned i
, j
, first_vgpr
, vgpr
;
469 LLVMValueRef color
[8][4] = {};
470 LLVMValueRef depth
= NULL
, stencil
= NULL
, samplemask
= NULL
;
473 if (ctx
->postponed_kill
)
474 ac_build_kill_if_false(&ctx
->ac
, LLVMBuildLoad(builder
, ctx
->postponed_kill
, ""));
476 /* Read the output values. */
477 for (i
= 0; i
< info
->num_outputs
; i
++) {
478 unsigned semantic_name
= info
->output_semantic_name
[i
];
479 unsigned semantic_index
= info
->output_semantic_index
[i
];
481 switch (semantic_name
) {
482 case TGSI_SEMANTIC_COLOR
:
483 assert(semantic_index
< 8);
484 for (j
= 0; j
< 4; j
++) {
485 LLVMValueRef ptr
= addrs
[4 * i
+ j
];
486 LLVMValueRef result
= LLVMBuildLoad(builder
, ptr
, "");
487 color
[semantic_index
][j
] = result
;
490 case TGSI_SEMANTIC_POSITION
:
491 depth
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
493 case TGSI_SEMANTIC_STENCIL
:
494 stencil
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
496 case TGSI_SEMANTIC_SAMPLEMASK
:
497 samplemask
= LLVMBuildLoad(builder
, addrs
[4 * i
+ 0], "");
500 fprintf(stderr
, "Warning: GFX6 unhandled fs output type:%d\n", semantic_name
);
504 /* Fill the return structure. */
505 ret
= ctx
->return_value
;
508 ret
= LLVMBuildInsertValue(
509 builder
, ret
, ac_to_integer(&ctx
->ac
, LLVMGetParam(ctx
->main_fn
, SI_PARAM_ALPHA_REF
)),
510 SI_SGPR_ALPHA_REF
, "");
513 first_vgpr
= vgpr
= SI_SGPR_ALPHA_REF
+ 1;
514 for (i
= 0; i
< ARRAY_SIZE(color
); i
++) {
518 for (j
= 0; j
< 4; j
++)
519 ret
= LLVMBuildInsertValue(builder
, ret
, color
[i
][j
], vgpr
++, "");
522 ret
= LLVMBuildInsertValue(builder
, ret
, depth
, vgpr
++, "");
524 ret
= LLVMBuildInsertValue(builder
, ret
, stencil
, vgpr
++, "");
526 ret
= LLVMBuildInsertValue(builder
, ret
, samplemask
, vgpr
++, "");
528 /* Add the input sample mask for smoothing at the end. */
529 if (vgpr
< first_vgpr
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
)
530 vgpr
= first_vgpr
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
;
531 ret
= LLVMBuildInsertValue(builder
, ret
, LLVMGetParam(ctx
->main_fn
, SI_PARAM_SAMPLE_COVERAGE
),
534 ctx
->return_value
= ret
;
537 static void si_llvm_emit_polygon_stipple(struct si_shader_context
*ctx
,
538 LLVMValueRef param_rw_buffers
,
539 struct ac_arg param_pos_fixed_pt
)
541 LLVMBuilderRef builder
= ctx
->ac
.builder
;
542 LLVMValueRef slot
, desc
, offset
, row
, bit
, address
[2];
544 /* Use the fixed-point gl_FragCoord input.
545 * Since the stipple pattern is 32x32 and it repeats, just get 5 bits
546 * per coordinate to get the repeating effect.
548 address
[0] = si_unpack_param(ctx
, param_pos_fixed_pt
, 0, 5);
549 address
[1] = si_unpack_param(ctx
, param_pos_fixed_pt
, 16, 5);
551 /* Load the buffer descriptor. */
552 slot
= LLVMConstInt(ctx
->ac
.i32
, SI_PS_CONST_POLY_STIPPLE
, 0);
553 desc
= ac_build_load_to_sgpr(&ctx
->ac
, param_rw_buffers
, slot
);
555 /* The stipple pattern is 32x32, each row has 32 bits. */
556 offset
= LLVMBuildMul(builder
, address
[1], LLVMConstInt(ctx
->ac
.i32
, 4, 0), "");
557 row
= si_buffer_load_const(ctx
, desc
, offset
);
558 row
= ac_to_integer(&ctx
->ac
, row
);
559 bit
= LLVMBuildLShr(builder
, row
, address
[0], "");
560 bit
= LLVMBuildTrunc(builder
, bit
, ctx
->ac
.i1
, "");
561 ac_build_kill_if_false(&ctx
->ac
, bit
);
565 * Build the pixel shader prolog function. This handles:
566 * - two-side color selection and interpolation
567 * - overriding interpolation parameters for the API PS
568 * - polygon stippling
570 * All preloaded SGPRs and VGPRs are passed through unmodified unless they are
571 * overridden by other states. (e.g. per-sample interpolation)
572 * Interpolated colors are stored after the preloaded VGPRs.
574 void si_llvm_build_ps_prolog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
576 LLVMValueRef ret
, func
;
577 int num_returns
, i
, num_color_channels
;
579 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
581 /* Declare inputs. */
582 LLVMTypeRef return_types
[AC_MAX_ARGS
];
584 num_color_channels
= util_bitcount(key
->ps_prolog
.colors_read
);
585 assert(key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.num_input_vgprs
+ num_color_channels
<=
587 for (i
= 0; i
< key
->ps_prolog
.num_input_sgprs
; i
++) {
588 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, NULL
);
589 return_types
[num_returns
++] = ctx
->ac
.i32
;
592 struct ac_arg pos_fixed_pt
;
593 struct ac_arg ancillary
;
594 struct ac_arg param_sample_mask
;
595 for (i
= 0; i
< key
->ps_prolog
.num_input_vgprs
; i
++) {
596 struct ac_arg
*arg
= NULL
;
597 if (i
== key
->ps_prolog
.ancillary_vgpr_index
) {
599 } else if (i
== key
->ps_prolog
.ancillary_vgpr_index
+ 1) {
600 arg
= ¶m_sample_mask
;
601 } else if (i
== key
->ps_prolog
.num_input_vgprs
- 1) {
602 /* POS_FIXED_PT is always last. */
605 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_FLOAT
, arg
);
606 return_types
[num_returns
++] = ctx
->ac
.f32
;
609 /* Declare outputs (same as inputs + add colors if needed) */
610 for (i
= 0; i
< num_color_channels
; i
++)
611 return_types
[num_returns
++] = ctx
->ac
.f32
;
613 /* Create the function. */
614 si_llvm_create_func(ctx
, "ps_prolog", return_types
, num_returns
, 0);
617 /* Copy inputs to outputs. This should be no-op, as the registers match,
618 * but it will prevent the compiler from overwriting them unintentionally.
620 ret
= ctx
->return_value
;
621 for (i
= 0; i
< ctx
->args
.arg_count
; i
++) {
622 LLVMValueRef p
= LLVMGetParam(func
, i
);
623 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, p
, i
, "");
626 /* Polygon stippling. */
627 if (key
->ps_prolog
.states
.poly_stipple
) {
628 LLVMValueRef list
= si_prolog_get_rw_buffers(ctx
);
630 si_llvm_emit_polygon_stipple(ctx
, list
, pos_fixed_pt
);
633 if (key
->ps_prolog
.states
.bc_optimize_for_persp
||
634 key
->ps_prolog
.states
.bc_optimize_for_linear
) {
635 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
636 LLVMValueRef center
[2], centroid
[2], tmp
, bc_optimize
;
638 /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
639 * The hw doesn't compute CENTROID if the whole wave only
640 * contains fully-covered quads.
642 * PRIM_MASK is after user SGPRs.
644 bc_optimize
= LLVMGetParam(func
, SI_PS_NUM_USER_SGPR
);
646 LLVMBuildLShr(ctx
->ac
.builder
, bc_optimize
, LLVMConstInt(ctx
->ac
.i32
, 31, 0), "");
647 bc_optimize
= LLVMBuildTrunc(ctx
->ac
.builder
, bc_optimize
, ctx
->ac
.i1
, "");
649 if (key
->ps_prolog
.states
.bc_optimize_for_persp
) {
650 /* Read PERSP_CENTER. */
651 for (i
= 0; i
< 2; i
++)
652 center
[i
] = LLVMGetParam(func
, base
+ 2 + i
);
653 /* Read PERSP_CENTROID. */
654 for (i
= 0; i
< 2; i
++)
655 centroid
[i
] = LLVMGetParam(func
, base
+ 4 + i
);
656 /* Select PERSP_CENTROID. */
657 for (i
= 0; i
< 2; i
++) {
658 tmp
= LLVMBuildSelect(ctx
->ac
.builder
, bc_optimize
, center
[i
], centroid
[i
], "");
659 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, tmp
, base
+ 4 + i
, "");
662 if (key
->ps_prolog
.states
.bc_optimize_for_linear
) {
663 /* Read LINEAR_CENTER. */
664 for (i
= 0; i
< 2; i
++)
665 center
[i
] = LLVMGetParam(func
, base
+ 8 + i
);
666 /* Read LINEAR_CENTROID. */
667 for (i
= 0; i
< 2; i
++)
668 centroid
[i
] = LLVMGetParam(func
, base
+ 10 + i
);
669 /* Select LINEAR_CENTROID. */
670 for (i
= 0; i
< 2; i
++) {
671 tmp
= LLVMBuildSelect(ctx
->ac
.builder
, bc_optimize
, center
[i
], centroid
[i
], "");
672 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, tmp
, base
+ 10 + i
, "");
677 /* Force per-sample interpolation. */
678 if (key
->ps_prolog
.states
.force_persp_sample_interp
) {
679 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
680 LLVMValueRef persp_sample
[2];
682 /* Read PERSP_SAMPLE. */
683 for (i
= 0; i
< 2; i
++)
684 persp_sample
[i
] = LLVMGetParam(func
, base
+ i
);
685 /* Overwrite PERSP_CENTER. */
686 for (i
= 0; i
< 2; i
++)
687 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_sample
[i
], base
+ 2 + i
, "");
688 /* Overwrite PERSP_CENTROID. */
689 for (i
= 0; i
< 2; i
++)
690 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_sample
[i
], base
+ 4 + i
, "");
692 if (key
->ps_prolog
.states
.force_linear_sample_interp
) {
693 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
694 LLVMValueRef linear_sample
[2];
696 /* Read LINEAR_SAMPLE. */
697 for (i
= 0; i
< 2; i
++)
698 linear_sample
[i
] = LLVMGetParam(func
, base
+ 6 + i
);
699 /* Overwrite LINEAR_CENTER. */
700 for (i
= 0; i
< 2; i
++)
701 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_sample
[i
], base
+ 8 + i
, "");
702 /* Overwrite LINEAR_CENTROID. */
703 for (i
= 0; i
< 2; i
++)
704 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_sample
[i
], base
+ 10 + i
, "");
707 /* Force center interpolation. */
708 if (key
->ps_prolog
.states
.force_persp_center_interp
) {
709 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
710 LLVMValueRef persp_center
[2];
712 /* Read PERSP_CENTER. */
713 for (i
= 0; i
< 2; i
++)
714 persp_center
[i
] = LLVMGetParam(func
, base
+ 2 + i
);
715 /* Overwrite PERSP_SAMPLE. */
716 for (i
= 0; i
< 2; i
++)
717 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_center
[i
], base
+ i
, "");
718 /* Overwrite PERSP_CENTROID. */
719 for (i
= 0; i
< 2; i
++)
720 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, persp_center
[i
], base
+ 4 + i
, "");
722 if (key
->ps_prolog
.states
.force_linear_center_interp
) {
723 unsigned i
, base
= key
->ps_prolog
.num_input_sgprs
;
724 LLVMValueRef linear_center
[2];
726 /* Read LINEAR_CENTER. */
727 for (i
= 0; i
< 2; i
++)
728 linear_center
[i
] = LLVMGetParam(func
, base
+ 8 + i
);
729 /* Overwrite LINEAR_SAMPLE. */
730 for (i
= 0; i
< 2; i
++)
731 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_center
[i
], base
+ 6 + i
, "");
732 /* Overwrite LINEAR_CENTROID. */
733 for (i
= 0; i
< 2; i
++)
734 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, linear_center
[i
], base
+ 10 + i
, "");
737 /* Interpolate colors. */
738 unsigned color_out_idx
= 0;
739 for (i
= 0; i
< 2; i
++) {
740 unsigned writemask
= (key
->ps_prolog
.colors_read
>> (i
* 4)) & 0xf;
741 unsigned face_vgpr
= key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.face_vgpr_index
;
742 LLVMValueRef interp
[2], color
[4];
743 LLVMValueRef interp_ij
= NULL
, prim_mask
= NULL
, face
= NULL
;
748 /* If the interpolation qualifier is not CONSTANT (-1). */
749 if (key
->ps_prolog
.color_interp_vgpr_index
[i
] != -1) {
750 unsigned interp_vgpr
=
751 key
->ps_prolog
.num_input_sgprs
+ key
->ps_prolog
.color_interp_vgpr_index
[i
];
753 /* Get the (i,j) updated by bc_optimize handling. */
754 interp
[0] = LLVMBuildExtractValue(ctx
->ac
.builder
, ret
, interp_vgpr
, "");
755 interp
[1] = LLVMBuildExtractValue(ctx
->ac
.builder
, ret
, interp_vgpr
+ 1, "");
756 interp_ij
= ac_build_gather_values(&ctx
->ac
, interp
, 2);
759 /* Use the absolute location of the input. */
760 prim_mask
= LLVMGetParam(func
, SI_PS_NUM_USER_SGPR
);
762 if (key
->ps_prolog
.states
.color_two_side
) {
763 face
= LLVMGetParam(func
, face_vgpr
);
764 face
= ac_to_integer(&ctx
->ac
, face
);
767 interp_fs_color(ctx
, key
->ps_prolog
.color_attr_index
[i
], i
, key
->ps_prolog
.num_interp_inputs
,
768 key
->ps_prolog
.colors_read
, interp_ij
, prim_mask
, face
, color
);
771 unsigned chan
= u_bit_scan(&writemask
);
772 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, color
[chan
],
773 ctx
->args
.arg_count
+ color_out_idx
++, "");
777 /* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
780 * "When per-sample shading is active due to the use of a fragment
781 * input qualified by sample or due to the use of the gl_SampleID
782 * or gl_SamplePosition variables, only the bit for the current
783 * sample is set in gl_SampleMaskIn. When state specifies multiple
784 * fragment shader invocations for a given fragment, the sample
785 * mask for any single fragment shader invocation may specify a
786 * subset of the covered samples for the fragment. In this case,
787 * the bit corresponding to each covered sample will be set in
788 * exactly one fragment shader invocation."
790 * The samplemask loaded by hardware is always the coverage of the
791 * entire pixel/fragment, so mask bits out based on the sample ID.
793 if (key
->ps_prolog
.states
.samplemask_log_ps_iter
) {
794 /* The bit pattern matches that used by fixed function fragment
796 static const uint16_t ps_iter_masks
[] = {
797 0xffff, /* not used */
798 0x5555, 0x1111, 0x0101, 0x0001,
800 assert(key
->ps_prolog
.states
.samplemask_log_ps_iter
< ARRAY_SIZE(ps_iter_masks
));
802 uint32_t ps_iter_mask
= ps_iter_masks
[key
->ps_prolog
.states
.samplemask_log_ps_iter
];
803 LLVMValueRef sampleid
= si_unpack_param(ctx
, ancillary
, 8, 4);
804 LLVMValueRef samplemask
= ac_get_arg(&ctx
->ac
, param_sample_mask
);
806 samplemask
= ac_to_integer(&ctx
->ac
, samplemask
);
808 LLVMBuildAnd(ctx
->ac
.builder
, samplemask
,
809 LLVMBuildShl(ctx
->ac
.builder
, LLVMConstInt(ctx
->ac
.i32
, ps_iter_mask
, false),
812 samplemask
= ac_to_float(&ctx
->ac
, samplemask
);
814 ret
= LLVMBuildInsertValue(ctx
->ac
.builder
, ret
, samplemask
, param_sample_mask
.arg_index
, "");
817 /* Tell LLVM to insert WQM instruction sequence when needed. */
818 if (key
->ps_prolog
.wqm
) {
819 LLVMAddTargetDependentFunctionAttr(func
, "amdgpu-ps-wqm-outputs", "");
822 si_llvm_build_ret(ctx
, ret
);
826 * Build the pixel shader epilog function. This handles everything that must be
827 * emulated for pixel shader exports. (alpha-test, format conversions, etc)
829 void si_llvm_build_ps_epilog(struct si_shader_context
*ctx
, union si_shader_part_key
*key
)
831 LLVMValueRef depth
= NULL
, stencil
= NULL
, samplemask
= NULL
;
833 struct si_ps_exports exp
= {};
835 memset(&ctx
->args
, 0, sizeof(ctx
->args
));
837 /* Declare input SGPRs. */
838 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->rw_buffers
);
839 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->bindless_samplers_and_images
);
840 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->const_and_shader_buffers
);
841 ac_add_arg(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_INT
, &ctx
->samplers_and_images
);
842 si_add_arg_checked(&ctx
->args
, AC_ARG_SGPR
, 1, AC_ARG_FLOAT
, NULL
, SI_PARAM_ALPHA_REF
);
844 /* Declare input VGPRs. */
845 unsigned required_num_params
=
846 ctx
->args
.num_sgprs_used
+ util_bitcount(key
->ps_epilog
.colors_written
) * 4 +
847 key
->ps_epilog
.writes_z
+ key
->ps_epilog
.writes_stencil
+ key
->ps_epilog
.writes_samplemask
;
849 required_num_params
=
850 MAX2(required_num_params
, ctx
->args
.num_sgprs_used
+ PS_EPILOG_SAMPLEMASK_MIN_LOC
+ 1);
852 while (ctx
->args
.arg_count
< required_num_params
)
853 ac_add_arg(&ctx
->args
, AC_ARG_VGPR
, 1, AC_ARG_FLOAT
, NULL
);
855 /* Create the function. */
856 si_llvm_create_func(ctx
, "ps_epilog", NULL
, 0, 0);
857 /* Disable elimination of unused inputs. */
858 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
, "InitialPSInputAddr", 0xffffff);
860 /* Process colors. */
861 unsigned vgpr
= ctx
->args
.num_sgprs_used
;
862 unsigned colors_written
= key
->ps_epilog
.colors_written
;
863 int last_color_export
= -1;
865 /* Find the last color export. */
866 if (!key
->ps_epilog
.writes_z
&& !key
->ps_epilog
.writes_stencil
&&
867 !key
->ps_epilog
.writes_samplemask
) {
868 unsigned spi_format
= key
->ps_epilog
.states
.spi_shader_col_format
;
870 /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
871 if (colors_written
== 0x1 && key
->ps_epilog
.states
.last_cbuf
> 0) {
872 /* Just set this if any of the colorbuffers are enabled. */
873 if (spi_format
& ((1ull << (4 * (key
->ps_epilog
.states
.last_cbuf
+ 1))) - 1))
874 last_color_export
= 0;
876 for (i
= 0; i
< 8; i
++)
877 if (colors_written
& (1 << i
) && (spi_format
>> (i
* 4)) & 0xf)
878 last_color_export
= i
;
882 unsigned num_compacted_mrts
= 0;
883 while (colors_written
) {
884 LLVMValueRef color
[4];
885 int output_index
= u_bit_scan(&colors_written
);
887 for (i
= 0; i
< 4; i
++)
888 color
[i
] = LLVMGetParam(ctx
->main_fn
, vgpr
++);
890 if (si_export_mrt_color(ctx
, color
, output_index
, num_compacted_mrts
,
891 ctx
->args
.arg_count
- 1,
892 output_index
== last_color_export
, &exp
))
893 num_compacted_mrts
++;
896 /* Process depth, stencil, samplemask. */
897 if (key
->ps_epilog
.writes_z
)
898 depth
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
899 if (key
->ps_epilog
.writes_stencil
)
900 stencil
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
901 if (key
->ps_epilog
.writes_samplemask
)
902 samplemask
= LLVMGetParam(ctx
->main_fn
, vgpr
++);
904 if (depth
|| stencil
|| samplemask
)
905 si_export_mrt_z(ctx
, depth
, stencil
, samplemask
, &exp
);
906 else if (last_color_export
== -1)
907 ac_build_export_null(&ctx
->ac
);
910 si_emit_ps_exports(ctx
, &exp
);
913 LLVMBuildRetVoid(ctx
->ac
.builder
);
916 void si_llvm_build_monolithic_ps(struct si_shader_context
*ctx
, struct si_shader
*shader
)
918 LLVMValueRef parts
[3];
919 unsigned num_parts
= 0, main_index
;
920 LLVMValueRef main_fn
= ctx
->main_fn
;
922 union si_shader_part_key prolog_key
;
923 si_get_ps_prolog_key(shader
, &prolog_key
, false);
925 if (si_need_ps_prolog(&prolog_key
)) {
926 si_llvm_build_ps_prolog(ctx
, &prolog_key
);
927 parts
[num_parts
++] = ctx
->main_fn
;
930 main_index
= num_parts
;
931 parts
[num_parts
++] = main_fn
;
933 union si_shader_part_key epilog_key
;
934 si_get_ps_epilog_key(shader
, &epilog_key
);
935 si_llvm_build_ps_epilog(ctx
, &epilog_key
);
936 parts
[num_parts
++] = ctx
->main_fn
;
938 si_build_wrapper_function(ctx
, parts
, num_parts
, main_index
, 0);
941 void si_llvm_init_ps_callbacks(struct si_shader_context
*ctx
)
943 ctx
->abi
.emit_outputs
= si_llvm_return_fs_outputs
;
944 ctx
->abi
.load_sample_position
= load_sample_position
;
945 ctx
->abi
.load_sample_mask_in
= load_sample_mask_in
;
946 ctx
->abi
.emit_fbfetch
= si_nir_emit_fbfetch
;