2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #ifndef SI_SHADER_PRIVATE_H
26 #define SI_SHADER_PRIVATE_H
28 #include "si_shader.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_tgsi.h"
32 #include "tgsi/tgsi_parse.h"
33 #include "ac_shader_abi.h"
35 #include <llvm-c/Core.h>
36 #include <llvm-c/TargetMachine.h>
38 struct pipe_debug_callback
;
40 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
41 #define RADEON_LLVM_MAX_INPUTS 32 * 4
42 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
44 #define RADEON_LLVM_MAX_SYSTEM_VALUES 11
45 #define RADEON_LLVM_MAX_ADDRS 16
47 struct si_shader_output_values
{
48 LLVMValueRef values
[4];
49 unsigned semantic_name
;
50 unsigned semantic_index
;
51 ubyte vertex_stream
[4];
54 struct si_shader_context
{
55 struct lp_build_tgsi_context bld_base
;
56 struct gallivm_state gallivm
;
57 struct ac_llvm_context ac
;
58 struct si_shader
*shader
;
59 struct si_screen
*screen
;
61 unsigned type
; /* PIPE_SHADER_* specifies the type of shader. */
63 /* For clamping the non-constant index in resource indexing: */
64 unsigned num_const_buffers
;
65 unsigned num_shader_buffers
;
67 unsigned num_samplers
;
69 struct ac_shader_args args
;
70 struct ac_shader_abi abi
;
72 /** This function is responsible for initilizing the inputs array and will be
73 * called once for each input declared in the TGSI shader.
75 void (*load_input
)(struct si_shader_context
*,
77 const struct tgsi_full_declaration
*decl
,
80 /** This array contains the input values for the shader. Typically these
81 * values will be in the form of a target intrinsic that will inform the
82 * backend how to load the actual inputs to the shader.
84 struct tgsi_full_declaration input_decls
[RADEON_LLVM_MAX_INPUT_SLOTS
];
85 LLVMValueRef inputs
[RADEON_LLVM_MAX_INPUTS
];
86 LLVMValueRef outputs
[RADEON_LLVM_MAX_OUTPUTS
][TGSI_NUM_CHANNELS
];
87 LLVMValueRef addrs
[RADEON_LLVM_MAX_ADDRS
][TGSI_NUM_CHANNELS
];
89 /** This pointer is used to contain the temporary values.
90 * The amount of temporary used in tgsi can't be bound to a max value and
91 * thus we must allocate this array at runtime.
95 LLVMValueRef system_values
[RADEON_LLVM_MAX_SYSTEM_VALUES
];
100 LLVMBasicBlockRef merged_wrap_if_entry_block
;
101 int merged_wrap_if_label
;
103 struct tgsi_array_info
*temp_arrays
;
104 LLVMValueRef
*temp_array_allocas
;
106 LLVMValueRef undef_alloca
;
108 LLVMValueRef main_fn
;
109 LLVMTypeRef return_type
;
111 struct ac_arg const_and_shader_buffers
;
112 struct ac_arg samplers_and_images
;
114 /* For merged shaders, the per-stage descriptors for the stage other
115 * than the one we're processing, used to pass them through from the
116 * first stage to the second.
118 struct ac_arg other_const_and_shader_buffers
;
119 struct ac_arg other_samplers_and_images
;
121 struct ac_arg rw_buffers
;
122 struct ac_arg bindless_samplers_and_images
;
123 /* Common inputs for merged shaders. */
124 struct ac_arg merged_wave_info
;
125 struct ac_arg merged_scratch_offset
;
127 struct ac_arg vertex_buffers
;
128 struct ac_arg rel_auto_id
;
129 struct ac_arg vs_prim_id
;
130 struct ac_arg vertex_index0
;
131 /* VS states and layout of LS outputs / TCS inputs at the end
132 * [0] = clamp vertex color
134 * [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
135 * max = 32*32*4 + 32*4
136 * [24:31] = stride between vertices in DW = num_inputs * 4
139 struct ac_arg vs_state_bits
;
140 struct ac_arg vs_blit_inputs
;
142 struct ac_arg streamout_config
;
143 struct ac_arg streamout_write_index
;
144 struct ac_arg streamout_offset
[4];
147 /* Layout of TCS outputs in the offchip buffer
149 * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
151 * [6:11] = the number of output vertices per patch, max = 32
153 * [12:31] = the offset of per patch attributes in the buffer in bytes.
154 * max = NUM_PATCHES*32*32*16
156 struct ac_arg tcs_offchip_layout
;
159 /* Offsets where TCS outputs and TCS patch outputs live in LDS:
160 * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
161 * [16:31] = TCS output patch0 offset for per-patch / 16
162 * max = (NUM_PATCHES + 1) * 32*32
164 struct ac_arg tcs_out_lds_offsets
;
165 /* Layout of TCS outputs / TES inputs:
166 * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
167 * max = 32*32*4 + 32*4
168 * [13:18] = gl_PatchVerticesIn, max = 32
169 * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
171 struct ac_arg tcs_out_lds_layout
;
172 struct ac_arg tcs_offchip_offset
;
173 struct ac_arg tcs_factor_offset
;
176 struct ac_arg tes_offchip_addr
;
179 struct ac_arg tes_rel_patch_id
;
181 struct ac_arg es2gs_offset
;
184 * - bits 0..10: ordered_wave_id
185 * - bits 12..20: number of vertices in group
186 * - bits 22..30: number of primitives in group
188 struct ac_arg gs_tg_info
;
190 struct ac_arg gs2vs_offset
;
191 struct ac_arg gs_wave_id
; /* GFX6 */
192 struct ac_arg gs_vtx_offset
[6]; /* in dwords (GFX6) */
193 struct ac_arg gs_vtx01_offset
; /* in dwords (GFX9) */
194 struct ac_arg gs_vtx23_offset
; /* in dwords (GFX9) */
195 struct ac_arg gs_vtx45_offset
; /* in dwords (GFX9) */
197 struct ac_arg pos_fixed_pt
;
199 struct ac_arg block_size
;
200 struct ac_arg cs_user_data
;
202 struct ac_llvm_compiler
*compiler
;
204 /* Preloaded descriptors. */
205 LLVMValueRef esgs_ring
;
206 LLVMValueRef gsvs_ring
[4];
207 LLVMValueRef tess_offchip_ring
;
209 LLVMValueRef invoc0_tess_factors
[6]; /* outer[4], inner[2] */
210 LLVMValueRef gs_next_vertex
[4];
211 LLVMValueRef gs_curprim_verts
[4];
212 LLVMValueRef gs_generated_prims
[4];
213 LLVMValueRef gs_ngg_emit
;
214 LLVMValueRef gs_ngg_scratch
;
215 LLVMValueRef postponed_kill
;
216 LLVMValueRef return_value
;
232 LLVMValueRef i1false
;
236 static inline struct si_shader_context
*
237 si_shader_context(struct lp_build_tgsi_context
*bld_base
)
239 return (struct si_shader_context
*)bld_base
;
242 static inline struct si_shader_context
*
243 si_shader_context_from_abi(struct ac_shader_abi
*abi
)
245 struct si_shader_context
*ctx
= NULL
;
246 return container_of(abi
, ctx
, abi
);
249 void si_create_function(struct si_shader_context
*ctx
,
251 LLVMTypeRef
*returns
, unsigned num_returns
,
252 unsigned max_workgroup_size
);
253 unsigned si_llvm_compile(LLVMModuleRef M
, struct si_shader_binary
*binary
,
254 struct ac_llvm_compiler
*compiler
,
255 struct pipe_debug_callback
*debug
,
256 bool less_optimized
, unsigned wave_size
);
258 LLVMTypeRef
tgsi2llvmtype(struct lp_build_tgsi_context
*bld_base
,
259 enum tgsi_opcode_type type
);
261 LLVMValueRef
bitcast(struct lp_build_tgsi_context
*bld_base
,
262 enum tgsi_opcode_type type
, LLVMValueRef value
);
264 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
268 void si_llvm_context_init(struct si_shader_context
*ctx
,
269 struct si_screen
*sscreen
,
270 struct ac_llvm_compiler
*compiler
,
272 unsigned ballot_mask_bits
);
273 void si_llvm_context_set_ir(struct si_shader_context
*ctx
,
274 struct si_shader
*shader
,
275 struct nir_shader
*nir
);
277 void si_llvm_create_func(struct si_shader_context
*ctx
,
279 LLVMTypeRef
*return_types
, unsigned num_return_elems
);
281 void si_llvm_dispose(struct si_shader_context
*ctx
);
283 void si_llvm_optimize_module(struct si_shader_context
*ctx
);
285 LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context
*bld_base
,
290 LLVMValueRef
si_llvm_emit_fetch(struct lp_build_tgsi_context
*bld_base
,
291 const struct tgsi_full_src_register
*reg
,
292 enum tgsi_opcode_type type
,
295 void si_llvm_emit_kill(struct ac_shader_abi
*abi
, LLVMValueRef visible
);
297 LLVMValueRef
si_nir_load_input_tes(struct ac_shader_abi
*abi
,
299 LLVMValueRef vertex_index
,
300 LLVMValueRef param_index
,
301 unsigned const_index
,
303 unsigned driver_location
,
305 unsigned num_components
,
310 LLVMValueRef
si_llvm_load_input_gs(struct ac_shader_abi
*abi
,
311 unsigned input_index
,
312 unsigned vtx_offset_param
,
316 LLVMValueRef
si_nir_lookup_interp_param(struct ac_shader_abi
*abi
,
317 enum glsl_interp_mode interp
,
320 void si_llvm_emit_store(struct lp_build_tgsi_context
*bld_base
,
321 const struct tgsi_full_instruction
*inst
,
322 const struct tgsi_opcode_info
*info
,
324 LLVMValueRef dst
[4]);
326 LLVMValueRef
si_get_indirect_index(struct si_shader_context
*ctx
,
327 const struct tgsi_ind_register
*ind
,
328 unsigned addr_mul
, int rel_index
);
329 LLVMValueRef
si_get_bounded_indirect_index(struct si_shader_context
*ctx
,
330 const struct tgsi_ind_register
*ind
,
331 int rel_index
, unsigned num
);
332 LLVMValueRef
si_get_sample_id(struct si_shader_context
*ctx
);
334 void si_shader_context_init_alu(struct si_shader_context
*ctx
);
335 void si_shader_context_init_mem(struct si_shader_context
*ctx
);
337 LLVMValueRef
si_load_sampler_desc(struct si_shader_context
*ctx
,
338 LLVMValueRef list
, LLVMValueRef index
,
339 enum ac_descriptor_type type
);
340 LLVMValueRef
si_load_image_desc(struct si_shader_context
*ctx
,
341 LLVMValueRef list
, LLVMValueRef index
,
342 enum ac_descriptor_type desc_type
,
343 bool uses_store
, bool bindless
);
344 LLVMValueRef
si_nir_emit_fbfetch(struct ac_shader_abi
*abi
);
346 void si_load_system_value(struct si_shader_context
*ctx
,
348 const struct tgsi_full_declaration
*decl
);
349 void si_declare_compute_memory(struct si_shader_context
*ctx
);
350 void si_tgsi_declare_compute_memory(struct si_shader_context
*ctx
,
351 const struct tgsi_full_declaration
*decl
);
353 LLVMValueRef
si_get_primitive_id(struct si_shader_context
*ctx
,
355 void si_llvm_export_vs(struct si_shader_context
*ctx
,
356 struct si_shader_output_values
*outputs
,
358 void si_emit_streamout_output(struct si_shader_context
*ctx
,
359 LLVMValueRef
const *so_buffers
,
360 LLVMValueRef
const *so_write_offsets
,
361 struct pipe_stream_output
*stream_out
,
362 struct si_shader_output_values
*shader_out
);
364 void si_llvm_load_input_vs(
365 struct si_shader_context
*ctx
,
366 unsigned input_index
,
367 LLVMValueRef out
[4]);
368 void si_llvm_load_input_fs(
369 struct si_shader_context
*ctx
,
370 unsigned input_index
,
371 LLVMValueRef out
[4]);
373 bool si_nir_build_llvm(struct si_shader_context
*ctx
, struct nir_shader
*nir
);
375 LLVMValueRef
si_unpack_param(struct si_shader_context
*ctx
,
376 struct ac_arg param
, unsigned rshift
,
378 LLVMValueRef
si_is_es_thread(struct si_shader_context
*ctx
);
379 LLVMValueRef
si_is_gs_thread(struct si_shader_context
*ctx
);
381 void gfx10_emit_ngg_epilogue(struct ac_shader_abi
*abi
,
382 unsigned max_outputs
,
383 LLVMValueRef
*addrs
);
384 void gfx10_ngg_gs_emit_vertex(struct si_shader_context
*ctx
,
386 LLVMValueRef
*addrs
);
387 void gfx10_ngg_gs_emit_prologue(struct si_shader_context
*ctx
);
388 void gfx10_ngg_gs_emit_epilogue(struct si_shader_context
*ctx
);
389 void gfx10_ngg_calculate_subgroup_info(struct si_shader
*shader
);