- struct lp_build_tgsi_context bld_base;
- struct gallivm_state gallivm;
- struct ac_llvm_context ac;
- struct si_shader *shader;
- struct si_screen *screen;
-
- unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
-
- /* For clamping the non-constant index in resource indexing: */
- unsigned num_const_buffers;
- unsigned num_shader_buffers;
- unsigned num_images;
- unsigned num_samplers;
-
- /* Whether the prolog will be compiled separately. */
- bool separate_prolog;
-
- struct ac_shader_abi abi;
-
- /** This function is responsible for initializing the inputs array and will be
- * called once for each input declared in the TGSI shader.
- */
- void (*load_input)(struct si_shader_context *,
- unsigned input_index,
- const struct tgsi_full_declaration *decl,
- LLVMValueRef out[4]);
-
- /** This array contains the input values for the shader. Typically these
- * values will be in the form of a target intrinsic that will inform the
- * backend how to load the actual inputs to the shader.
- */
- struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
- LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
- LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
- LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS];
-
- /** This pointer is used to contain the temporary values.
- * The number of temporaries used in TGSI can't be bounded by a max value and
- * thus we must allocate this array at runtime.
- */
- LLVMValueRef *temps;
- unsigned temps_count;
- LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
-
- LLVMValueRef *imms;
- unsigned imms_num;
-
- struct si_llvm_flow *flow;
- unsigned flow_depth;
- unsigned flow_depth_max;
-
- struct lp_build_if_state merged_wrap_if_state;
-
- struct tgsi_array_info *temp_arrays;
- LLVMValueRef *temp_array_allocas;
-
- LLVMValueRef undef_alloca;
-
- LLVMValueRef main_fn;
- LLVMTypeRef return_type;
-
- /* Parameter indices for LLVMGetParam. */
- int param_rw_buffers;
- int param_const_and_shader_buffers;
- int param_samplers_and_images;
- int param_bindless_samplers_and_images;
- /* Common inputs for merged shaders. */
- int param_merged_wave_info;
- int param_merged_scratch_offset;
- /* API VS */
- int param_vertex_buffers;
- int param_rel_auto_id;
- int param_vs_prim_id;
- int param_vertex_index0;
- /* VS states and layout of LS outputs / TCS inputs at the end
- * [0] = clamp vertex color
- * [1] = indexed
- * [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
- * max = 32*32*4 + 32*4
- * [24:31] = stride between vertices in DW = num_inputs * 4
- * max = 32*4
- */
- int param_vs_state_bits;
- int param_vs_blit_inputs;
- /* HW VS */
- int param_streamout_config;
- int param_streamout_write_index;
- int param_streamout_offset[4];
-
- /* API TCS & TES */
- /* Layout of TCS outputs in the offchip buffer
- * # 6 bits
- * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
- * # 6 bits
- * [6:11] = the number of output vertices per patch, max = 32
- * # 20 bits
- * [12:31] = the offset of per patch attributes in the buffer in bytes.
- * max = NUM_PATCHES*32*32*16
- */
- int param_tcs_offchip_layout;
-
- /* API TCS */
- /* Offsets where TCS outputs and TCS patch outputs live in LDS:
- * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
- * [16:31] = TCS output patch0 offset for per-patch / 16
- * max = (NUM_PATCHES + 1) * 32*32
- */
- int param_tcs_out_lds_offsets;
- /* Layout of TCS outputs / TES inputs:
- * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
- * max = 32*32*4 + 32*4
- * [13:18] = gl_PatchVerticesIn, max = 32
- * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
- */
- int param_tcs_out_lds_layout;
- int param_tcs_offchip_offset;
- int param_tcs_factor_offset;
-
- /* API TES */
- int param_tes_offchip_addr;
- int param_tes_u;
- int param_tes_v;
- int param_tes_rel_patch_id;
- /* HW ES */
- int param_es2gs_offset;
- /* API GS */
- int param_gs2vs_offset;
- int param_gs_wave_id; /* GFX6 */
- LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
- int param_gs_vtx01_offset; /* in dwords (GFX9) */
- int param_gs_vtx23_offset; /* in dwords (GFX9) */
- int param_gs_vtx45_offset; /* in dwords (GFX9) */
- /* CS */
- int param_block_size;
-
- LLVMTargetMachineRef tm;
-
- /* Preloaded descriptors. */
- LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring[4];
- LLVMValueRef tess_offchip_ring;
-
- LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
- LLVMValueRef gs_next_vertex[4];
- LLVMValueRef postponed_kill;
- LLVMValueRef return_value;
-
- LLVMTypeRef voidt;
- LLVMTypeRef i1;
- LLVMTypeRef i8;
- LLVMTypeRef i32;
- LLVMTypeRef i64;
- LLVMTypeRef i128;
- LLVMTypeRef f32;
- LLVMTypeRef v2i32;
- LLVMTypeRef v4i32;
- LLVMTypeRef v4f32;
- LLVMTypeRef v8i32;
-
- LLVMValueRef i32_0;
- LLVMValueRef i32_1;
+ struct ac_llvm_context ac;
+ struct si_shader *shader;
+ struct si_screen *screen;
+
+ unsigned type; /* PIPE_SHADER_* specifies the type of shader. */
+
+ /* For clamping the non-constant index in resource indexing: */
+ unsigned num_const_buffers;
+ unsigned num_shader_buffers;
+ unsigned num_images;
+ unsigned num_samplers;
+
+ struct ac_shader_args args;
+ struct ac_shader_abi abi;
+
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
+
+ LLVMBasicBlockRef merged_wrap_if_entry_block;
+ int merged_wrap_if_label;
+
+ LLVMValueRef main_fn;
+ LLVMTypeRef return_type;
+
+ struct ac_arg const_and_shader_buffers;
+ struct ac_arg samplers_and_images;
+
+ /* For merged shaders, the per-stage descriptors for the stage other
+ * than the one we're processing, used to pass them through from the
+ * first stage to the second.
+ */
+ struct ac_arg other_const_and_shader_buffers;
+ struct ac_arg other_samplers_and_images;
+
+ struct ac_arg rw_buffers;
+ struct ac_arg bindless_samplers_and_images;
+ /* Common inputs for merged shaders. */
+ struct ac_arg merged_wave_info;
+ struct ac_arg merged_scratch_offset;
+ struct ac_arg small_prim_cull_info;
+ /* API VS */
+ struct ac_arg vertex_buffers;
+ struct ac_arg vb_descriptors[5];
+ struct ac_arg rel_auto_id;
+ struct ac_arg vs_prim_id;
+ struct ac_arg vertex_index0;
+ /* VS states and layout of LS outputs / TCS inputs at the end
+ * [0] = clamp vertex color
+ * [1] = indexed
+ * [2:3] = NGG: output primitive type
+ * [4:5] = NGG: provoking vertex index
+ * [6] = NGG: streamout queries enabled
+ * [7:10] = NGG: small prim filter precision = num_samples / quant_mode,
+ * but in reality it's: 1/2^n, from 1/16 to 1/4096 = 1/2^4 to 1/2^12
+ * Only the first 4 bits of the exponent are stored.
+ * Set it like this: (fui(num_samples / quant_mode) >> 23)
+ * Expand to FP32 like this: ((0x70 | value) << 23);
+ * With 0x70 = 112, we get 2^(112 + value - 127) = 2^(value - 15)
+ * = 1/2^(15 - value) in FP32
+ * [11:23] = stride between patches in DW = num_inputs * num_vertices * 4
+ * max = 32*32*4 + 32*4
+ * [24:31] = stride between vertices in DW = num_inputs * 4
+ * max = 32*4
+ */
+ struct ac_arg vs_state_bits;
+ struct ac_arg vs_blit_inputs;
+ struct ac_arg ngg_old_thread_id; /* generated by the NGG cull shader */
+ /* HW VS */
+ struct ac_arg streamout_config;
+ struct ac_arg streamout_write_index;
+ struct ac_arg streamout_offset[4];
+
+ /* API TCS & TES */
+ /* Layout of TCS outputs in the offchip buffer
+ * # 6 bits
+ * [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
+ * # 6 bits
+ * [6:11] = the number of output vertices per patch, max = 32
+ * # 20 bits
+ * [12:31] = the offset of per patch attributes in the buffer in bytes.
+ * max = NUM_PATCHES*32*32*16
+ */
+ struct ac_arg tcs_offchip_layout;
+
+ /* API TCS */
+ /* Offsets where TCS outputs and TCS patch outputs live in LDS:
+ * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
+ * [16:31] = TCS output patch0 offset for per-patch / 16
+ * max = (NUM_PATCHES + 1) * 32*32
+ */
+ struct ac_arg tcs_out_lds_offsets;
+ /* Layout of TCS outputs / TES inputs:
+ * [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
+ * max = 32*32*4 + 32*4
+ * [13:18] = gl_PatchVerticesIn, max = 32
+ * [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
+ */
+ struct ac_arg tcs_out_lds_layout;
+ struct ac_arg tcs_offchip_offset;
+ struct ac_arg tcs_factor_offset;
+
+ /* API TES */
+ struct ac_arg tes_offchip_addr;
+ struct ac_arg tes_u;
+ struct ac_arg tes_v;
+ struct ac_arg tes_rel_patch_id;
+ /* HW ES */
+ struct ac_arg es2gs_offset;
+ /* HW GS */
+ /* On gfx10:
+ * - bits 0..11: ordered_wave_id
+ * - bits 12..20: number of vertices in group
+ * - bits 22..30: number of primitives in group
+ */
+ struct ac_arg gs_tg_info;
+ /* API GS */
+ struct ac_arg gs2vs_offset;
+ struct ac_arg gs_wave_id; /* GFX6 */
+ struct ac_arg gs_vtx_offset[6]; /* in dwords (GFX6) */
+ struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */
+ struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */
+ struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */
+ /* PS */
+ struct ac_arg pos_fixed_pt;
+ /* CS */
+ struct ac_arg block_size;
+ struct ac_arg cs_user_data;
+ struct ac_arg cs_shaderbuf[3];
+ struct ac_arg cs_image[3];
+
+ struct ac_llvm_compiler *compiler;
+
+ /* Preloaded descriptors. */
+ LLVMValueRef esgs_ring;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef tess_offchip_ring;
+
+ LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
+ LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef gs_curprim_verts[4];
+ LLVMValueRef gs_generated_prims[4];
+ LLVMValueRef gs_ngg_emit;
+ LLVMValueRef gs_ngg_scratch;
+ LLVMValueRef postponed_kill;
+ LLVMValueRef return_value;