union si_shader_part_key *key);
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
+static void si_fix_resource_usage(struct si_screen *sscreen,
+ struct si_shader *shader);
/* Ideally pass the sample mask input to the PS epilog as v14, which
* is its usual location, so that the shader doesn't have to add v_mov.
/* Do multiple loads for special formats. */
switch (fix_fetch) {
+ case SI_FIX_FETCH_RG_64_FLOAT:
+ num_fetches = 1; /* 1 2-dword or 4-dword load */
+ fetch_stride = 0;
+ if (util_last_bit(info->input_usage_mask[input_index]) >= 2)
+ num_channels = 4; /* 2 doubles in 4 dwords */
+ else
+ num_channels = 2; /* 1 double in 2 dwords */
+ break;
case SI_FIX_FETCH_RGB_64_FLOAT:
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
LLVMValueRef desc0, desc1;
- if (HAVE_32BIT_POINTERS) {
- desc0 = ptr;
- desc1 = LLVMConstInt(ctx->i32,
- S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
- } else {
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
- desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
- desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
- /* Mask out all bits except BASE_ADDRESS_HI. */
- desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
- LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
- }
+ desc0 = ptr;
+ desc1 = LLVMConstInt(ctx->i32,
+ S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
LLVMValueRef desc_elems[] = {
desc0,
unsigned param, unsigned return_index)
{
LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef ptr, lo, hi;
-
- if (HAVE_32BIT_POINTERS) {
- ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
- return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
- }
-
- ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
- ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
- lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
- hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
- ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
- return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
+ LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, param);
+ ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
+ return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
}
/* This only writes the tessellation factor levels. */
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
unsigned vgpr;
if (ctx->type == PIPE_SHADER_VERTEX)
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
}
}
+static void declare_vs_blit_inputs(struct si_shader_context *ctx,
+ struct si_function_info *fninfo,
+ unsigned vs_blit_property)
+{ /* Declares the VS blit inputs as SGPR arguments: 3 fixed ones, then 4 color or 6 texcoord ones depending on vs_blit_property. */
+ ctx->param_vs_blit_inputs = fninfo->num_params; /* recorded before any blit argument is added; presumably the index of the first one - confirm against add_arg */
+ add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */
+ add_arg(fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* depth */
+
+ if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) { /* color variant: 4 extra f32 SGPRs */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color0 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* color3 */
+ } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) { /* texcoord variant: 6 extra f32 SGPRs */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */
+ add_arg(fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
+ } /* any other vs_blit_property value adds nothing beyond the three arguments above */
+}
+
static void declare_tes_input_vgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
declare_global_desc_pointers(ctx, &fninfo);
if (vs_blit_property) {
- ctx->param_vs_blit_inputs = fninfo.num_params;
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x1, y1 */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* i16 x2, y2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* depth */
-
- if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color0 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* color3 */
- } else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y1 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.x2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.y2 */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.z */
- add_arg(&fninfo, ARG_SGPR, ctx->f32); /* texcoord.w */
- }
+ declare_vs_blit_inputs(ctx, &fninfo, vs_blit_property);
/* VGPRs */
declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_TESS_CTRL);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->type == PIPE_SHADER_VERTEX);
declare_vs_specific_input_sgprs(ctx, &fninfo);
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_GEOMETRY);
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* Declare as many input SGPRs as the VS has. */
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
if (ctx->type == PIPE_SHADER_VERTEX) {
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
!mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
- r600_resource_reference(&shader->bo, NULL);
+ si_resource_reference(&shader->bo, NULL);
shader->bo = si_aligned_buffer_create(&sscreen->b,
sscreen->cpdma_prefetch_writes_memory ?
0 : SI_RESOURCE_FLAG_READ_ONLY,
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
- PIPE_TRANSFER_UNSYNCHRONIZED);
+ PIPE_TRANSFER_UNSYNCHRONIZED |
+ RADEON_TRANSFER_TEMPORARY);
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
* endian-independent. */
if (r != 0) {
FREE(shader);
shader = NULL;
+ } else {
+ si_fix_resource_usage(sscreen, shader);
}
return shader;
}
LLVMValueRef ptr[2], list;
bool merged_shader = is_merged_shader(ctx);
- if (HAVE_32BIT_POINTERS) {
- ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- return list;
- }
-
- /* Get the pointer to rw buffers. */
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1);
- list = ac_build_gather_values(&ctx->ac, ptr, 2);
- list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
- list = LLVMBuildIntToPtr(ctx->ac.builder, list,
- ac_array_in_const_addr_space(ctx->v4i32), "");
+ list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
return list;
}
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
void si_shader_destroy(struct si_shader *shader)
{
if (shader->scratch_bo)
- r600_resource_reference(&shader->scratch_bo, NULL);
+ si_resource_reference(&shader->scratch_bo, NULL);
- r600_resource_reference(&shader->bo, NULL);
+ si_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
ac_shader_binary_clean(&shader->binary);