hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
if (device->physical_device->ge_wave_size == 32)
hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+ if (device->physical_device->use_aco)
+ hash_flags |= RADV_HASH_SHADER_ACO;
return hash_flags;
}
if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+ key.topology = pCreateInfo->pInputAssemblyState->topology;
+
return key;
}
keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
}
+ keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
}
- if (device->physical_device->rad_info.chip_class >= GFX10 &&
- device->physical_device->rad_info.family != CHIP_NAVI14 &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) {
+ if (device->physical_device->use_ngg) {
if (nir[MESA_SHADER_TESS_CTRL]) {
keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
} else {
(cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
}
+/**
+ * Tell whether the ACO backend handles the given shader stage.
+ *
+ * Fragment and compute stages always qualify. The vertex stage qualifies
+ * only when the pipeline has neither a geometry shader (has_gs) nor
+ * tessellation shaders (has_ts). Every other stage is rejected.
+ */
+static bool
+radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+	switch (stage) {
+	case MESA_SHADER_FRAGMENT:
+	case MESA_SHADER_COMPUTE:
+		return true;
+	case MESA_SHADER_VERTEX:
+		/* A plain VS only: no GS and no tessellation in the pipeline. */
+		return !has_gs && !has_ts;
+	default:
+		return false;
+	}
+}
+
static
void radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_device *device,
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
+ bool has_gs = modules[MESA_SHADER_GEOMETRY];
+ bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+ bool use_aco = device->physical_device->use_aco;
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
radv_start_feedback(stage_feedbacks[i]);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
- flags, pipeline->layout);
+ flags, pipeline->layout, aco);
/* We don't want to alter meta shaders IR directly so clone it
* first.
nir_lower_non_uniform_ssbo_access |
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
- NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+ if (!aco)
+ NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
}
if (radv_can_dump_shader(device, modules[i], false))
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+ bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT,
infos + MESA_SHADER_FRAGMENT,
- keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+ keep_executable_info, aco,
+ &binaries[MESA_SHADER_FRAGMENT]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
}
pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
pipeline->layout,
&key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
- &binaries[MESA_SHADER_TESS_CTRL]);
+ false, &binaries[MESA_SHADER_TESS_CTRL]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
}
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout,
&keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
- &binaries[MESA_SHADER_GEOMETRY]);
+ false, &binaries[MESA_SHADER_GEOMETRY]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
}
radv_start_feedback(stage_feedbacks[i]);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
pipeline->layout,
keys + i, infos + i,keep_executable_info,
- &binaries[i]);
+ aco, &binaries[i]);
radv_stop_feedback(stage_feedbacks[i], false);
}
color_bytes_per_pixel += vk_format_get_blocksize(format);
if (total_samples > 1) {
+ assert(samples_log <= 3);
const unsigned fmask_array[] = {0, 1, 1, 4};
fmask_bytes_per_pixel += fmask_array[samples_log];
}
const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
struct radv_render_pass_attachment *attachment = NULL;
uint32_t db_depth_control = 0, db_stencil_control = 0;
uint32_t db_render_control = 0, db_render_override2 = 0;
db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
- if (!pCreateInfo->pRasterizationState->depthClampEnable) {
+ if (!pCreateInfo->pRasterizationState->depthClampEnable &&
+ ps->info.ps.writes_z) {
/* From VK_EXT_depth_range_unrestricted spec:
*
* "The behavior described in Primitive Clipping still applies.
unsigned max_waves_per_sh = 0;
uint64_t va;
- pipeline->cs.buf = malloc(20 * 4);
- pipeline->cs.max_dw = 20;
+ pipeline->cs.max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 22 : 20;
+ pipeline->cs.buf = malloc(pipeline->cs.max_dw * 4);
compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
va = radv_buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
radeon_set_sh_reg_seq(&pipeline->cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(&pipeline->cs, compute_shader->config.rsrc1);
radeon_emit(&pipeline->cs, compute_shader->config.rsrc2);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(&pipeline->cs, R_00B8A0_COMPUTE_PGM_RSRC3, compute_shader->config.rsrc3);
+ }
radeon_set_sh_reg(&pipeline->cs, R_00B860_COMPUTE_TMPRING_SIZE,
S_00B860_WAVES(pipeline->max_waves) |
}
++p;
- /* LLVM IR */
+ /* backend IR */
if (p < end) {
p->isText = true;
- desc_copy(p->name, "LLVM IR");
- desc_copy(p->description, "The LLVM IR after some optimizations");
- if (radv_copy_representation(p->pData, &p->dataSize, shader->llvm_ir_string) != VK_SUCCESS)
+ if (shader->aco_used) {
+ desc_copy(p->name, "ACO IR");
+ desc_copy(p->description, "The ACO IR after some optimizations");
+ } else {
+ desc_copy(p->name, "LLVM IR");
+ desc_copy(p->description, "The LLVM IR after some optimizations");
+ }
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
result = VK_INCOMPLETE;
}
++p;