print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
break;
case PKT3_SURFACE_SYNC:
- if (ib->chip_class >= CIK) {
+ if (ib->chip_class >= GFX7) {
ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
default:
- fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+ fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n");
assert(!"this should never occur");
return 2;
}
if (info->family >= CHIP_VEGA10)
info->chip_class = GFX9;
else if (info->family >= CHIP_TONGA)
- info->chip_class = VI;
+ info->chip_class = GFX8;
else if (info->family >= CHIP_BONAIRE)
- info->chip_class = CIK;
+ info->chip_class = GFX7;
else if (info->family >= CHIP_TAHITI)
- info->chip_class = SI;
+ info->chip_class = GFX6;
else {
fprintf(stderr, "amdgpu: Unknown family.\n");
return false;
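/* For orientation (illustrative, family names per amd_family.h): Tahiti,
 * Verde and Oland resolve to GFX6; Bonaire, Hawaii and Kaveri to GFX7;
 * Tonga, Fiji, Polaris and Stoney to GFX8; Vega10 and Raven to GFX9. */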
info->has_gpu_reset_counter_query = false;
info->has_eqaa_surface_allocator = true;
info->has_format_bc1_through_bc7 = true;
- /* DRM 3.1.0 doesn't flush TC for VI correctly. */
- info->kernel_flushes_tc_l2_after_ib = info->chip_class != VI ||
+ /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */
+ info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 ||
info->drm_minor >= 2;
info->has_indirect_compute_dispatch = true;
- /* SI doesn't support unaligned loads. */
- info->has_unaligned_shader_loads = info->chip_class != SI;
- /* Disable sparse mappings on SI due to VM faults in CP DMA. Enable them once
+ /* GFX6 doesn't support unaligned loads. */
+ info->has_unaligned_shader_loads = info->chip_class != GFX6;
+ /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once
* these faults are mitigated in software.
* Disable sparse mappings on GFX9 due to hangs.
*/
info->has_sparse_vm_mappings =
- info->chip_class >= CIK && info->chip_class <= VI &&
+ info->chip_class >= GFX7 && info->chip_class <= GFX8 &&
info->drm_minor >= 13;
info->has_2d_tiling = true;
info->has_read_registers_query = true;
info->pte_fragment_size = alignment_info.size_local;
info->gart_page_size = alignment_info.size_remote;
- if (info->chip_class == SI)
+ if (info->chip_class == GFX6)
info->gfx_ib_pad_with_type2 = TRUE;
unsigned ib_align = 0;
assert(rb_per_pkr == 1 || rb_per_pkr == 2);
- if (info->chip_class >= CIK) {
+ if (info->chip_class >= GFX7) {
unsigned raster_config_1 = *cik_raster_config_1_p;
if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
(!se_mask[2] && !se_mask[3]))) {
static inline uint32_t
ac_get_num_physical_sgprs(enum chip_class chip_class)
{
- return chip_class >= VI ? 800 : 512;
+ return chip_class >= GFX8 ? 800 : 512;
}
#ifdef __cplusplus
* helper invocation which happens to fall on a different
* layer due to extrapolation."
*
- * VI and earlier attempt to implement this in hardware by
+ * GFX8 and earlier attempt to implement this in hardware by
* clamping the value of coords[2] = (8 * layer) + face.
* Unfortunately, this means that we end up with the wrong
* face when clamping occurs.
*
* Clamp the layer earlier to work around the issue.
*/
- if (ctx->chip_class <= VI) {
+ if (ctx->chip_class <= GFX8) {
LLVMValueRef ge0;
ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
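/* Illustrative failure case: with layer = -1 and face = 5, the merged
 * coordinate is 8 * (-1) + 5 = -3; hardware clamping of that combined
 * value lands on face 0 of layer 0 rather than face 5, which is why the
 * layer is clamped to >= 0 before coords[2] is formed. */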
offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
if (allow_smem && !slc &&
- (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) {
+ (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) {
assert(vindex == NULL);
LLVMValueRef result[8];
}
int log_recombine = 0;
- if (ctx->chip_class == SI && !known_aligned) {
+ if (ctx->chip_class == GFX6 && !known_aligned) {
/* Avoid alignment restrictions by loading one byte at a time. */
load_num_channels <<= load_log_size;
log_recombine = load_log_size;
}
if (log_recombine > 0) {
- /* Recombine bytes if necessary (SI only) */
+ /* Recombine bytes if necessary (GFX6 only) */
LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
}
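/* Illustrative numbers: an i32 load (load_log_size = 2) with unknown
 * alignment on GFX6 becomes four byte loads (load_num_channels <<= 2),
 * and log_recombine = 2 later stitches each group of four bytes back
 * into a single i32, matching dst_type above. */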
/*
- * SI implements derivatives using the local data store (LDS)
+ * AMD GCN implements derivatives using the local data store (LDS).
* All writes to the LDS happen in all executing threads at
* the same time. TID is the Thread ID for the current
* thread and is a value between 0 and 63, representing
void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
{
- unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
+ unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS),
"lds");
* \param maxprefix specifies that the result only needs to be correct for a
* prefix of this many threads
*
- * TODO: add inclusive and excluse scan functions for SI chip class.
+ * TODO: add inclusive and exclusive scan functions for GFX6.
*/
static LLVMValueRef
ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
result = ac_build_alu_op(ctx, result, swap, op);
if (cluster_size == 4) return ac_build_wwm(ctx, result);
- if (ctx->chip_class >= VI)
+ if (ctx->chip_class >= GFX8)
swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
else
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
result = ac_build_alu_op(ctx, result, swap, op);
if (cluster_size == 8) return ac_build_wwm(ctx, result);
- if (ctx->chip_class >= VI)
+ if (ctx->chip_class >= GFX8)
swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
else
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
result = ac_build_alu_op(ctx, result, swap, op);
if (cluster_size == 16) return ac_build_wwm(ctx, result);
- if (ctx->chip_class >= VI && cluster_size != 32)
+ if (ctx->chip_class >= GFX8 && cluster_size != 32)
swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
else
swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
result = ac_build_alu_op(ctx, result, swap, op);
if (cluster_size == 32) return ac_build_wwm(ctx, result);
- if (ctx->chip_class >= VI) {
+ if (ctx->chip_class >= GFX8) {
swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
result = ac_build_alu_op(ctx, result, swap, op);
result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
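/* Worked example (illustrative): for op = iadd and cluster_size = 4,
 * every lane of a quad ends up holding the quad sum a+b+c+d; the
 * half-mirror, mirror and bcast15/bcast31 steps above widen the same
 * butterfly to clusters of 8, 16, 32 and finally all 64 lanes. */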
unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
{
unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
- if (ctx->chip_class >= VI) {
+ if (ctx->chip_class >= GFX8) {
return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
} else {
return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
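/* Usage sketch (hypothetical values): passing lane0..lane3 = 0,0,0,0
 * broadcasts the first lane of each quad to all four lanes, via DPP on
 * GFX8+ and via the (1 << 15) quad-perm ds_swizzle encoding otherwise. */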
enum ac_image_dim dim = get_ac_sampler_dim(ctx, sdim, is_array);
if (dim == ac_image_cube ||
- (ctx->chip_class <= VI && dim == ac_image_3d))
+ (ctx->chip_class <= GFX8 && dim == ac_image_3d))
dim = ac_image_2darray;
return dim;
src0 = ac_to_float(ctx, src0);
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
- if (ctx->chip_class >= VI) {
+ if (ctx->chip_class >= GFX8) {
LLVMValueRef args[2];
/* Check if the result is a denormal - and flush to 0 if so. */
args[0] = result;
/* need to convert back up to f32 */
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
- if (ctx->chip_class >= VI)
+ if (ctx->chip_class >= GFX8)
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
else {
- /* for SI/CIK */
+ /* for GFX6-GFX7 */
/* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
* so compare the result and flush to 0 if it's smaller.
*/
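/* Check on the constant (illustrative): 2^-14 in IEEE binary32 has
 * exponent field 127 - 14 = 113 = 0x71, so its bit pattern is
 * 0x71 << 23 = 0x38800000, the smallest normal half-float (~6.1e-5). */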
LLVMBuildExtractElement(ctx->ac.builder, descriptor,
LLVMConstInt(ctx->ac.i32, 2, false), "");
- /* VI only */
- if (ctx->ac.chip_class == VI && in_elements) {
- /* On VI, the descriptor contains the size in bytes,
+ /* GFX8 only */
+ if (ctx->ac.chip_class == GFX8 && in_elements) {
+ /* On GFX8, the descriptor contains the size in bytes,
* but TXQ must return the size in elements.
* The stride is always non-zero for resources using TXQ.
*/
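/* Illustrative arithmetic: a 256-byte buffer of 4-byte texels reports
 * 256 in the size field, and dividing by the stride (4) yields the 64
 * elements TXQ is required to return. */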
break;
}
- if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
+ if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
const struct glsl_type *type = glsl_without_array(var->type);
{
unsigned cache_policy = 0;
- /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All
+ /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
* store opcodes not aligned to a dword are affected. The only way to
* get unaligned stores is through shader images.
*/
- if (((may_store_unaligned && ctx->ac.chip_class == SI) ||
+ if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
/* If this is write-only, don't keep data in L1 to prevent
* evicting L1 cache lines that may be needed by other
* instructions.
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
{
- /* SI only (thanks to a hw bug workaround):
+ /* GFX6 only (thanks to a hw bug workaround):
* The real barrier instruction isn’t needed, because an entire patch
* always fits into a single wave.
*/
- if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
+ if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
return;
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
*
- * SI-CI:
+ * GFX6-GFX7:
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
* filtering manually. The driver sets img7 to a mask clearing
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
* s_and_b32 samp0, samp0, img7
*
- * VI:
+ * GFX8:
* The ANISO_OVERRIDE sampler field enables this fix in TA.
*/
static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef img7, samp0;
- if (ctx->ac.chip_class >= VI)
+ if (ctx->ac.chip_class >= GFX8)
return samp;
img7 = LLVMBuildExtractElement(builder, res,
* It's unnecessary if the original texture format was
* Z32_FLOAT, but we don't know that here.
*/
- if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference)
+ if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->abi->clamp_shadow_reference)
args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
/* pack derivatives */
* by the reality that LLVM 5.0 doesn't have working VGPR indexing
* on GFX9.
*/
- bool llvm_has_working_vgpr_indexing = chip_class <= VI;
+ bool llvm_has_working_vgpr_indexing = chip_class <= GFX8;
/* TODO: Indirect indexing of GS inputs is unimplemented.
*
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
- /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
+ /* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi currently
* uses it due to promoting D16 to D32, but radv needs it off. */
bool clamp_shadow_reference;
return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
S_028A40_CUT_MODE(cut_mode)|
- S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) |
+ S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) |
S_028A40_GS_WRITE_OPTIMIZE(1) |
S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
}
}
}
- /* SI (except OLAND and HAINAN) has a bug that it only looks
+ /* GFX6 (except OLAND and HAINAN) has a bug where it only looks
* at the X writemask component. */
- if (ctx->chip_class == SI &&
+ if (ctx->chip_class == GFX6 &&
ctx->family != CHIP_OLAND &&
ctx->family != CHIP_HAINAN)
mask |= 0x1;
{
uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
- if (info->chip_class >= CIK)
+ if (info->chip_class >= GFX7)
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
else
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
/* Compute tile swizzle. */
- /* TODO: fix tile swizzle with mipmapping for SI */
- if ((info->chip_class >= CIK || config->info.levels == 1) &&
+ /* TODO: fix tile swizzle with mipmapping for GFX6 */
+ if ((info->chip_class >= GFX7 || config->info.levels == 1) &&
config->info.surf_index &&
surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
return;
- assert(info->chip_class <= VI);
+ assert(info->chip_class <= GFX8);
switch (num_pipes) {
case 2:
* driver team).
*/
AddrSurfInfoIn.flags.dccCompatible =
- info->chip_class >= VI &&
+ info->chip_class >= GFX8 &&
!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
!compressed &&
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
- /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
+ /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
* for Z and stencil. This can cause a number of problems which we work
* around here:
*
assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
- if (info->chip_class == SI) {
+ if (info->chip_class == GFX6) {
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
if (surf->bpe == 2)
AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
}
} else {
- /* CIK - VI */
+ /* GFX7 - GFX8 */
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
else
uint32_t cmask_alignment;
union {
- /* R600-VI return values.
+ /* R600-GFX8 return values.
*
* Some of them can be set by the caller if certain parameters are
* desirable. The allocator will try to obey them.
R700,
EVERGREEN,
CAYMAN,
- SI, /* GFX6 */
- CIK, /* GFX7 */
- VI, /* GFX8 */
+ GFX6,
+ GFX7,
+ GFX8,
GFX9,
};
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
{
return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
}
enum ring_type radv_queue_family_to_ring(int f) {
struct radv_image *image,
VkImageLayout layout)
{
- bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
+ bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
struct radv_color_buffer_info *cb = &att->cb;
uint32_t cb_color_info = cb->cb_color_info;
S_028208_BR_X(framebuffer->width) |
S_028208_BR_Y(framebuffer->height));
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
- uint8_t watermark = 4; /* Default value for VI. */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
+ uint8_t watermark = 4; /* Default value for GFX8. */
/* For optimal DCC performance. */
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
uint32_t db_count_control;
if(!cmd_buffer->state.active_occlusion_queries) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
has_perfect_queries) {
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
db_count_control =
S_028004_PERFECT_ZPASS_COUNTS(has_perfect_queries) |
S_028004_SAMPLE_RATE(sample_rate) |
va += offset + buffer->offset;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
- if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
+ if (cmd_buffer->device->physical_device->rad_info.chip_class <= GFX7 && stride)
desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1;
else
desc[2] = buffer->size - offset;
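/* Worked example for the <= GFX7 path (assumed values): size 256,
 * offset 0, format_size 4, stride 16 gives (256 - 0 - 4) / 16 + 1 = 16
 * records, so the last record's 4 bytes still end inside the buffer. */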
/* Set the descriptor.
*
- * On VI, the format must be non-INVALID, otherwise
+ * On GFX8, the format must be non-INVALID, otherwise
* the buffer will be considered not bound and store
* instructions will be no-ops.
*/
radeon_set_uconfig_reg_idx(cs,
R_030960_IA_MULTI_VGT_PARAM,
4, ia_multi_vgt_param);
- } else if (info->chip_class >= CIK) {
+ } else if (info->chip_class >= GFX7) {
radeon_set_context_reg_idx(cs,
R_028AA8_IA_MULTI_VGT_PARAM,
1, ia_multi_vgt_param);
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class == SI)
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
si_emit_cache_flush(cmd_buffer);
}
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
radv_emit_index_buffer(cmd_buffer);
} else {
- /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
+ /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
* so the state must be re-emitted before the next indexed
* draw.
*/
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
cmd_buffer->state.last_index_type = -1;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
}
struct radeon_info *rad_info =
&cmd_buffer->device->physical_device->rad_info;
bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
bool pipeline_is_dirty =
(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
cmd_buffer->cs, 4096);
if (likely(!info->indirect)) {
- /* SI-CI treat instance_count==0 as instance_count==1. There is
+ /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
* no workaround for indirect draws, but we can at least skip
* direct draws.
*/
{
struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
bool pipeline_is_dirty = pipeline &&
pipeline != cmd_buffer->state.emitted_compute_pipeline;
unsigned reg_strmout_cntl;
/* The register is at different places on different ASICs. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
counter_buffer_idx = -1;
- /* SI binds streamout buffers as shader resources.
+ /* AMD GCN binds streamout buffers as shader resources.
* VGT only counts primitives and tells the shader through
* SGPRs what to do.
*/
radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
- if (info->chip_class <= VI) {
+ if (info->chip_class <= GFX8) {
radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
if (i >= CHIP_VEGA10)
device->rad_info.chip_class = GFX9;
else if (i >= CHIP_TONGA)
- device->rad_info.chip_class = VI;
+ device->rad_info.chip_class = GFX8;
else if (i >= CHIP_BONAIRE)
- device->rad_info.chip_class = CIK;
+ device->rad_info.chip_class = GFX7;
else
- device->rad_info.chip_class = SI;
+ device->rad_info.chip_class = GFX6;
return;
}
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
- if (device->rad_info.chip_class < VI ||
+ if (device->rad_info.chip_class < GFX8 ||
device->rad_info.chip_class > GFX9)
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
}
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
- * on SI.
+ * on GFX6.
*/
- device->has_clear_state = device->rad_info.chip_class >= CIK;
+ device->has_clear_state = device->rad_info.chip_class >= GFX7;
- device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI;
+ device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8;
/* Vega10/Raven need a special workaround for a hardware bug. */
device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 ||
device->rad_info.family == CHIP_RAVEN;
/* Out-of-order primitive rasterization. */
- device->has_out_of_order_rast = device->rad_info.chip_class >= VI &&
+ device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 &&
device->rad_info.max_se >= 2;
device->out_of_order_rast_allowed = device->has_out_of_order_rast &&
!(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
device->dcc_msaa_allowed =
(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
- /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
+ /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
- (device->rad_info.chip_class >= VI &&
+ (device->rad_info.chip_class >= GFX8 &&
device->rad_info.me_fw_feature >= 41);
radv_physical_device_init_mem_types(device);
.shaderTessellationAndGeometryPointSize = true,
.shaderImageGatherExtended = true,
.shaderStorageImageExtendedFormats = true,
- .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI,
+ .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= GFX8,
.shaderUniformBufferArrayDynamicIndexing = true,
.shaderSampledImageArrayDynamicIndexing = true,
.shaderStorageBufferArrayDynamicIndexing = true,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
- bool enabled = pdevice->rad_info.chip_class >= VI;
+ bool enabled = pdevice->rad_info.chip_class >= GFX8;
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
(VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
- features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK;
+ features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
VkPhysicalDevice8BitStorageFeaturesKHR *features =
(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
- bool enabled = pdevice->rad_info.chip_class >= VI;
+ bool enabled = pdevice->rad_info.chip_class >= GFX8;
features->storageBuffer8BitAccess = enabled;
features->uniformAndStorageBuffer8BitAccess = enabled;
features->storagePushConstant8 = enabled;
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
- features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800;
+ features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800;
features->shaderInt8 = true;
break;
}
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.sampledImageDepthSampleCounts = sample_counts,
.sampledImageStencilSampleCounts = sample_counts,
- .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
+ .storageImageSampleCounts = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = true,
.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT;
- if (pdevice->rad_info.chip_class >= VI) {
+ if (pdevice->rad_info.chip_class >= GFX8) {
properties->supportedOperations |=
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
properties->sgprsPerSimd =
ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
properties->minSgprAllocation =
- pdevice->rad_info.chip_class >= VI ? 16 : 8;
+ pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
properties->maxSgprAllocation =
pdevice->rad_info.family == CHIP_TONGA ||
pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104;
properties->sgprAllocationGranularity =
- pdevice->rad_info.chip_class >= VI ? 16 : 8;
+ pdevice->rad_info.chip_class >= GFX8 ? 16 : 8;
/* VGPR. */
properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS;
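/* Rounding consequence (illustrative): a shader using 18 SGPRs occupies
 * 24 on GFX6-GFX7 (granularity 8) but 32 on GFX8+ (granularity 16),
 * which is why both the minimum and the granularity are reported per
 * chip class above. */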
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
/* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order.
*/
device->tess_offchip_block_dw_size =
device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
device->has_distributed_tess =
- device->physical_device->rad_info.chip_class >= VI &&
+ device->physical_device->rad_info.chip_class >= GFX8 &&
device->physical_device->rad_info.max_se >= 2;
if (getenv("RADV_TRACE_FILE")) {
device->ws->cs_finalize(device->empty_cs[family]);
}
- if (device->physical_device->rad_info.chip_class >= CIK)
+ if (device->physical_device->rad_info.chip_class >= GFX7)
cik_create_gfx_config(device);
VkPipelineCacheCreateInfo ci;
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
- bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
+ bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
device->physical_device->rad_info.family != CHIP_CARRIZO &&
device->physical_device->rad_info.family != CHIP_STONEY;
unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
/*
* Per RadeonSI:
* This must be one less than the maximum number due to a hw limitation.
- * Various hardware bugs in SI, CIK, and GFX9 need this.
+ * Various hardware bugs in GFX6, GFX7, and GFX9 need this.
*
* Per AMDVLK:
* Vega10 should limit max_offchip_buffers to 508 (4 * 127).
* Follow AMDVLK here.
*/
if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
- device->physical_device->rad_info.chip_class == CIK ||
- device->physical_device->rad_info.chip_class == SI)
+ device->physical_device->rad_info.chip_class == GFX7 ||
+ device->physical_device->rad_info.chip_class == GFX6)
--max_offchip_buffers_per_se;
max_offchip_buffers = max_offchip_buffers_per_se *
}
switch (device->physical_device->rad_info.chip_class) {
- case SI:
+ case GFX6:
max_offchip_buffers = MIN2(max_offchip_buffers, 126);
break;
- case CIK:
- case VI:
+ case GFX7:
+ case GFX8:
case GFX9:
default:
max_offchip_buffers = MIN2(max_offchip_buffers, 508);
}
*max_offchip_buffers_p = max_offchip_buffers;
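/* Worked example (assumed 4-SE GFX8 part): double_offchip_buffers gives
 * 128 per SE, 128 * 4 = 512 is clamped to 508 by the switch above, and
 * the GFX8 branch below decrements it once more before programming. */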
- if (device->physical_device->rad_info.chip_class >= CIK) {
- if (device->physical_device->rad_info.chip_class >= VI)
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ if (device->physical_device->rad_info.chip_class >= GFX8)
--max_offchip_buffers;
hs_offchip_param =
S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
if (gsvs_ring_bo)
radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
- if (queue->device->physical_device->rad_info.chip_class >= CIK) {
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
radeon_emit(cs, esgs_ring_size >> 8);
radeon_emit(cs, gsvs_ring_size >> 8);
radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
- if (queue->device->physical_device->rad_info.chip_class >= CIK) {
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(tf_ring_size / 4));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
queue->device->physical_device->rad_info.chip_class,
NULL, 0,
queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= CIK,
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
(queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
queue->device->physical_device->rad_info.chip_class,
NULL, 0,
queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= CIK,
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
if (radv_image_has_fmask(iview->image)) {
- if (device->physical_device->rad_info.chip_class >= CIK)
+ if (device->physical_device->rad_info.chip_class >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
- if (device->physical_device->rad_info.chip_class >= CIK)
+ if (device->physical_device->rad_info.chip_class >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
S_028C70_ENDIAN(endian);
if (radv_image_has_fmask(iview->image)) {
cb->cb_color_info |= S_028C70_COMPRESSION(1);
- if (device->physical_device->rad_info.chip_class == SI) {
+ if (device->physical_device->rad_info.chip_class == GFX6) {
unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
/* This must be set for fast clear to work without FMASK. */
if (!radv_image_has_fmask(iview->image) &&
- device->physical_device->rad_info.chip_class == SI) {
+ device->physical_device->rad_info.chip_class == GFX6) {
unsigned bankh = util_logbase2(surf->u.legacy.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
if (iview->image->info.samples > 1)
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
struct radeon_info *info = &device->physical_device->rad_info;
unsigned tiling_index = surf->u.legacy.tiling_index[level];
unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
{
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
+ bool is_vi = (device->physical_device->rad_info.chip_class >= GFX8);
unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction =
S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
S_008F38_MIP_POINT_PRECLAMP(0) |
- S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) |
+ S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE(is_vi));
sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_multiview', 1, True),
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
- Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= VI'),
+ Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'),
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_buffer_device_address', 1, True),
Extension('VK_EXT_memory_priority', 1, True),
Extension('VK_EXT_pci_bus_info', 2, True),
Extension('VK_EXT_pipeline_creation_feedback', 1, True),
- Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
- Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'),
+ Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= GFX7'),
+ Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= GFX7'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_shader_stencil_export', 1, True),
Extension('VK_EXT_transform_feedback', 1, True),
Extension('VK_EXT_ycbcr_image_arrays', 1, True),
Extension('VK_AMD_draw_indirect_count', 1, True),
Extension('VK_AMD_gcn_shader', 1, True),
- Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'),
- Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'),
+ Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800'),
+ Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX8'),
Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'),
Extension('VK_AMD_shader_core_properties', 1, True),
Extension('VK_AMD_shader_info', 1, True),
Extension('VK_AMD_shader_trinary_minmax', 1, True),
Extension('VK_GOOGLE_decorate_string', 1, True),
Extension('VK_GOOGLE_hlsl_functionality1', 1, True),
- Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= VI'),
+ Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= GFX8'),
]
class VkVersion:
case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- if (physical_device->rad_info.chip_class <= VI &&
+ if (physical_device->rad_info.chip_class <= GFX8 &&
physical_device->rad_info.family != CHIP_STONEY) {
buffer &= ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT);
if (!vk_format_is_compressed(pCreateInfo->format) &&
!vk_format_is_depth_or_stencil(pCreateInfo->format)
- && device->physical_device->rad_info.chip_class <= VI) {
+ && device->physical_device->rad_info.chip_class <= GFX8) {
/* this causes hangs in some VK CTS tests on GFX9. */
/* Textures with a very small height are recommended to be linear. */
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
const VkImageCreateInfo *pCreateInfo)
{
/* TC-compat HTILE is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < VI)
+ if (device->physical_device->rad_info.chip_class < GFX8)
return false;
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
bool blendable;
/* DCC (Delta Color Compression) is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < VI)
+ if (device->physical_device->rad_info.chip_class < GFX8)
return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
- if (device->physical_device->rad_info.chip_class != VI && stride) {
+ if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
range /= stride;
}
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
- if (chip_class >= VI) {
+ if (chip_class >= GFX8) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
if (!is_storage_image && radv_dcc_enabled(image, first_level)) {
meta_va = gpu_address + image->dcc_offset;
- if (chip_class <= VI)
+ if (chip_class <= GFX8)
meta_va += base_level_info->dcc_offset;
} else if (!is_storage_image &&
radv_image_is_tc_compat_htile(image)) {
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
}
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
unsigned pitch = base_level_info->nblk_x * block_width;
unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
/* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state.
*/
- if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
+ if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO;
else
memcpy(&md->metadata[2], desc, sizeof(desc));
/* Dwords [10:..] contain the mipmap level offsets. */
- if (device->physical_device->rad_info.chip_class <= VI) {
+ if (device->physical_device->rad_info.chip_class <= GFX8) {
for (i = 0; i <= image->info.levels - 1; i++)
md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8;
md->size_metadata = (11 + image->info.levels - 1) * 4;
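/* Size check (illustrative): an image with 3 mip levels stores offsets
 * in dwords [10..12], so size_metadata = (11 + 3 - 1) * 4 = 52 bytes. */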
*
* Test: dEQP-VK.tessellation.shader_input_output.barrier
*/
- if (ctx->options->chip_class >= CIK && ctx->options->family != CHIP_STONEY)
+ if (ctx->options->chip_class >= GFX7 && ctx->options->family != CHIP_STONEY)
hardware_lds_size = 65536;
num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
*/
num_patches = MIN2(num_patches, 40);
- /* SI bug workaround - limit LS-HS threadgroups to only one wave. */
- if (ctx->options->chip_class == SI) {
+ /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (ctx->options->chip_class == GFX6) {
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
num_patches = MIN2(num_patches, one_wave);
}
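/* Illustrative numbers: with 3 input and 3 output control points,
 * one_wave = 64 / 3 = 21, so a GFX6 LS-HS threadgroup is capped at 21
 * patches to keep it within a single 64-lane wave. */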
LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
unsigned tf_offset = 0;
- if (ctx->options->chip_class <= VI) {
+ if (ctx->options->chip_class <= GFX8) {
ac_nir_build_if(&inner_if_ctx, ctx,
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
rel_patch_id, ctx->ac.i32_0, ""));
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
- if (ctx->options->chip_class <= VI &&
+ if (ctx->options->chip_class <= GFX8 &&
(ctx->stage == MESA_SHADER_GEOMETRY ||
ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) {
unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
stride = 4 * num_components * ctx->gs_max_out_vertices;
- /* Limit on the stride field for <= CIK. */
+ /* Limit on the stride field for <= GFX7. */
assert(stride < (1 << 14));
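/* Example at the limit (assumed values): 16 output components and 256
 * emitted vertices give a stride of 4 * 16 * 256 = 16384, which is
 * exactly 1 << 14 and would trip the assertion above. */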
ring = LLVMBuildBitCast(ctx->ac.builder,
{
switch (nir->info.stage) {
case MESA_SHADER_TESS_CTRL:
- return chip_class >= CIK ? 128 : 64;
+ return chip_class >= GFX7 ? 128 : 64;
case MESA_SHADER_GEOMETRY:
return chip_class >= GFX9 ? 128 : 64;
case MESA_SHADER_COMPUTE:
* - Floating-point output modifiers would be ignored by the hw.
* - Some opcodes don't support denormals, such as v_mad_f32. We would
* have to stop using those.
- * - SI & CI would be very slow.
+ * - GFX6 & GFX7 would be very slow.
*/
config->float_mode |= V_00B028_FP_64_DENORMS;
}
unsigned num_se = device->physical_device->rad_info.max_se;
unsigned wave_size = 64;
unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
- * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+ /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+ * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
*/
unsigned gs_vertex_reuse =
- (device->physical_device->rad_info.chip_class >= VI ? 32 : 16) * num_se;
+ (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
unsigned alignment = 256 * num_se;
/* The maximum size is 63.999 MB per SE. */
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
esgs_ring_size = align(esgs_ring_size, alignment);
gsvs_ring_size = align(gsvs_ring_size, alignment);
- if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size;
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
assert(lds_size <= 65536);
lds_size = align(lds_size, 512) / 512;
} else {
key.vertex_attribute_offsets[location] = desc->offset;
key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding);
- if (pipeline->device->physical_device->rad_info.chip_class <= VI &&
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
uint64_t adjust;
}
key.col_format = blend->spi_shader_col_format;
- if (pipeline->device->physical_device->rad_info.chip_class < VI)
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX8)
radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10);
return key;
* if no sample lies on the pixel boundary (-8 sample offset). It's
* currently always TRUE because the driver doesn't support 16 samples.
*/
- bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK;
+ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
cull_dist_mask << 8 |
clip_dist_mask);
- if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF,
outinfo->writes_viewport_index);
}
radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
- if (pipeline->device->physical_device->rad_info.chip_class == CIK &&
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
tess->tf_param);
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7)
radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2,
tess->ls_hs_config);
else
radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline));
- if (pipeline->device->physical_device->rad_info.chip_class >= CIK) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
} else {
radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
/* GS requirement. */
ia_multi_vgt_param.partial_es_wave = false;
- if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI)
+ if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
ia_multi_vgt_param.partial_es_wave = true;
ia_multi_vgt_param.wd_switch_on_eop = false;
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
* The other cases are hardware requirements. */
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (device->has_distributed_tess) {
if (radv_pipeline_has_gs(pipeline)) {
- if (device->physical_device->rad_info.chip_class <= VI)
+ if (device->physical_device->rad_info.chip_class <= GFX8)
ia_multi_vgt_param.partial_es_wave = true;
} else {
ia_multi_vgt_param.partial_vs_wave = true;
ia_multi_vgt_param.base =
S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) |
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
compute_resource_limits =
S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
- if (device->physical_device->rad_info.chip_class >= CIK) {
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
unsigned num_cu_per_se =
device->physical_device->rad_info.num_good_compute_units /
device->physical_device->rad_info.max_se;
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
- /* CIK and later */
+ /* GFX7 and later */
uint32_t gfx_init_size_dw;
struct radeon_winsys_bo *gfx_init;
struct _mesa_string_buffer *buf)
{
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- unsigned lds_increment = chip_class >= CIK ? 512 : 256;
+ unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
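/* Granularity example (illustrative): a reported lds_size of 4 units
 * means 4 * 512 = 2048 bytes on GFX7+ but 4 * 256 = 1024 bytes on GFX6,
 * so statistics must scale by lds_increment before reporting bytes. */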
if (!pInfo) {
*pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
} else {
- unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+ unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
struct ac_shader_config *conf = &variant->config;
VkShaderStatisticsInfoAMD statistics = {};
* IN THE SOFTWARE.
*/
-/* command buffer handling for SI */
+/* command buffer handling for AMD GCN */
#include "radv_private.h"
#include "radv_shader.h"
raster_config_se);
for (se = 0; se < num_se; se++) {
- /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
- if (physical_device->rad_info.chip_class < CIK)
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
S_00802C_SE_INDEX(se) |
S_00802C_SH_BROADCAST_WRITES(1) |
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
}
- /* GRBM_GFX_INDEX has a different offset on SI and CI+ */
- if (physical_device->rad_info.chip_class < CIK)
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
S_00802C_SE_BROADCAST_WRITES(1) |
S_00802C_SH_BROADCAST_WRITES(1) |
S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1));
- if (physical_device->rad_info.chip_class >= CIK)
+ if (physical_device->rad_info.chip_class >= GFX7)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
- if (physical_device->rad_info.chip_class >= CIK) {
+ if (physical_device->rad_info.chip_class >= GFX7) {
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
radeon_set_sh_reg_seq(cs,
R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
* kernel if we want to use something other than the default value,
* which is now 0x22f.
*/
- if (physical_device->rad_info.chip_class <= SI) {
+ if (physical_device->rad_info.chip_class <= GFX6) {
/* XXX: This should be:
* (number of compute units) * 4 * (waves per simd) - 1 */
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
raster_config);
- if (physical_device->rad_info.chip_class >= CIK)
+ if (physical_device->rad_info.chip_class >= GFX7)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
raster_config_1);
} else {
{
int i;
- /* Only SI can disable CLEAR_STATE for now. */
+ /* Only GFX6 can disable CLEAR_STATE for now. */
assert(physical_device->has_clear_state ||
- physical_device->rad_info.chip_class == SI);
+ physical_device->rad_info.chip_class == GFX6);
radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
radeon_emit(cs, 0);
}
- if (physical_device->rad_info.chip_class <= VI)
+ if (physical_device->rad_info.chip_class <= GFX8)
si_set_raster_config(physical_device, cs);
radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- if (physical_device->rad_info.chip_class <= VI) {
+ if (physical_device->rad_info.chip_class <= GFX8) {
radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
}
radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (!physical_device->has_clear_state)
radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (physical_device->rad_info.chip_class < CIK)
+ if (physical_device->rad_info.chip_class < GFX7)
radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
S_008A14_CLIP_VTX_REORDER_ENA(1));
/* CLEAR_STATE doesn't clear these correctly on certain generations.
* I don't know why. Deduced by trial and error.
*/
- if (physical_device->rad_info.chip_class <= CIK) {
+ if (physical_device->rad_info.chip_class <= GFX7) {
radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
S_028204_WINDOW_OFFSET_DISABLE(1));
if (!physical_device->has_clear_state) {
radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
- /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
+ /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
}
- if (physical_device->rad_info.chip_class >= CIK) {
+ if (physical_device->rad_info.chip_class >= GFX7) {
if (physical_device->rad_info.chip_class >= GFX9) {
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
- if (physical_device->rad_info.chip_class >= VI) {
+ if (physical_device->rad_info.chip_class >= GFX8) {
uint32_t vgt_tess_distribution;
vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
- if (chip_class >= CIK) {
+ if (chip_class >= GFX7) {
wd_switch_on_eop = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.wd_switch_on_eop;
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
* Assume indirect draws always use small instances.
* This is needed for good VS wave utilization.
*/
- if (chip_class <= VI &&
+ if (chip_class <= GFX8 &&
info->max_se == 4 &&
multi_instances_smaller_than_primgroup)
wd_switch_on_eop = true;
- /* Required on CIK and later. */
+ /* Required on GFX7 and later. */
if (info->max_se > 2 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
- /* Required by Hawaii and, for some special cases, by VI. */
+ /* Required by Hawaii and, for some special cases, by GFX8. */
if (ia_switch_on_eoi &&
(family == CHIP_HAWAII ||
- (chip_class == VI &&
+ (chip_class == GFX8 &&
/* max primgroup in wave is always 2 - leave this for documentation */
(radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
partial_vs_wave = true;
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if (chip_class <= VI && ia_switch_on_eoi)
+ if (chip_class <= GFX8 && ia_switch_on_eoi)
partial_es_wave = true;
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
- S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0);
+ S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
}
if (!is_gfx8_mec)
radeon_emit(cs, 0); /* unused */
} else {
- if (chip_class == CIK ||
- chip_class == VI) {
+ if (chip_class == GFX7 ||
+ chip_class == GFX8) {
/* Two EOP events are required to make all engines go idle
* (and optional cache flushes executed) before the timestamp
* is written.
if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- if (chip_class <= VI) {
+ if (chip_class <= GFX8) {
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
- if (chip_class >= VI) {
+ if (chip_class >= GFX8) {
si_cs_emit_write_event_eop(cs,
chip_class,
is_mec,
}
if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
- (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+ (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
- S_0301F0_TC_WB_ACTION_ENA(chip_class >= VI));
+ S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
cp_coher_cntl = 0;
} else {
if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
else if (flags & CP_DMA_USE_L2)
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
radeon_emit(cs, header);
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{
- if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK)
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
return;
if (!cmd_buffer->state.dma_is_busy)
uint32_t pad_word = 0xffff1000U;
bool emit_signal_sem = sem_info->cs_emit_signal;
- if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
+ if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6)
pad_word = 0x80000000;
assert(cs_count);
ws->info.num_sdma_rings = MIN2(ws->info.num_sdma_rings, MAX_RINGS_PER_TYPE);
ws->info.num_compute_rings = MIN2(ws->info.num_compute_rings, MAX_RINGS_PER_TYPE);
- ws->use_ib_bos = ws->info.chip_class >= CIK;
+ ws->use_ib_bos = ws->info.chip_class >= GFX7;
return true;
}
templ.bind |= new_bind_flag;
/* r600g doesn't react to dirty_tex_descriptor_counter */
- if (rctx->chip_class < SI)
+ if (rctx->chip_class < GFX6)
return;
if (rtex->resource.b.is_shared)
const struct pipe_box *box)
{
/* r600g doesn't react to dirty_tex_descriptor_counter */
- return rscreen->chip_class >= SI &&
+ return rscreen->chip_class >= GFX6 &&
!rtex->resource.b.is_shared &&
!(transfer_usage & PIPE_TRANSFER_READ) &&
rtex->resource.b.b.last_level == 0 &&
#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))
#define PKT3_SET_SH_REG 0x76 /* SI and later */
-#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */
+#define PKT3_SET_UCONFIG_REG 0x79 /* GFX7 and later */
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */
copy_width <= (1 << 14) &&
copy_height <= (1 << 14) &&
copy_depth <= (1 << 11) &&
- /* HW limitation - CIK: */
- (sctx->chip_class != CIK ||
+ /* HW limitation - GFX7: */
+ (sctx->chip_class != GFX7 ||
(copy_width < (1 << 14) &&
copy_height < (1 << 14) &&
copy_depth < (1 << 11))) &&
- /* HW limitation - some CIK parts: */
+ /* HW limitation - some GFX7 parts: */
((sctx->family != CHIP_BONAIRE &&
sctx->family != CHIP_KAVERI) ||
(srcx + copy_width != (1 << 14) &&
radeon_emit(cs, dstx | (dsty << 16));
radeon_emit(cs, dstz | ((dst_pitch - 1) << 16));
radeon_emit(cs, dst_slice_pitch - 1);
- if (sctx->chip_class == CIK) {
+ if (sctx->chip_class == GFX7) {
radeon_emit(cs, copy_width | (copy_height << 16));
radeon_emit(cs, copy_depth);
} else {
bpp == 16)
return false;
- if (sctx->chip_class == CIK &&
+ if (sctx->chip_class == GFX7 &&
(copy_width_aligned == (1 << 14) ||
copy_height == (1 << 14) ||
copy_depth == (1 << 11)))
radeon_emit(cs, linear_x | (linear_y << 16));
radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16));
radeon_emit(cs, linear_slice_pitch - 1);
- if (sctx->chip_class == CIK) {
+ if (sctx->chip_class == GFX7) {
radeon_emit(cs, copy_width_aligned | (copy_height << 16));
radeon_emit(cs, copy_depth);
} else {
dsty % 8 == 0 &&
srcx % 8 == 0 &&
srcy % 8 == 0 &&
- /* this can either be equal, or display->rotated (VI+ only) */
+ /* this can either be equal, or display->rotated (GFX8+ only) */
(src_micro_mode == dst_micro_mode ||
- (sctx->chip_class >= VI &&
+ (sctx->chip_class >= GFX8 &&
src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING &&
dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) {
assert(src_pitch % 8 == 0);
copy_depth <= (1 << 11) &&
copy_width_aligned % 8 == 0 &&
copy_height_aligned % 8 == 0 &&
- /* HW limitation - CIK: */
- (sctx->chip_class != CIK ||
+ /* HW limitation - GFX7: */
+ (sctx->chip_class != GFX7 ||
(copy_width_aligned < (1 << 14) &&
copy_height_aligned < (1 << 14) &&
copy_depth < (1 << 11))) &&
- /* HW limitation - some CIK parts: */
+ /* HW limitation - some GFX7 parts: */
((sctx->family != CHIP_BONAIRE &&
sctx->family != CHIP_KAVERI &&
sctx->family != CHIP_KABINI &&
radeon_emit(cs, dstz | (dst_pitch_tile_max << 16));
radeon_emit(cs, dst_slice_tile_max);
radeon_emit(cs, encode_tile_info(sctx, sdst, dst_level, false));
- if (sctx->chip_class == CIK) {
+ if (sctx->chip_class == GFX7) {
radeon_emit(cs, copy_width_aligned |
(copy_height_aligned << 16));
radeon_emit(cs, copy_depth);
return;
}
- if ((sctx->chip_class == CIK || sctx->chip_class == VI) &&
+ if ((sctx->chip_class == GFX7 || sctx->chip_class == GFX8) &&
cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
src, src_level, src_box))
return;
goto resolve_to_temp;
/* This can happen with mipmapping. */
- if (sctx->chip_class == VI &&
+ if (sctx->chip_class == GFX8 &&
!dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size)
goto resolve_to_temp;
assert(!"unexpected micro mode");
return;
}
- } else if (sscreen->info.chip_class >= CIK) {
+ } else if (sscreen->info.chip_class >= GFX7) {
/* These magic numbers were copied from addrlib. It doesn't use
* any definitions for them either. They are all 2D_TILED_THIN1
* modes with different bpp and micro tile mode.
assert(!"unexpected micro mode");
return;
}
- } else { /* SI */
+ } else { /* GFX6 */
switch (tex->last_msaa_resolve_target_micro_mode) {
case RADEON_MICRO_MODE_DISPLAY:
switch (tex->surface.bpe) {
!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
continue;
- if (sctx->chip_class <= VI &&
+ if (sctx->chip_class <= GFX8 &&
tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
!sctx->screen->info.htile_cmask_support_1d_tiling)
continue;
continue;
/* This can happen with mipmapping or MSAA. */
- if (sctx->chip_class == VI &&
+ if (sctx->chip_class == GFX8 &&
!tex->surface.u.legacy.level[level].dcc_fast_clear_size)
continue;
radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
radeon_set_sh_reg_seq(cs,
R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
* kernel if we want to use something other than the default value,
* which is now 0x22f.
*/
- if (sctx->chip_class <= SI) {
+ if (sctx->chip_class <= GFX6) {
/* XXX: This should be:
* (number of compute units) * 4 * (waves per simd) - 1 */
/* Set the pointer to border colors. */
bc_va = sctx->border_color_buffer->gpu_address;
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */
radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */
}
lds_blocks = config->lds_size;
- /* XXX: We are over allocating LDS. For SI, the shader reports
+ /* XXX: We are over-allocating LDS. For GFX6, the shader reports
* LDS in blocks of 256 bytes, so if there are 4 bytes lds
* allocated in the shader and 4 bytes allocated by the state
* tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
*/
- if (sctx->chip_class <= SI) {
+ if (sctx->chip_class <= GFX6) {
lds_blocks += align(program->local_size, 256) >> 8;
} else {
lds_blocks += align(program->local_size, 512) >> 9;
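The two branches above differ only in allocation granularity. A minimal
standalone sketch of the rule (the helper name is hypothetical):

   /* GFX6 encodes LDS_SIZE in 256-byte blocks; GFX7+ uses 512-byte blocks. */
   static unsigned lds_size_in_blocks(enum chip_class chip_class, unsigned lds_bytes)
   {
      if (chip_class <= GFX6)
         return align(lds_bytes, 256) >> 8;
      return align(lds_bytes, 512) >> 9;
   }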
* command. However, that would add more complexity and we're likely
* to get a shader state change in that case anyway.
*/
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b,
0, program->shader.bo->b.b.width0);
}
} else {
scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size);
- if (sctx->chip_class < VI) {
+ if (sctx->chip_class < GFX8) {
/* BUF_DATA_FORMAT is ignored, but it cannot be
* BUF_DATA_FORMAT_INVALID. */
scratch_dword3 |=
unsigned compute_resource_limits =
S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
sscreen->info.max_se;
compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh);
} else {
- /* SI */
+ /* GFX6 */
if (sctx->cs_max_waves_per_sh) {
unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16);
compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16);
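Both branches pack the same limit into COMPUTE_RESOURCE_LIMITS; only the
units differ. A sketch, assuming the S_00B854_* pack macros from the
register headers are in scope (the helper itself is hypothetical):

   static unsigned encode_waves_per_sh(enum chip_class chip_class, unsigned max_waves)
   {
      /* GFX7+: the field holds the wave limit directly. */
      if (chip_class >= GFX7)
         return S_00B854_WAVES_PER_SH(max_waves);
      /* GFX6: the field counts in units of 16 waves, rounded up. */
      return S_00B854_WAVES_PER_SH_SI(DIV_ROUND_UP(max_waves, 16));
   }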
S_00B800_FORCE_START_AT_000(1) |
/* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order. (same as Vulkan) */
- S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
+ S_00B800_ORDER_MODE(sctx->chip_class >= GFX7);
const uint *last_block = info->last_block;
bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
* compute isn't used, i.e. only one compute job can run at a time.
* If async compute is possible, the threadgroup size must be limited
* to 256 threads on all queues to avoid the bug.
- * Only SI and certain CIK chips are affected.
+ * Only GFX6 and certain GFX7 chips are affected.
*/
bool cs_regalloc_hang =
- (sctx->chip_class == SI ||
+ (sctx->chip_class == GFX6 ||
sctx->family == CHIP_BONAIRE ||
sctx->family == CHIP_KABINI) &&
info->block[0] * info->block[1] * info->block[2] > 256;
si_context_add_resource_size(sctx, info->indirect);
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
- if (sctx->chip_class <= VI &&
+ if (sctx->chip_class <= GFX8 &&
si_resource(info->indirect)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
si_resource(info->indirect)->TC_L2_dirty = false;
{
if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META ||
coher == SI_COHERENCY_CP)) ||
- (sctx->chip_class >= CIK && coher == SI_COHERENCY_SHADER))
+ (sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER))
return size <= 256 * 1024 ? L2_LRU : L2_STREAM;
return L2_BYPASS;
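Worked examples of the policy above, assuming a GFX7+ chip and
SI_COHERENCY_SHADER:

   /* 128 KiB operation -> L2_LRU    (small enough to keep resident in L2)
    * 1 MiB operation   -> L2_STREAM (stream through L2, don't pollute it)
    * same on GFX6      -> L2_BYPASS (no cached path is taken here) */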
(!force_cpdma &&
clear_value_size == 4 &&
offset % 4 == 0 &&
- (size > 32*1024 || sctx->chip_class <= VI))) {
+ (size > 32*1024 || sctx->chip_class <= GFX8))) {
si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0,
aligned_size, clear_value,
clear_value_size, coher);
ctx->launch_grid(ctx, &info);
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
- (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+ (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
ctx->launch_grid(ctx, &info);
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
- (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+ (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
ctx->bind_compute_state(ctx, saved_cs);
ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
uint32_t header = 0, command = 0;
assert(size <= cp_dma_max_byte_count(sctx));
- assert(sctx->chip_class != SI || cache_policy == L2_BYPASS);
+ assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS);
if (sctx->chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size);
/* GDS increments the address, not CP. */
command |= S_414_DAS(V_414_REGISTER) |
S_414_DAIC(V_414_NO_INCREMENT);
- } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) |
S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM);
}
/* Both of these are required for GDS. It does increment the address. */
command |= S_414_SAS(V_414_REGISTER) |
S_414_SAIC(V_414_NO_INCREMENT);
- } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) {
+ } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) {
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM);
}
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, header);
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
uint64_t offset, unsigned size)
{
- assert(sctx->chip_class >= CIK);
+ assert(sctx->chip_class >= GFX7);
si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size,
SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU);
}
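The prefetch is just a CP DMA copy with source == destination, L2_LRU
policy, and all synchronization skipped, so its only effect is pulling the
range into L2. Usage mirrors the compute path shown earlier:

   /* warm up a shader binary before the dispatch that uses it */
   if (sctx->chip_class >= GFX7)
      cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b,
                               0, program->shader.bo->b.b.width0);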
}
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
/* Choose the right spot for the VBO prefetch. */
if (sctx->tes_shader.cso) {
if (mask & SI_PREFETCH_LS)
assert(offset % 4 == 0);
assert(size % 4 == 0);
- if (sctx->chip_class == SI && dst_sel == V_370_MEM)
+ if (sctx->chip_class == GFX6 && dst_sel == V_370_MEM)
dst_sel = V_370_MEM_GRBM;
radeon_add_to_buffer_list(sctx, cs, buf,
si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
base_level_info->mode == RADEON_SURF_MODE_2D)
state[0] |= tex->surface.tile_swizzle;
- if (sscreen->info.chip_class >= VI) {
+ if (sscreen->info.chip_class >= GFX8) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
meta_va = (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) +
tex->dcc_offset;
- if (sscreen->info.chip_class == VI) {
+ if (sscreen->info.chip_class == GFX8) {
meta_va += base_level_info->dcc_offset;
assert(base_level_info->mode == RADEON_SURF_MODE_2D);
}
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
}
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
unsigned pitch = base_level_info->nblk_x * block_width;
unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
uint64_t va = buf->gpu_address + offset;
int64_t num_records = (int64_t)buf->b.b.width0 - offset;
- if (sctx->chip_class != VI && vb->stride) {
+ if (sctx->chip_class != GFX8 && vb->stride) {
/* Round up by rounding down and adding 1 */
num_records = (num_records - velems->format_size[i]) /
vb->stride + 1;
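Worked example of the "round down and add 1" trick above: with 100 bytes
left in the buffer, stride = 16 and format_size = 4, the result is
(100 - 4) / 16 + 1 = 7 records; plain 100 / 16 would give 6 and cut off
the last vertex, whose 4 bytes still fit entirely inside the buffer.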
assert(slot < descs->num_elements);
pipe_resource_reference(&buffers->buffers[slot], NULL);
- /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
+ /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
* with a NULL buffer). We need to use a dummy buffer instead. */
- if (sctx->chip_class == CIK &&
+ if (sctx->chip_class == GFX7 &&
(!input || (!input->buffer && !input->user_buffer)))
input = &sctx->null_const_buf;
break;
}
- if (sctx->chip_class >= VI && stride)
+ if (sctx->chip_class >= GFX8 && stride)
num_records *= stride;
/* Set the descriptor. */
struct radeon_cmdbuf *cs = sctx->dma_cs;
/* NOP waits for idle. */
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
radeon_emit(cs, 0x00000000); /* NOP */
else
radeon_emit(cs, 0xf0000000); /* NOP */
struct radeon_cmdbuf *cs = sctx->dma_cs;
uint64_t va = dst->gpu_address + offset;
- if (sctx->chip_class == SI) {
+ if (sctx->chip_class == GFX6) {
unreachable("SI DMA doesn't support the timestamp packet.");
return;
}
offset += sdst->gpu_address;
- if (sctx->chip_class == SI) {
+ if (sctx->chip_class == GFX6) {
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE);
si_need_dma_space(sctx, ncopy * 4, sdst, NULL);
return;
}
- /* The following code is for CI, VI, Vega/Raven, etc. */
+ /* The following code is for GFX7 and later. */
/* the same maximum size as for copying */
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE);
si_need_dma_space(sctx, ncopy * 5, sdst, NULL);
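The reservation sizes follow from the packet layouts: a GFX6 fill packet
is 4 dwords, a GFX7+ SDMA constant-fill packet is 5 dwords. For example,
clearing slightly more than 3 * CIK_SDMA_COPY_MAX_SIZE bytes gives
ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE) = 4 packets, so
si_need_dma_space() must reserve 4 * 5 = 20 dwords here.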
radeon_emit(cs, 0); /* immediate data hi */
radeon_emit(cs, 0); /* unused */
} else {
- if (ctx->chip_class == CIK ||
- ctx->chip_class == VI) {
+ if (ctx->chip_class == GFX7 ||
+ ctx->chip_class == GFX8) {
struct si_resource *scratch = ctx->eop_bug_scratch;
uint64_t va = scratch->gpu_address;
{
unsigned dwords = 6;
- if (screen->info.chip_class == CIK ||
- screen->info.chip_class == VI)
+ if (screen->info.chip_class == GFX7 ||
+ screen->info.chip_class == GFX8)
dwords *= 2;
return dwords;
return 32;
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
- return sscreen->info.chip_class <= VI ?
+ return sscreen->info.chip_class <= GFX8 ?
PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
/* Stream output. */
wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_GLOBAL_L2;
- } else if (ctx->chip_class == SI) {
+ } else if (ctx->chip_class == GFX6) {
/* The kernel flushes L2 before shaders are finished. */
wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH;
/* Make sure CP DMA is idle at the end of IBs after L2 prefetches
* because the kernel doesn't wait for it. */
- if (ctx->chip_class >= CIK)
+ if (ctx->chip_class >= GFX7)
si_cp_dma_wait_for_idle(ctx);
/* Wait for draw calls to finish if needed. */
ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From VI */
+ ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */
/* Set all saved registers state to saved. */
ctx->tracked_regs.reg_saved = 0xffffffffffffffff;
UPDATE_COUNTER(gui, GUI_ACTIVE);
gui_busy = GUI_ACTIVE(value);
- if (sscreen->info.chip_class == CIK || sscreen->info.chip_class == VI) {
+ if (sscreen->info.chip_class == GFX7 || sscreen->info.chip_class == GFX8) {
/* SRBM_STATUS2 */
sscreen->ws->read_registers(sscreen->ws, SRBM_STATUS2, 1, &value);
sdma_busy = SDMA_BUSY(value);
}
- if (sscreen->info.chip_class >= VI) {
+ if (sscreen->info.chip_class >= GFX8) {
/* CP_STAT */
sscreen->ws->read_registers(sscreen->ws, CP_STAT, 1, &value);
unsigned i;
switch (screen->info.chip_class) {
- case CIK:
+ case GFX7:
blocks = groups_CIK;
num_blocks = ARRAY_SIZE(groups_CIK);
break;
- case VI:
+ case GFX8:
blocks = groups_VI;
num_blocks = ARRAY_SIZE(groups_VI);
break;
blocks = groups_gfx9;
num_blocks = ARRAY_SIZE(groups_gfx9);
break;
- case SI:
+ case GFX6:
default:
return; /* not implemented */
}
if (screen->info.max_sh_per_se != 1) {
- /* This should not happen on non-SI chips. */
+ /* This should not happen on non-GFX6 chips. */
fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not "
"supported (inaccurate performance counters)\n",
screen->info.max_sh_per_se);
/* Only create the less-optimizing version of the compiler on APUs
* predating Ryzen (Raven). */
bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
- sscreen->info.chip_class <= VI;
+ sscreen->info.chip_class <= GFX8;
enum ac_target_machine_options tm_options =
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
if (!sctx)
return NULL;
- sctx->has_graphics = sscreen->info.chip_class == SI ||
+ sctx->has_graphics = sscreen->info.chip_class == GFX6 ||
!(flags & PIPE_CONTEXT_COMPUTE_ONLY);
if (flags & PIPE_CONTEXT_DEBUG)
}
- if (sctx->chip_class == CIK ||
- sctx->chip_class == VI ||
+ if (sctx->chip_class == GFX7 ||
+ sctx->chip_class == GFX8 ||
sctx->chip_class == GFX9) {
sctx->eop_bug_scratch = si_resource(
pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
}
/* Initialize SDMA functions. */
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
cik_init_sdma_functions(sctx);
else
si_init_dma_functions(sctx);
V_370_MEM, V_370_ME, &sctx->wait_mem_number);
}
- /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
+ /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
- if (sctx->chip_class == CIK) {
+ if (sctx->chip_class == GFX7) {
sctx->null_const_buf.buffer =
pipe_aligned_buffer_create(screen,
SI_RESOURCE_FLAG_32BIT,
/* this must be last */
si_begin_new_gfx_cs(sctx);
- if (sctx->chip_class == CIK) {
+ if (sctx->chip_class == GFX7) {
/* Clear the NULL constant buffer, because loads should return zeros.
* Note that this forces CP DMA to be used, because clover deadlocks
* for some reason when the compute codepath is used.
si_init_perfcounters(sscreen);
/* Determine tessellation ring info. */
- bool double_offchip_buffers = sscreen->info.chip_class >= CIK &&
+ bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 &&
sscreen->info.family != CHIP_CARRIZO &&
sscreen->info.family != CHIP_STONEY;
/* This must be one less than the maximum number due to a hw limitation.
- * Various hardware bugs in SI, CIK, and GFX9 need this.
+ * Various hardware bugs in GFX6, GFX7, and GFX9 need this.
*/
unsigned max_offchip_buffers_per_se;
sscreen->tess_offchip_ring_size = max_offchip_buffers *
sscreen->tess_offchip_block_dw_size * 4;
- if (sscreen->info.chip_class >= CIK) {
- if (sscreen->info.chip_class >= VI)
+ if (sscreen->info.chip_class >= GFX7) {
+ if (sscreen->info.chip_class >= GFX8)
--max_offchip_buffers;
sscreen->vgt_hs_offchip_param =
S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
}
/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
- * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
- * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/
- sscreen->has_clear_state = sscreen->info.chip_class >= CIK &&
+ * on GFX6. CLEAR_STATE also hangs the asic on the radeon kernel for some
+ * registers, e.g. SPI_VS_OUT_CONFIG. So only enable GFX7+ CLEAR_STATE on
+ * the amdgpu kernel. */
+ sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 &&
sscreen->info.drm_major == 3;
sscreen->has_distributed_tess =
- sscreen->info.chip_class >= VI &&
+ sscreen->info.chip_class >= GFX8 &&
sscreen->info.max_se >= 2;
sscreen->has_draw_indirect_multi =
(sscreen->info.family >= CHIP_POLARIS10) ||
- (sscreen->info.chip_class == VI &&
+ (sscreen->info.chip_class == GFX8 &&
sscreen->info.pfp_fw_version >= 121 &&
sscreen->info.me_fw_version >= 87) ||
- (sscreen->info.chip_class == CIK &&
+ (sscreen->info.chip_class == GFX7 &&
sscreen->info.pfp_fw_version >= 211 &&
sscreen->info.me_fw_version >= 173) ||
- (sscreen->info.chip_class == SI &&
+ (sscreen->info.chip_class == GFX6 &&
sscreen->info.pfp_fw_version >= 79 &&
sscreen->info.me_fw_version >= 142);
- sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI &&
+ sscreen->has_out_of_order_rast = sscreen->info.chip_class >= GFX8 &&
sscreen->info.max_se >= 2 &&
!(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
sscreen->assume_no_z_fights =
* by the reality that LLVM 5.0 doesn't have working VGPR indexing
* on GFX9.
*/
- sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI;
+ sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= GFX8;
/* Some chips have RB+ registers, but don't support RB+. Those must
* always disable it.
sscreen->dcc_msaa_allowed =
!(sscreen->debug_flags & DBG(NO_DCC_MSAA));
- sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
+ sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= GFX8;
(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
sscreen->use_monolithic_shaders =
sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
SI_CONTEXT_INV_VMEM_L1;
- if (sscreen->info.chip_class <= VI) {
+ if (sscreen->info.chip_class <= GFX8) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
#define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6)
/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
- * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
+ * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7)
/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
* a CB or DB flush. */
unsigned cb_color_view;
unsigned cb_color_attrib;
unsigned cb_color_attrib2; /* GFX9 and later */
- unsigned cb_dcc_control; /* VI and later */
+ unsigned cb_dcc_control; /* GFX8 and later */
unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */
unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */
unsigned spi_shader_col_format_blend:8; /* blending without alpha. */
bool bo_list_add_all_compute_resources;
/* other shader resources */
- struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */
+ struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */
struct pipe_resource *esgs_ring;
struct pipe_resource *gsvs_ring;
struct pipe_resource *tess_rings;
else if (shaders_read_metadata)
sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
}
}
else if (shaders_read_metadata)
sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
}
}
struct pipe_screen *screen = sctx->b.screen;
unsigned aligned_ndw = align(state->ndw, 8);
- /* only supported on CIK and later */
- if (sctx->chip_class < CIK)
+ /* only supported on GFX7 and later */
+ if (sctx->chip_class < GFX7)
return;
assert(state->ndw);
/* Use the value written by compute shader as a workaround. Note that
* the wait flag does not apply in this predication mode.
*
- * The shader outputs the result value to L2. Workarounds only affect VI
+ * The shader outputs the result value to L2. Workarounds only affect GFX8
* and later, where the CP reads data from L2, so we don't need an
* additional flush.
*/
if (query) {
bool needs_workaround = false;
- /* There was a firmware regression in VI which causes successive
+ /* There was a firmware regression in GFX8 which causes successive
* SET_PREDICATION packets to give the wrong answer for
* non-inverted stream overflow predication.
*/
- if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) ||
+ if (((sctx->chip_class == GFX8 && sctx->screen->info.pfp_fw_feature < 49) ||
(sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
!condition &&
(squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
{
/* amdgpu */
if (sscreen->info.drm_major == 3) {
- if (sscreen->info.chip_class >= VI)
+ if (sscreen->info.chip_class >= GFX8)
return ARRAY_SIZE(si_driver_query_list);
else
return ARRAY_SIZE(si_driver_query_list) - 7;
/* radeon */
if (sscreen->info.has_read_registers_query) {
- if (sscreen->info.chip_class == CIK)
+ if (sscreen->info.chip_class == GFX7)
return ARRAY_SIZE(si_driver_query_list) - 6;
else
return ARRAY_SIZE(si_driver_query_list) - 7;
static bool is_merged_shader(struct si_shader_context *ctx)
{
- if (ctx->screen->info.chip_class <= VI)
+ if (ctx->screen->info.chip_class <= GFX8)
return false;
return ctx->shader->key.as_ls ||
/* Store the dynamic HS control word. */
offset = 0;
- if (ctx->screen->info.chip_class <= VI) {
+ if (ctx->screen->info.chip_class <= GFX8) {
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->i32, 0x80000000, 0),
1, ctx->i32_0, tf_base,
addrs[4 * i + 0], "");
break;
default:
- fprintf(stderr, "Warning: SI unhandled fs output type:%d\n",
+ fprintf(stderr, "Warning: GFX6 unhandled fs output type:%d\n",
semantic_name);
}
}
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- /* SI only (thanks to a hw bug workaround):
+ /* GFX6 only (thanks to a hw bug workaround):
* The real barrier instruction isn't needed, because an entire patch
* always fits into a single wave.
*/
- if (ctx->screen->info.chip_class == SI &&
+ if (ctx->screen->info.chip_class == GFX6 &&
ctx->type == PIPE_SHADER_TESS_CTRL) {
ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
return;
case PIPE_SHADER_TESS_CTRL:
/* Return this so that LLVM doesn't remove s_barrier
* instructions on chips where we use s_barrier. */
- return shader->selector->screen->info.chip_class >= CIK ? 128 : 64;
+ return shader->selector->screen->info.chip_class >= GFX7 ? 128 : 64;
case PIPE_SHADER_GEOMETRY:
return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64;
declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs);
break;
- case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */
+ case PIPE_SHADER_TESS_CTRL: /* GFX6-GFX8 */
declare_global_desc_pointers(ctx, &fninfo);
declare_per_stage_desc_pointers(ctx, &fninfo, true);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
ctx->param_rw_buffers);
- if (ctx->screen->info.chip_class <= VI &&
+ if (ctx->screen->info.chip_class <= GFX8 &&
(ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
unsigned ring =
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
stride = 4 * num_components * sel->gs_max_out_vertices;
- /* Limit on the stride field for <= CIK. */
+ /* Limit on the stride field for <= GFX7. */
assert(stride < (1 << 14));
num_records = 64;
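Worked example of the limit being asserted: since
stride = 4 * num_components * gs_max_out_vertices, a shader with 16
output components and 256 emitted vertices gives 4 * 16 * 256 = 16384 =
1 << 14, exactly the first value the GFX6-GFX7 14-bit stride field rejects.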
struct si_screen *sscreen = shader->selector->screen;
struct si_shader_config *conf = &shader->config;
unsigned num_inputs = shader->selector->info.num_inputs;
- unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
+ unsigned lds_increment = sscreen->info.chip_class >= GFX7 ? 512 : 256;
unsigned lds_per_wave = 0;
unsigned max_simd_waves;
* - Floating-point output modifiers would be ignored by the hw.
* - Some opcodes don't support denormals, such as v_mad_f32. We would
* have to stop using those.
- * - SI & CI would be very slow.
+ * - GFX6 & GFX7 would be very slow.
*/
conf->float_mode |= V_00B028_FP_64_DENORMS;
/* Assume a slow CPU. */
assert(!sel->screen->info.has_dedicated_vram &&
- sel->screen->info.chip_class <= VI);
+ sel->screen->info.chip_class <= GFX8);
/* For a crazy dEQP test containing 2597 memory opcodes, mostly
* buffer stores. */
if (sel->type == PIPE_SHADER_COMPUTE) {
unsigned wave_size = 64;
unsigned max_vgprs = 256;
- unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512;
+ unsigned max_sgprs = sscreen->info.chip_class >= GFX8 ? 800 : 512;
unsigned max_sgprs_per_wave = 128;
unsigned max_block_threads = si_get_max_workgroup_size(shader);
unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
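A worked instance of the last line: a 1024-thread block with
wave_size = 64 needs DIV_ROUND_UP(1024, 64) = 16 waves resident on one
CU, which is the occupancy floor the register budgeting below must satisfy.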
/* Create the function. */
si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
- ctx->screen->info.chip_class >= CIK ? 128 : 64);
+ ctx->screen->info.chip_class >= GFX7 ? 128 : 64);
ac_declare_lds_as_pointer(&ctx->ac);
func = ctx->main_fn;
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
#define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF
-/* SI-specific system values. */
+/* radeonsi-specific system values. */
enum {
/* Values from set_tess_state. */
TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
LLVMBuildExtractElement(builder, descriptor,
LLVMConstInt(ctx->i32, 2, 0), "");
- if (ctx->screen->info.chip_class == VI) {
- /* On VI, the descriptor contains the size in bytes,
+ if (ctx->screen->info.chip_class == GFX8) {
+ /* On GFX8, the descriptor contains the size in bytes,
* but TXQ must return the size in elements.
* The stride is always non-zero for resources using TXQ.
*/
/* Match the resource type set in the descriptor. */
if (dim == ac_image_cube ||
- (screen->info.chip_class <= VI && dim == ac_image_3d))
+ (screen->info.chip_class <= GFX8 && dim == ac_image_3d))
dim = ac_image_2darray;
else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) {
/* When a single layer of a 3D texture is bound, the shader
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
LLVMValueRef rsrc)
{
- if (ctx->screen->info.chip_class <= CIK) {
+ if (ctx->screen->info.chip_class <= GFX7) {
return rsrc;
} else {
LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
unsigned cache_policy = 0;
if (!atomic &&
- /* SI has a TC L1 bug causing corruption of 8bit/16bit stores.
+ /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores.
* All store opcodes not aligned to a dword are affected.
* The only way to get unaligned stores in radeonsi is through
* shader images. */
- ((may_store_unaligned && ctx->screen->info.chip_class == SI) ||
+ ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) ||
/* If this is write-only, don't keep data in L1 to prevent
* evicting L1 cache lines that may be needed by other
* instructions. */
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
*
- * SI-CI:
+ * GFX6-GFX7:
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
* filtering manually. The driver sets img7 to a mask clearing
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
* s_and_b32 samp0, samp0, img7
*
- * VI:
+ * GFX8:
* The ANISO_OVERRIDE sampler field enables this fix in TA.
*/
static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
{
LLVMValueRef img7, samp0;
- if (ctx->screen->info.chip_class >= VI)
+ if (ctx->screen->info.chip_class >= GFX8)
return samp;
img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
* so the depth comparison value isn't clamped for Z16 and
* Z24 anymore. Do it manually here.
*/
- if (ctx->screen->info.chip_class >= VI) {
+ if (ctx->screen->info.chip_class >= GFX8) {
LLVMValueRef upgraded;
LLVMValueRef clamped;
upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
} else if (tgsi_is_array_sampler(target) &&
opcode != TGSI_OPCODE_TXF &&
opcode != TGSI_OPCODE_TXF_LZ &&
- ctx->screen->info.chip_class <= VI) {
+ ctx->screen->info.chip_class <= GFX8) {
unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]);
}
/* The hardware needs special lowering for Gather4 with integer formats. */
LLVMValueRef gather4_int_result_workaround = NULL;
- if (ctx->screen->info.chip_class <= VI &&
+ if (ctx->screen->info.chip_class <= GFX8 &&
opcode == TGSI_OPCODE_TG4) {
assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK,
SI_TRACKED_CB_TARGET_MASK, cb_target_mask);
- if (sctx->chip_class >= VI) {
+ if (sctx->chip_class >= GFX8) {
/* DCC MSAA workaround for blending.
* Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
* COMBINER_DISABLE, but that would be more complicated.
*/
- bool oc_disable = (sctx->chip_class == VI ||
+ bool oc_disable = (sctx->chip_class == GFX8 ||
sctx->chip_class == GFX9) &&
blend &&
blend->blend_enable_4bit & cb_target_mask &&
!sctx->occlusion_queries_disabled) {
bool perfect = sctx->num_perfect_occlusion_queries > 0;
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
unsigned log_sample_rate = sctx->framebuffer.log_samples;
/* Stoney doesn't increment occlusion query counters
}
} else {
/* Disable occlusion queries. */
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
db_count_control = 0;
} else {
db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
db_shader_control = sctx->ps_db_shader_control;
- /* Bug workaround for smoothing (overrasterization) on SI. */
- if (sctx->chip_class == SI && sctx->smoothing_enabled) {
+ /* Bug workaround for smoothing (overrasterization) on GFX6. */
+ if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) {
db_shader_control &= C_02880C_Z_ORDER;
db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
}
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- return V_028040_Z_24; /* deprecated on SI */
+ return V_028040_Z_24; /* deprecated on AMD GCN */
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
return V_028040_Z_32_FLOAT;
/*
* Implemented as an 8_8_8_8 data format to fix texture
* gathers in stencil sampling. This affects at least
- * GL45-CTS.texture_cube_map_array.sampling on VI.
+ * GL45-CTS.texture_cube_map_array.sampling on GFX8.
*/
- if (sscreen->info.chip_class <= VI)
+ if (sscreen->info.chip_class <= GFX8)
return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
if (format == PIPE_FORMAT_X24S8_UINT)
color_info |= S_028C70_COMPRESSION(1);
unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh);
- if (sctx->chip_class == SI) {
- /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
+ if (sctx->chip_class == GFX6) {
+ /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */
color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
}
}
- if (sctx->chip_class >= VI) {
+ if (sctx->chip_class >= GFX8) {
unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
}
/* This must be set for fast clear to work without FMASK. */
- if (!tex->surface.fmask_size && sctx->chip_class == SI) {
+ if (!tex->surface.fmask_size && sctx->chip_class == GFX6) {
unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);
color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
}
if (tex->surface.has_stencil) {
- /* Stencil buffer workaround ported from the SI-CI-VI code.
+ /* Stencil buffer workaround ported from the GFX6-GFX8 code.
* See that for explanation.
*/
s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1);
S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned);
}
} else {
- /* SI-CI-VI */
+ /* GFX6-GFX8 */
struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level];
assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
s_info = S_028044_FORMAT(stencil_format);
surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile);
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
struct radeon_info *info = &sctx->screen->info;
unsigned index = tex->surface.u.legacy.tiling_index[level];
unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level];
bool unbound = false;
int i;
- /* Reject zero-sized framebuffers due to a hw bug on SI that occurs
+ /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs
* when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
* We could implement the full workaround here, but it's a useless case.
*/
}
/* For optimal DCC performance. */
- if (sctx->chip_class == VI)
+ if (sctx->chip_class == GFX8)
sctx->framebuffer.dcc_overwrite_combiner_watermark = 4;
else if (num_bpp64_colorbufs >= 5)
sctx->framebuffer.dcc_overwrite_combiner_watermark = 8;
radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4,
S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch));
} else {
- /* Compute mutable surface parameters (SI-CI-VI). */
+ /* Compute mutable surface parameters (GFX6-GFX8). */
const struct legacy_surf_level *level_info =
&tex->surface.u.legacy.level[cb->base.u.tex.level];
unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
if (tex->surface.fmask_size) {
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1);
cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index);
cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
}
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
- sctx->chip_class >= VI ? 14 : 13);
+ sctx->chip_class >= GFX8 ? 14 : 13);
radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */
radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */
radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */
radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */
radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */
- if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
+ if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */
radeon_emit(cs, cb_dcc_base);
}
}
/* The exclusion bits can be set to improve rasterization efficiency
* if no sample lies on the pixel boundary (-8 sample offset).
*/
- bool exclusion = sctx->chip_class >= CIK &&
+ bool exclusion = sctx->chip_class >= GFX7 &&
(!rs->multisample_enable || nr_samples != 16);
radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
/* The NUM_RECORDS field has a different meaning depending on the chip,
* instruction type, STRIDE, and SWIZZLE_ENABLE.
*
- * SI-CIK:
+ * GFX6-GFX7:
* - If STRIDE == 0, it's in byte units.
* - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
*
- * VI:
+ * GFX8:
* - For SMEM and STRIDE == 0, it's in byte units.
* - For SMEM and STRIDE != 0, it's in units of STRIDE.
* - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
* the first element is readable when IDXEN == 0.
*/
num_records = num_records ? MAX2(num_records, stride) : 0;
- else if (screen->info.chip_class == VI)
+ else if (screen->info.chip_class == GFX8)
num_records *= stride;
state[4] = 0;
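Putting the quoted rules together for a vertex buffer of 256 bytes with
stride = 16 (illustrative numbers):

   /* GFX6-GFX7, STRIDE != 0: NUM_RECORDS = 256 / 16 = 16, in stride units.
    * GFX8 VMEM:              NUM_RECORDS = 256, in bytes - which is what
    *                         "num_records *= stride" above restores.
    * The MAX2(num_records, stride) branch keeps the first element
    * readable even when IDXEN == 0. */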
/*
* X24S8 is implemented as an 8_8_8_8 data format, to
* fix texture gathers. This affects at least
- * GL45-CTS.texture_cube_map_array.sampling on VI.
+ * GL45-CTS.texture_cube_map_array.sampling on GFX8.
*/
- if (screen->info.chip_class <= VI)
+ if (screen->info.chip_class <= GFX8)
util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
else
util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
if (!sampler &&
(res->target == PIPE_TEXTURE_CUBE ||
res->target == PIPE_TEXTURE_CUBE_ARRAY ||
- (screen->info.chip_class <= VI &&
+ (screen->info.chip_class <= GFX8 &&
res->target == PIPE_TEXTURE_3D))) {
/* For the purpose of shader images, treat cube maps and 3D
* textures as 2D arrays. For 3D textures, the address
/* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state.
*/
- if (screen->info.chip_class <= CIK && res->nr_samples <= 1) {
+ if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) {
if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO;
else
height = height0;
depth = texture->depth0;
- if (sctx->chip_class <= VI && force_level) {
+ if (sctx->chip_class <= GFX8 && force_level) {
assert(force_level == first_level &&
force_level == last_level);
base_level = force_level;
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
- S_008F30_COMPAT_MODE(sctx->chip_class >= VI));
+ S_008F30_COMPAT_MODE(sctx->chip_class >= GFX8));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
S_008F38_MIP_POINT_PRECLAMP(0) |
- S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) |
+ S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) |
S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI));
+ S_008F38_ANISO_OVERRIDE(sctx->chip_class >= GFX8));
rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);
/* Create sampler resource for integer textures. */
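The MIN_LOD/MAX_LOD fields above are unsigned 4.8 fixed point. A minimal
sketch of the S_FIXED(x, 8) conversion, assuming it multiplies by
2^frac_bits as the name suggests (the helper is hypothetical):

   /* e.g. max_lod = 7.25 -> CLAMP(7.25, 0, 15) * 256 = 1856 (0x740) */
   static unsigned lod_to_fixed_4_8(float lod)
   {
      return (unsigned)(CLAMP(lod, 0.0f, 15.0f) * 256.0f);
   }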
/* The hardware always treats the 2-bit alpha channel as
* unsigned, so a shader workaround is needed. The affected
- * chips are VI and older except Stoney (GFX8.1).
+ * chips are GFX8 and older except Stoney (GFX8.1).
*/
- always_fix = sscreen->info.chip_class <= VI &&
+ always_fix = sscreen->info.chip_class <= GFX8 &&
sscreen->info.family != CHIP_STONEY &&
channel->type == UTIL_FORMAT_TYPE_SIGNED;
} else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) {
* into account would complicate the fast path (where everything
* is nicely aligned).
*/
- bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == SI;
+ bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == GFX6;
bool opencode = sscreen->options.vs_fetch_always_opencode;
if (check_alignment &&
}
if (flags & PIPE_BARRIER_INDEX_BUFFER) {
- /* Indices are read through TC L2 since VI.
+ /* Indices are read through TC L2 since GFX8.
* L1 isn't used.
*/
- if (sctx->screen->info.chip_class <= CIK)
+ if (sctx->screen->info.chip_class <= GFX7)
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
sctx->framebuffer.uncompressed_cb_mask) {
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
- if (sctx->chip_class <= VI)
+ if (sctx->chip_class <= GFX8)
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
- if (sctx->screen->info.chip_class <= VI &&
+ if (sctx->screen->info.chip_class <= GFX8 &&
flags & PIPE_BARRIER_INDIRECT_BUFFER)
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
static void si_set_grbm_gfx_index(struct si_context *sctx,
struct si_pm4_state *pm4, unsigned value)
{
- unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
+ unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX :
R_00802C_GRBM_GFX_INDEX;
si_pm4_set_reg(pm4, reg, value);
}
}
si_set_grbm_gfx_index(sctx, pm4, ~0);
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
}
*/
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
raster_config);
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
raster_config_1);
} else {
bool has_clear_state = sscreen->has_clear_state;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
- /* SI, radeon kernel disabled CLEAR_STATE. */
- assert(has_clear_state || sscreen->info.chip_class == SI ||
+ /* CLEAR_STATE is disabled on GFX6 and on the radeon kernel. */
+ assert(has_clear_state || sscreen->info.chip_class == GFX6 ||
sscreen->info.drm_major != 3);
if (!pm4)
si_pm4_cmd_end(pm4, false);
}
- if (sctx->chip_class <= VI)
+ if (sctx->chip_class <= GFX8)
si_set_raster_config(sctx, pm4);
si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
}
si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (!has_clear_state)
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (sctx->chip_class < CIK)
+ if (sctx->chip_class < GFX7)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
S_008A14_CLIP_VTX_REORDER_ENA(1));
/* CLEAR_STATE doesn't clear these correctly on certain generations.
* I don't know why. Deduced by trial and error.
*/
- if (sctx->chip_class <= CIK) {
+ if (sctx->chip_class <= GFX7) {
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
}
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
if (sctx->chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
- if (sctx->chip_class >= VI) {
+ if (sctx->chip_class >= GFX8) {
unsigned vgt_tess_distribution;
vgt_tess_distribution =
}
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
S_028084_ADDRESS(border_color_va >> 40));
}
struct si_shader_selector *tcs =
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
- bool has_primid_instancing_bug = sctx->chip_class == SI &&
+ bool has_primid_instancing_bug = sctx->chip_class == GFX6 &&
sctx->screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
*
- * While CIK can use 64K per threadgroup, there is a hang on Stoney
+ * While GFX7 can use 64K per threadgroup, there is a hang on Stoney
* with 2 CUs if we use more than 32K. The closed Vulkan driver also
* uses 32K at most on all GCN chips.
*/
if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48)
*num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch;
- if (sctx->chip_class == SI) {
- /* SI bug workaround, related to power management. Limit LS-HS
+ if (sctx->chip_class == GFX6) {
+ /* GFX6 bug workaround, related to power management. Limit LS-HS
* threadgroups to only one wave.
*/
unsigned one_wave = 64 / max_verts_per_patch;
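Worked example of the GFX6 clamp above: a triangle domain with
max_verts_per_patch = 3 gives one_wave = 64 / 3 = 21, so an LS-HS
threadgroup is limited to 21 patches to keep it within a single wave.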
* The intended solution is to restrict threadgroups to
* a single instance by setting SWITCH_ON_EOI, which
* should cause IA to split instances up. However, this
- * doesn't work correctly on SI when there is no other
+ * doesn't work correctly on GFX6 when there is no other
* SE to switch to.
*/
if (has_primid_instancing_bug && tess_uses_primid)
/* Compute the LDS size. */
lds_size = output_patch0_offset + output_patch_size * *num_patches;
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
assert(lds_size <= 65536);
lds_size = align(lds_size, 512) / 512;
} else {
/* Due to a hw bug, RSRC2_LS must be written twice with another
* LS register written in between. */
- if (sctx->chip_class == CIK && sctx->family != CHIP_HAWAII)
+ if (sctx->chip_class == GFX7 && sctx->family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
radeon_emit(cs, ls_current->config.rsrc1);
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
if (sctx->last_ls_hs_config != ls_hs_config) {
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
ls_hs_config);
} else {
key->u.uses_gs)
partial_vs_wave = true;
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= VI) */
+ /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= GFX8) */
if (sscreen->has_distributed_tess) {
if (key->u.uses_gs) {
- if (sscreen->info.chip_class == VI)
+ if (sscreen->info.chip_class == GFX8)
partial_es_wave = true;
} else {
partial_vs_wave = true;
wd_switch_on_eop = true;
}
- if (sscreen->info.chip_class >= CIK) {
+ if (sscreen->info.chip_class >= GFX7) {
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
* 4 shader engines. Set 1 to pass the assertion below.
* The other cases are hardware requirements.
* Assume indirect draws always use small instances.
* This is needed for good VS wave utilization.
*/
- if (sscreen->info.chip_class <= VI &&
+ if (sscreen->info.chip_class <= GFX8 &&
sscreen->info.max_se == 4 &&
key->u.multi_instances_smaller_than_primgroup)
wd_switch_on_eop = true;
- /* Required on CIK and later. */
+ /* Required on GFX7 and later. */
if (sscreen->info.max_se == 4 && !wd_switch_on_eop)
ia_switch_on_eoi = true;
sscreen->info.family == CHIP_VEGAM))
partial_vs_wave = true;
- /* Required by Hawaii and, for some special cases, by VI. */
+ /* Required by Hawaii and, for some special cases, by GFX8. */
if (ia_switch_on_eoi &&
(sscreen->info.family == CHIP_HAWAII ||
- (sscreen->info.chip_class == VI &&
+ (sscreen->info.chip_class == GFX8 &&
(key->u.uses_gs || max_primgroup_in_wave != 2))))
partial_vs_wave = true;
}
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if (sscreen->info.chip_class <= VI && ia_switch_on_eoi)
+ if (sscreen->info.chip_class <= GFX8 && ia_switch_on_eoi)
partial_es_wave = true;
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
- S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= CIK ? wd_switch_on_eop : 0) |
+ S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= GFX7 ? wd_switch_on_eop : 0) |
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == VI ?
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == GFX8 ?
max_primgroup_in_wave : 0) |
S_030960_EN_INST_OPT_BASIC(sscreen->info.chip_class >= GFX9) |
S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9);
if (sctx->gs_shader.cso) {
/* GS requirement. */
- if (sctx->chip_class <= VI &&
+ if (sctx->chip_class <= GFX8 &&
SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1);
radeon_set_uconfig_reg_idx(cs, sctx->screen,
R_030960_IA_MULTI_VGT_PARAM, 4,
ia_multi_vgt_param);
- else if (sctx->chip_class >= CIK)
+ else if (sctx->chip_class >= GFX7)
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
sctx->last_multi_vgt_param = ia_multi_vgt_param;
}
if (prim != sctx->last_prim) {
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
radeon_set_uconfig_reg_idx(cs, sctx->screen,
R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
else
break;
case 2:
index_type = V_028A7C_VGT_INDEX_16 |
- (SI_BIG_ENDIAN && sctx->chip_class <= CIK ?
+ (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ?
V_028A7C_VGT_DMA_SWAP_16_BIT : 0);
break;
case 4:
index_type = V_028A7C_VGT_INDEX_32 |
- (SI_BIG_ENDIAN && sctx->chip_class <= CIK ?
+ (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ?
V_028A7C_VGT_DMA_SWAP_32_BIT : 0);
break;
default:
si_resource(indexbuf),
RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
} else {
- /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
+ /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
* so the state must be re-emitted before the next indexed draw.
*/
- if (sctx->chip_class >= CIK)
+ if (sctx->chip_class >= GFX7)
sctx->last_index_size = -1;
}
if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
sctx->num_db_cache_flushes++;
- /* SI has a bug that it always flushes ICACHE and KCACHE if either
+ /* GFX6 has a bug where it always flushes ICACHE and KCACHE if either
* bit is set. An alternative way is to write SQC_CACHES, but that
* doesn't seem to work reliably. Since the bug doesn't affect
* correctness (it only does more work than necessary) and
if (flags & SI_CONTEXT_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
- if (sctx->chip_class == VI)
+ if (sctx->chip_class == GFX8)
si_cp_release_mem(sctx,
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
radeon_emit(cs, 0);
}
- /* SI-CI-VI only:
+ /* GFX6-GFX8 only:
* When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC
* waits for idle, so it should be last. SURFACE_SYNC is done in PFP.
*
* cp_coher_cntl should contain all necessary flags except TC flags
* at this point.
*
- * SI-CIK don't support L2 write-back.
+ * GFX6-GFX7 don't support L2 write-back.
*/
if (flags & SI_CONTEXT_INV_GLOBAL_L2 ||
- (sctx->chip_class <= CIK &&
+ (sctx->chip_class <= GFX7 &&
(flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
- /* Invalidate L1 & L2. (L1 is always invalidated on SI)
- * WB must be set on VI+ when TC_ACTION is set.
+ /* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
+ * WB must be set on GFX8+ when TC_ACTION is set.
*/
si_emit_surface_sync(sctx, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
- S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= VI));
+ S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
cp_coher_cntl = 0;
sctx->num_L2_invalidates++;
} else {
unsigned index_offset = info->indirect ? info->start * index_size : 0;
if (likely(!info->indirect)) {
- /* SI-CI treat instance_count==0 as instance_count==1. There is
+ /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
* no workaround for indirect draws, but we can at least skip
* direct draws.
*/
if (index_size) {
/* Translate or upload, if needed. */
- /* 8-bit indices are supported on VI. */
- if (sctx->chip_class <= CIK && index_size == 1) {
+ /* 8-bit indices are supported on GFX8. */
+ if (sctx->chip_class <= GFX7 && index_size == 1) {
unsigned start, count, start_offset, size, offset;
void *ptr;
/* info->start will be added by the drawing code */
index_offset -= start_offset;
- } else if (sctx->chip_class <= CIK &&
+ } else if (sctx->chip_class <= GFX7 &&
si_resource(indexbuf)->TC_L2_dirty) {
- /* VI reads index buffers through TC L2, so it doesn't
+ /* GFX8 reads index buffers through TC L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
si_resource(indexbuf)->TC_L2_dirty = false;
si_context_add_resource_size(sctx, indirect->buffer);
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
if (si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
si_resource(indirect->buffer)->TC_L2_dirty = false;
/* Start prefetches after the draw has been started. Both will run
* in parallel, but starting the draw first is more important.
*/
- if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, false);
} else {
/* If we don't wait for idle, start prefetches first, then set
si_emit_cache_flush(sctx);
/* Only prefetch the API VS and VBO descriptors. */
- if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, true);
if (!si_upload_graphics_shader_descriptors(sctx))
/* Prefetch the remaining shaders after the draw has been
* started. */
- if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+ if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, false);
}
unsigned vgpr_comp_cnt;
uint64_t va;
- assert(sscreen->info.chip_class <= VI);
+ assert(sscreen->info.chip_class <= GFX8);
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
S_00B428_FLOAT_MODE(shader->config.float_mode) |
S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
- if (sscreen->info.chip_class <= VI) {
+ if (sscreen->info.chip_class <= GFX8) {
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
shader->config.rsrc2);
}
uint64_t va;
unsigned oc_lds_en;
- assert(sscreen->info.chip_class <= VI);
+ assert(sscreen->info.chip_class <= GFX8);
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
SI_TRACKED_VGT_PRIMITIVEID_EN,
shader->ctx_reg.vs.vgt_primitiveid_en);
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF,
SI_TRACKED_VGT_REUSE_OFF,
shader->ctx_reg.vs.vgt_reuse_off);
shader->ctx_reg.vs.vgt_primitiveid_en = 0;
}
- if (sscreen->info.chip_class <= VI) {
+ if (sscreen->info.chip_class <= GFX8) {
/* Reuse needs to be set off if we write oViewport. */
shader->ctx_reg.vs.vgt_reuse_off =
S_028AB4_REUSE_OFF(info->writes_viewport_index);
blend && blend->alpha_to_coverage)
key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
- /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+ /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
 * to the range supported by the type if a channel has fewer
 * than 16 bits and the export format is 16_ABGR.
*/
- if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) {
+ if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
}
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_ls) {
- assert(sctx->chip_class <= VI);
+ assert(sctx->chip_class <= GFX8);
si_pm4_delete_state(sctx, ls, shader->pm4);
} else if (shader->key.as_es) {
- assert(sctx->chip_class <= VI);
+ assert(sctx->chip_class <= GFX8);
si_pm4_delete_state(sctx, es, shader->pm4);
} else {
si_pm4_delete_state(sctx, vs, shader->pm4);
break;
case PIPE_SHADER_TESS_EVAL:
if (shader->key.as_es) {
- assert(sctx->chip_class <= VI);
+ assert(sctx->chip_class <= GFX8);
si_pm4_delete_state(sctx, es, shader->pm4);
} else {
si_pm4_delete_state(sctx, vs, shader->pm4);
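/* The as_ls/as_es asserts above hold because GFX9 merges LS into HS
 * and ES into GS, so dedicated LS/ES pm4 states only ever exist on
 * GFX8 and older.
 */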
unsigned num_se = sctx->screen->info.max_se;
unsigned wave_size = 64;
unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
- * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+ /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+ * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
*/
- unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se;
+ unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se;
unsigned alignment = 256 * num_se;
/* The maximum size is 63.999 MB per SE. */
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
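/* Worked example with assumed values: num_se = 4 on GFX8 gives
 * gs_vertex_reuse = 32 * 4 = 128, alignment = 256 * 4 = 1024 bytes,
 * and max_size = (63.999 MiB rounded down to 256 bytes) * 4.
 */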
*
* GFX9 doesn't have the ESGS ring.
*/
- bool update_esgs = sctx->chip_class <= VI &&
+ bool update_esgs = sctx->chip_class <= GFX8 &&
esgs_ring_size &&
(!sctx->esgs_ring ||
sctx->esgs_ring->width0 < esgs_ring_size);
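/* (GFX9 instead passes ES->GS data through LDS thanks to the merged
 * shader stages, so no ESGS ring buffer is ever allocated there.)
 */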
if (!pm4)
return false;
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
if (sctx->esgs_ring) {
- assert(sctx->chip_class <= VI);
+ assert(sctx->chip_class <= GFX8);
si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
sctx->esgs_ring->width0 / 256);
}
/* Set ring bindings. */
if (sctx->esgs_ring) {
- assert(sctx->chip_class <= VI);
+ assert(sctx->chip_class <= GFX8);
si_set_ring_buffer(sctx, SI_ES_RING_ESGS,
sctx->esgs_ring, 0, sctx->esgs_ring->width0,
true, true, 4, 64, 0);
sctx->screen->tess_offchip_ring_size;
/* Append these registers to the init config state. */
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
}
/* VS as LS */
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
r = si_shader_select(ctx, &sctx->vs_shader,
&compiler_state);
if (r)
if (sctx->gs_shader.cso) {
/* TES as ES */
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
r = si_shader_select(ctx, &sctx->tes_shader,
&compiler_state);
if (r)
si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
}
} else if (sctx->gs_shader.cso) {
- if (sctx->chip_class <= VI) {
+ if (sctx->chip_class <= GFX8) {
/* VS as ES */
r = si_shader_select(ctx, &sctx->vs_shader,
&compiler_state);
return false;
} else {
si_pm4_bind_state(sctx, gs, NULL);
- if (sctx->chip_class <= VI)
+ if (sctx->chip_class <= GFX8)
si_pm4_bind_state(sctx, es, NULL);
}
sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
- if (sctx->chip_class == SI)
+ if (sctx->chip_class == GFX6)
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
if (sctx->framebuffer.nr_samples <= 1)
return false;
}
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
if (si_pm4_state_enabled_and_changed(sctx, ls))
sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
else if (!sctx->queued.named.ls)
* to flush it.
*
 * The only cases that require flushing it are VGT DMA index
- * fetching (on <= CIK) and indirect draw data, which are rare
+ * fetching (on <= GFX7) and indirect draw data, which are rare
* cases. Thus, flag the TC L2 dirtiness in the resource and
* handle it at draw call time.
*/
unsigned reg_strmout_cntl;
/* The register is at different places on different ASICs. */
- if (sctx->chip_class >= CIK) {
+ if (sctx->chip_class >= GFX7) {
reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
} else {
t[i]->stride_in_dw = stride_in_dw[i];
- /* SI binds streamout buffers as shader resources.
+ /* AMD GCN binds streamout buffers as shader resources.
 * VGT only counts primitives and tells the shader what
 * to do through SGPRs. */
radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
if (scissor)
si_clip_scissor(&final, scissor);
- /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_-
+ /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_-
* SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0.
*/
- if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) {
+ if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) {
radeon_emit(cs, S_028250_TL_X(1) |
S_028250_TL_Y(1) |
S_028250_WINDOW_OFFSET_DISABLE(1));
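/* The replacement scissor presumably stays empty as well (the
 * incoming one already had maxx == 0 or maxy == 0); the point is
 * merely to keep BR_X/BR_Y positive so the hw bug cannot trigger.
 */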
int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;
- /* SI-CI need to align the offset to an ubertile consisting of all SEs. */
+ /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */
const unsigned hw_screen_offset_alignment =
- ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
+ ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
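/* Example with assumed numbers: a viewport-as-scissor of
 * (0,0)-(1920,1080) gives hw_screen_offset_x = (1920 + 0) / 2 = 960
 * and hw_screen_offset_y = 540; GFX6-GFX7 must then realign those to
 * se_tile_repeat (at least 16), while GFX8+ always aligns to 16.
 */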
/* Indexed by quantization modes */
static int max_viewport_size[] = {65535, 16383, 4095};
unsigned cs_dwords_per_thread =
test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0;
- if (sctx->chip_class == SI) {
- /* SI doesn't support CP DMA operations through L2. */
+ if (sctx->chip_class == GFX6) {
+ /* GFX6 doesn't support CP DMA operations through L2. */
if (test_cp && cache_policy != L2_BYPASS)
continue;
- /* WAVES_PER_SH is in multiples of 16 on SI. */
+ /* WAVES_PER_SH is in multiples of 16 on GFX6. */
if (test_cs && cs_waves_per_sh % 16 != 0)
continue;
}
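/* E.g. cs_waves_per_sh = 8 is skipped on GFX6: the hardware field
 * counts waves in units of 16 there, so only 0, 16, 32, ... are
 * representable.
 */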
unsigned query_type = PIPE_QUERY_TIME_ELAPSED;
if (test_sdma) {
- if (sctx->chip_class == SI)
+ if (sctx->chip_class == GFX6)
query_type = SI_QUERY_TIME_ELAPSED_SDMA_SI;
else
query_type = SI_QUERY_TIME_ELAPSED_SDMA;
if (!r->is_valid)
continue;
- /* Ban CP DMA clears via MC on <= VI. They are super slow
+ /* Ban CP DMA clears via MC on <= GFX8. They are super slow
* on GTT, which we can get due to BO evictions.
*/
- if (sctx->chip_class <= VI && placement == 1 &&
+ if (sctx->chip_class <= GFX8 && placement == 1 &&
r->is_cp && r->cache_policy == L2_BYPASS)
continue;
array_mode == RADEON_SURF_MODE_2D)) {
/* TC-compatible HTILE only supports Z32_FLOAT.
* GFX9 also supports Z16_UNORM.
- * On VI, promote Z16 to Z32. DB->CB copies will convert
+ * On GFX8, promote Z16 to Z32. DB->CB copies will convert
* the format for transfers.
*/
- if (sscreen->info.chip_class == VI)
+ if (sscreen->info.chip_class == GFX8)
bpe = 4;
flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
flags |= RADEON_SURF_SBUFFER;
}
- if (sscreen->info.chip_class >= VI &&
+ if (sscreen->info.chip_class >= GFX8 &&
(ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC ||
ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT ||
(ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed)))
bpe == 16 && ptex->nr_samples >= 2)
flags |= RADEON_SURF_DISABLE_DCC;
- /* VI: DCC clear for 4x and 8x MSAA array textures unimplemented. */
- if (sscreen->info.chip_class == VI &&
+ /* GFX8: DCC clear for 4x and 8x MSAA array textures unimplemented. */
+ if (sscreen->info.chip_class == GFX8 &&
ptex->nr_storage_samples >= 4 &&
ptex->array_size > 1)
flags |= RADEON_SURF_DISABLE_DCC;
md.size_metadata = 10 * 4;
/* Dwords [10:..] contain the mipmap level offsets. */
- if (sscreen->info.chip_class <= VI) {
+ if (sscreen->info.chip_class <= GFX8) {
for (unsigned i = 0; i <= res->last_level; i++)
md.metadata[10+i] = tex->surface.u.legacy.level[i].offset >> 8;
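/* (The >> 8 stores each level offset in 256-byte units, which works
 * because level offsets are at least 256-byte aligned on these
 * chips.)
 */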
{
uint32_t *desc = &md->metadata[2];
- if (sscreen->info.chip_class < VI)
+ if (sscreen->info.chip_class < GFX8)
return;
/* Return if DCC is enabled. The texture should be set up with it
{
struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen;
- if (sscreen->info.chip_class <= VI)
+ if (sscreen->info.chip_class <= GFX8)
return false;
/* This needs a cache flush before scanout.
assert(tex->surface.tile_swizzle == 0);
}
- /* Since shader image stores don't support DCC on VI,
+ /* Since shader image stores don't support DCC on GFX8,
* disable it for external clients that want write
* access.
*/
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = sscreen->info.num_tile_pipes;
- assert(sscreen->info.chip_class <= VI);
+ assert(sscreen->info.chip_class <= GFX8);
tex->surface.htile_size = 0;
* are always reproducible. I think I have seen the test hang
* on Carrizo too, though it was very rare there.
*/
- if (sscreen->info.chip_class >= CIK && num_pipes < 4)
+ if (sscreen->info.chip_class >= GFX7 && num_pipes < 4)
num_pipes = 4;
switch (num_pipes) {
static void si_texture_allocate_htile(struct si_screen *sscreen,
struct si_texture *tex)
{
- if (sscreen->info.chip_class <= VI && !tex->tc_compatible_htile)
+ if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile)
si_texture_get_htile_size(sscreen, tex);
if (!tex->surface.htile_size)
RADEON_SURF_TC_COMPATIBLE_HTILE);
/* TC-compatible HTILE:
- * - VI only supports Z32_FLOAT.
+ * - GFX8 only supports Z32_FLOAT.
* - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
if (tex->tc_compatible_htile) {
if (sscreen->info.chip_class >= GFX9 &&
if (templ->flags & SI_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
- /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI,
+ /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
* which requires 2D tiling.
*/
- if (sscreen->info.chip_class == VI && tc_compatible_htile)
+ if (sscreen->info.chip_class == GFX8 && tc_compatible_htile)
return RADEON_SURF_MODE_2D;
/* Handle common candidates for the linear mode.
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
- /* Cursors are linear on SI.
+ /* Cursors are linear on AMD GCN.
* (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
if (templ->bind & PIPE_BIND_CURSOR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
struct radeon_surf surface = {0};
bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH;
bool tc_compatible_htile =
- sscreen->info.chip_class >= VI &&
+ sscreen->info.chip_class >= GFX8 &&
/* There are issues with TC-compatible HTILE on Tonga (and
* Iceland is the same design), and documented bug workarounds
* don't help. For example, this fails:
sctx->screen->debug_flags & DBG(NO_DCC_FB))
return;
- assert(sctx->chip_class >= VI);
+ assert(sctx->chip_class >= GFX8);
if (tex->dcc_offset)
return; /* already enabled */
static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
{
- return cs->ctx->ws->info.chip_class >= CIK &&
+ return cs->ctx->ws->info.chip_class >= GFX7 &&
(cs->ring_type == RING_GFX || cs->ring_type == RING_COMPUTE);
}
switch (cs->ring_type) {
case RING_DMA:
/* pad DMA ring to 8 DWs */
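/* e.g. rcs->current.cdw == 13 gets three 0xf0000000 NOPs to reach 16 */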
- if (ws->info.chip_class <= SI) {
+ if (ws->info.chip_class <= GFX6) {
while (rcs->current.cdw & 7)
radeon_emit(rcs, 0xf0000000); /* NOP packet */
} else {
if (i >= CHIP_VEGA10)
ws->info.chip_class = GFX9;
else if (i >= CHIP_TONGA)
- ws->info.chip_class = VI;
+ ws->info.chip_class = GFX8;
else if (i >= CHIP_BONAIRE)
- ws->info.chip_class = CIK;
+ ws->info.chip_class = GFX7;
else
- ws->info.chip_class = SI;
+ ws->info.chip_class = GFX6;
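/* Mapping used by this cascade: CHIP_VEGA10+ is GFX9, CHIP_TONGA+ is
 * GFX8 (Volcanic Islands), CHIP_BONAIRE+ is GFX7 (Sea Islands), and
 * everything older here is GFX6 (Southern Islands).
 */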
/* Don't submit any IBs. */
setenv("RADEON_NOOP", "1", 1);
switch (cs->ring_type) {
case RING_DMA:
/* pad DMA ring to 8 DWs */
- if (cs->ws->info.chip_class <= SI) {
+ if (cs->ws->info.chip_class <= GFX6) {
while (rcs->current.cdw & 7)
radeon_emit(&cs->base, 0xf0000000); /* NOP packet */
} else {
{
uint32_t tile_mode;
- if (info->chip_class < SI) {
+ if (info->chip_class < GFX6) {
surf->micro_tile_mode = 0;
return;
}
tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
- if (info->chip_class >= CIK)
+ if (info->chip_class >= GFX7)
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
else
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
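/* Two extractors are needed because GFX7 moved the field within the
 * tile-mode register, hence MICRO_TILE_MODE_NEW vs. the original
 * MICRO_TILE_MODE bits.
 */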
if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
return;
- assert(info->chip_class <= VI);
+ assert(info->chip_class <= GFX8);
switch (num_pipes) {
case 2:
case CHIP_VERDE:
case CHIP_OLAND:
case CHIP_HAINAN:
- ws->info.chip_class = SI;
+ ws->info.chip_class = GFX6;
break;
case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_KABINI:
case CHIP_HAWAII:
case CHIP_MULLINS:
- ws->info.chip_class = CIK;
+ ws->info.chip_class = GFX7;
break;
}
return false;
}
- if (ws->info.chip_class == CIK) {
+ if (ws->info.chip_class == GFX7) {
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL,
ws->info.cik_macrotile_mode_array)) {
- fprintf(stderr, "radeon: Kernel 3.13 is required for CIK support.\n");
+ fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n");
return false;
}
}
- if (ws->info.chip_class >= SI) {
+ if (ws->info.chip_class >= GFX6) {
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
ws->info.si_tile_mode_array)) {
- fprintf(stderr, "radeon: Kernel 3.10 is required for SI support.\n");
+ fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n");
return false;
}
}
/* Hawaii with old firmware needs type2 nop packet.
* accel_working2 with value 3 indicates the new firmware.
*/
- ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI ||
+ ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 ||
(ws->info.family == CHIP_HAWAII &&
ws->accel_working2 < 3);
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
ws->info.ib_start_alignment = 4096;
ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
- /* HTILE is broken with 1D tiling on old kernels and CIK. */
- ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != CIK ||
+ /* HTILE is broken with 1D tiling on old kernels and GFX7. */
+ ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 ||
ws->info.drm_minor >= 38;
ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48;
ws->info.has_bo_metadata = false;
ws->info.kernel_flushes_tc_l2_after_ib = true;
/* Old kernels disallowed register writes via COPY_DATA
* that are used for indirect compute dispatches. */
- ws->info.has_indirect_compute_dispatch = ws->info.chip_class == CIK ||
- (ws->info.chip_class == SI &&
+ ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 ||
+ (ws->info.chip_class == GFX6 &&
ws->info.drm_minor >= 45);
- /* SI doesn't support unaligned loads. */
- ws->info.has_unaligned_shader_loads = ws->info.chip_class == CIK &&
+ /* GFX6 doesn't support unaligned loads. */
+ ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 &&
ws->info.drm_minor >= 50;
ws->info.has_sparse_vm_mappings = false;
- /* 2D tiling on CIK is supported since DRM 2.35.0 */
- ws->info.has_2d_tiling = ws->info.chip_class <= SI || ws->info.drm_minor >= 35;
+ /* 2D tiling on GFX7 is supported since DRM 2.35.0 */
+ ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35;
ws->info.has_read_registers_query = ws->info.drm_minor >= 42;
ws->info.max_alignment = 1024*1024;
_mesa_primitive_restart_index(ctx, index_size);
/* Enable primitive restart only when the restart index can have an
- * effect. This is required for correctness in radeonsi VI support.
+ * effect. This is required for correctness in radeonsi GFX8 support.
* Other hardware may also benefit from taking a faster, non-restart path
* when possible.
*/
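/* Example: with a restart index of 0xffffffff and 16-bit indices, no
 * fetched index can ever match, so restart can stay disabled and the
 * faster non-restart path be taken.
 */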