From: Marek Olšák Date: Wed, 15 May 2019 02:16:20 +0000 (-0400) Subject: ac: rename SI-CIK-VI to GFX6-GFX7-GFX8 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ccfcb9d818b40564001b3cf2516367526de26c1d;p=mesa.git ac: rename SI-CIK-VI to GFX6-GFX7-GFX8 Acked-by: Dave Airlie We already use GFX9 and I don't want us to have confusing naming in the driver. GFXn naming is better from the driver perspective, because it's the real version of the gfx portion of the hw. Also, CIK means Bonaire-Kaveri-Kabini, it doesn't mean CI. It shouldn't confuse our SDMA, UVD, VCE etc. code much. Those have nothing to do with GFXn and they have their own version numbers. --- diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index e5463b66616..187e9d6ba66 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -268,7 +268,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); break; case PKT3_SURFACE_SYNC: - if (ib->chip_class >= CIK) { + if (ib->chip_class >= GFX7) { ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index e46424dd885..db7f9e47ce1 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -78,7 +78,7 @@ static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info) case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16: return 16; default: - fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n"); + fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n"); assert(!"this should never occur"); return 2; } @@ -323,11 +323,11 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, if (info->family >= CHIP_VEGA10) info->chip_class = GFX9; else if (info->family >= CHIP_TONGA) - info->chip_class = VI; + info->chip_class = GFX8; else if (info->family >= CHIP_BONAIRE) - info->chip_class = CIK; + info->chip_class = GFX7; else if (info->family >= CHIP_TAHITI) - info->chip_class = SI; + info->chip_class = GFX6; else { fprintf(stderr, "amdgpu: Unknown family.\n"); return false; @@ -382,18 +382,18 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->has_gpu_reset_counter_query = false; info->has_eqaa_surface_allocator = true; info->has_format_bc1_through_bc7 = true; - /* DRM 3.1.0 doesn't flush TC for VI correctly. */ - info->kernel_flushes_tc_l2_after_ib = info->chip_class != VI || + /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */ + info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 || info->drm_minor >= 2; info->has_indirect_compute_dispatch = true; - /* SI doesn't support unaligned loads. */ - info->has_unaligned_shader_loads = info->chip_class != SI; - /* Disable sparse mappings on SI due to VM faults in CP DMA. Enable them once + /* GFX6 doesn't support unaligned loads. */ + info->has_unaligned_shader_loads = info->chip_class != GFX6; + /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once * these faults are mitigated in software. * Disable sparse mappings on GFX9 due to hangs. */ info->has_sparse_vm_mappings = - info->chip_class >= CIK && info->chip_class <= VI && + info->chip_class >= GFX7 && info->chip_class <= GFX8 && info->drm_minor >= 13; info->has_2d_tiling = true; info->has_read_registers_query = true; @@ -446,7 +446,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev, info->pte_fragment_size = alignment_info.size_local; info->gart_page_size = alignment_info.size_remote; - if (info->chip_class == SI) + if (info->chip_class == GFX6) info->gfx_ib_pad_with_type2 = TRUE; unsigned ib_align = 0; @@ -791,7 +791,7 @@ ac_get_harvested_configs(struct radeon_info *info, assert(rb_per_pkr == 1 || rb_per_pkr == 2); - if (info->chip_class >= CIK) { + if (info->chip_class >= GFX7) { unsigned raster_config_1 = *cik_raster_config_1_p; if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) { diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 946c2df82d0..11fb77eee87 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -183,7 +183,7 @@ static inline unsigned ac_get_max_simd_waves(enum radeon_family family) static inline uint32_t ac_get_num_physical_sgprs(enum chip_class chip_class) { - return chip_class >= VI ? 800 : 512; + return chip_class >= GFX8 ? 800 : 512; } #ifdef __cplusplus diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 58dcd2e863d..3ad9bb34805 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -826,14 +826,14 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx, * helper invocation which happens to fall on a different * layer due to extrapolation." * - * VI and earlier attempt to implement this in hardware by + * GFX8 and earlier attempt to implement this in hardware by * clamping the value of coords[2] = (8 * layer) + face. * Unfortunately, this means that the we end up with the wrong * face when clamping occurs. * * Clamp the layer earlier to work around the issue. */ - if (ctx->chip_class <= VI) { + if (ctx->chip_class <= GFX8) { LLVMValueRef ge0; ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, ""); tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, ""); @@ -1392,7 +1392,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx, offset = LLVMBuildAdd(ctx->builder, offset, soffset, ""); if (allow_smem && !slc && - (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= VI))) { + (!glc || (HAVE_LLVM >= 0x0800 && ctx->chip_class >= GFX8))) { assert(vindex == NULL); LLVMValueRef result[8]; @@ -1783,7 +1783,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, } int log_recombine = 0; - if (ctx->chip_class == SI && !known_aligned) { + if (ctx->chip_class == GFX6 && !known_aligned) { /* Avoid alignment restrictions by loading one byte at a time. */ load_num_channels <<= load_log_size; log_recombine = load_log_size; @@ -1819,7 +1819,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx, } if (log_recombine > 0) { - /* Recombine bytes if necessary (SI only) */ + /* Recombine bytes if necessary (GFX6 only) */ LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16; for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) { @@ -2212,7 +2212,7 @@ ac_get_thread_id(struct ac_llvm_context *ctx) } /* - * SI implements derivatives using the local data store (LDS) + * AMD GCN implements derivatives using the local data store (LDS) * All writes to the LDS happen in all executing threads at * the same time. TID is the Thread ID for the current * thread and is a value between 0 and 63, representing @@ -3304,7 +3304,7 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx) void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx) { - unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768; + unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768; ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds"); @@ -4034,7 +4034,7 @@ ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, * \param maxprefix specifies that the result only needs to be correct for a * prefix of this many threads * - * TODO: add inclusive and excluse scan functions for SI chip class. + * TODO: add inclusive and excluse scan functions for GFX6. */ static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity, @@ -4142,28 +4142,28 @@ ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsign result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 4) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 8) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 16) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI && cluster_size != 32) + if (ctx->chip_class >= GFX8 && cluster_size != 32) swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false); else swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10)); result = ac_build_alu_op(ctx, result, swap, op); if (cluster_size == 32) return ac_build_wwm(ctx, result); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false); result = ac_build_alu_op(ctx, result, swap, op); result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); @@ -4350,7 +4350,7 @@ ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3) { unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false); } else { return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask); diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 682645e9b1f..53c4ff7d383 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -112,7 +112,7 @@ get_ac_image_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim sdim, enum ac_image_dim dim = get_ac_sampler_dim(ctx, sdim, is_array); if (dim == ac_image_cube || - (ctx->chip_class <= VI && dim == ac_image_3d)) + (ctx->chip_class <= GFX8 && dim == ac_image_3d)) dim = ac_image_2darray; return dim; @@ -371,7 +371,7 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, src0 = ac_to_float(ctx, src0); result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); - if (ctx->chip_class >= VI) { + if (ctx->chip_class >= GFX8) { LLVMValueRef args[2]; /* Check if the result is a denormal - and flush to 0 if so. */ args[0] = result; @@ -382,10 +382,10 @@ static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, /* need to convert back up to f32 */ result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); - if (ctx->chip_class >= VI) + if (ctx->chip_class >= GFX8) result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); else { - /* for SI/CIK */ + /* for GFX6-GFX7 */ /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, * so compare the result and flush to 0 if it's smaller. */ @@ -1169,9 +1169,9 @@ get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_ele LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), ""); - /* VI only */ - if (ctx->ac.chip_class == VI && in_elements) { - /* On VI, the descriptor contains the size in bytes, + /* GFX8 only */ + if (ctx->ac.chip_class == GFX8 && in_elements) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ @@ -1376,7 +1376,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; } - if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { + if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) { nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr); nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr); const struct glsl_type *type = glsl_without_array(var->type); @@ -1535,11 +1535,11 @@ static unsigned get_cache_policy(struct ac_nir_context *ctx, { unsigned cache_policy = 0; - /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All * store opcodes not aligned to a dword are affected. The only way to * get unaligned stores is through shader images. */ - if (((may_store_unaligned && ctx->ac.chip_class == SI) || + if (((may_store_unaligned && ctx->ac.chip_class == GFX6) || /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. @@ -2773,11 +2773,11 @@ static void emit_membar(struct ac_llvm_context *ac, void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) { - /* SI only (thanks to a hw bug workaround): + /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { + if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) { ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); return; } @@ -3557,13 +3557,13 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, @@ -3572,7 +3572,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef img7, samp0; - if (ctx->ac.chip_class >= VI) + if (ctx->ac.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(builder, res, @@ -3756,7 +3756,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) * It's unnecessary if the original texture format was * Z32_FLOAT, but we don't know that here. */ - if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference) + if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->abi->clamp_shadow_reference) args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare)); /* pack derivatives */ @@ -4396,7 +4396,7 @@ ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - bool llvm_has_working_vgpr_indexing = chip_class <= VI; + bool llvm_has_working_vgpr_indexing = chip_class <= GFX8; /* TODO: Indirect indexing of GS inputs is unimplemented. * diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h index 8debb1ff986..2051f22d29b 100644 --- a/src/amd/common/ac_shader_abi.h +++ b/src/amd/common/ac_shader_abi.h @@ -196,7 +196,7 @@ struct ac_shader_abi { LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi); - /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently + /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently * uses it due to promoting D16 to D32, but radv needs it off. */ bool clamp_shadow_reference; diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c index 531395f4f62..64152081737 100644 --- a/src/amd/common/ac_shader_util.c +++ b/src/amd/common/ac_shader_util.c @@ -104,7 +104,7 @@ ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class) return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode)| - S_028A40_ES_WRITE_OPTIMIZE(chip_class <= VI) | + S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) | S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0); } @@ -167,9 +167,9 @@ ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, } } - /* SI (except OLAND and HAINAN) has a bug that it only looks + /* GFX6 (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. */ - if (ctx->chip_class == SI && + if (ctx->chip_class == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN) mask |= 0x1; diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c index f9dd4f5d77d..a9433b9696c 100644 --- a/src/amd/common/ac_surface.c +++ b/src/amd/common/ac_surface.c @@ -452,7 +452,7 @@ static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, { uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; - if (info->chip_class >= CIK) + if (info->chip_class >= GFX7) surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); else surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); @@ -526,8 +526,8 @@ static int gfx6_surface_settings(ADDR_HANDLE addrlib, } /* Compute tile swizzle. */ - /* TODO: fix tile swizzle with mipmapping for SI */ - if ((info->chip_class >= CIK || config->info.levels == 1) && + /* TODO: fix tile swizzle with mipmapping for GFX6 */ + if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index && surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D && !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) && @@ -567,7 +567,7 @@ void ac_compute_cmask(const struct radeon_info *info, if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) return; - assert(info->chip_class <= VI); + assert(info->chip_class <= GFX8); switch (num_pipes) { case 2: @@ -732,7 +732,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, * driver team). */ AddrSurfInfoIn.flags.dccCompatible = - info->chip_class >= VI && + info->chip_class >= GFX8 && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed && @@ -742,7 +742,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0; AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER); - /* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit) + /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit) * for Z and stencil. This can cause a number of problems which we work * around here: * @@ -799,7 +799,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1); - if (info->chip_class == SI) { + if (info->chip_class == GFX6) { if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) { if (surf->bpe == 2) AddrSurfInfoIn.tileIndex = 11; /* 16bpp */ @@ -816,7 +816,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib, AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */ } } else { - /* CIK - VI */ + /* GFX7 - GFX8 */ if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */ else diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h index 10d25e23d32..08aac94d3a9 100644 --- a/src/amd/common/ac_surface.h +++ b/src/amd/common/ac_surface.h @@ -221,7 +221,7 @@ struct radeon_surf { uint32_t cmask_alignment; union { - /* R600-VI return values. + /* Return values for GFX8 and older. * * Some of them can be set by the caller if certain parameters are * desirable. The allocator will try to obey them. diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h index 82eff1a492f..ee30dc74b93 100644 --- a/src/amd/common/amd_family.h +++ b/src/amd/common/amd_family.h @@ -110,9 +110,9 @@ enum chip_class { R700, EVERGREEN, CAYMAN, - SI, /* GFX6 */ - CIK, /* GFX7 */ - VI, /* GFX8 */ + GFX6, + GFX7, + GFX8, GFX9, }; diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 6d1f3fc7d5a..ec1fcf4fd64 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -215,7 +215,7 @@ radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; } enum ring_type radv_queue_family_to_ring(int f) { @@ -1041,7 +1041,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout layout) { - bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; + bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8; struct radv_color_buffer_info *cb = &att->cb; uint32_t cb_color_info = cb->cb_color_info; @@ -1629,8 +1629,8 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { - uint8_t watermark = 4; /* Default value for VI. */ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) { + uint8_t watermark = 4; /* Default value for GFX8. */ /* For optimal DCC performance. */ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { @@ -1691,7 +1691,7 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) uint32_t db_count_control; if(!cmd_buffer->state.active_occlusion_queries) { - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) { @@ -1710,7 +1710,7 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) const struct radv_subpass *subpass = cmd_buffer->state.subpass; uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0; - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { db_count_control = S_028004_PERFECT_ZPASS_COUNTS(has_perfect_queries) | S_028004_SAMPLE_RATE(sample_rate) | @@ -2019,7 +2019,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, va += offset + buffer->offset; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride) + if (cmd_buffer->device->physical_device->rad_info.chip_class <= GFX7 && stride) desc[2] = (buffer->size - offset - velems->format_size[i]) / stride + 1; else desc[2] = buffer->size - offset; @@ -2106,7 +2106,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) /* Set the descriptor. * - * On VI, the format must be non-INVALID, otherwise + * On GFX8, the format must be non-INVALID, otherwise * the buffer will be considered not bound and store * instructions will be no-ops. */ @@ -2211,7 +2211,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - } else if (info->chip_class >= CIK) { + } else if (info->chip_class >= GFX7) { radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); @@ -2948,7 +2948,7 @@ VkResult radv_EndCommandBuffer( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { - if (cmd_buffer->device->physical_device->rad_info.chip_class == SI) + if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; si_emit_cache_flush(cmd_buffer); } @@ -3824,11 +3824,11 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) radv_emit_index_buffer(cmd_buffer); } else { - /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed * draw. */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { cmd_buffer->state.last_index_type = -1; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; } @@ -3849,7 +3849,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline; @@ -3859,7 +3859,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->cs, 4096); if (likely(!info->indirect)) { - /* SI-CI treat instance_count==0 as instance_count==1. There is + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ @@ -4237,7 +4237,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, { struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline; bool has_prefetch = - cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7; bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline; @@ -5043,7 +5043,7 @@ static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -5084,7 +5084,7 @@ void radv_CmdBeginTransformFeedbackEXT( if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) counter_buffer_idx = -1; - /* SI binds streamout buffers as shader resources. + /* AMD GCN binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader through * SGPRs what to do. */ diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 4854b094ba7..432e65b1475 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -131,7 +131,7 @@ radv_dump_debug_registers(struct radv_device *device, FILE *f) radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); - if (info->chip_class <= VI) { + if (info->chip_class <= GFX8) { radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 4b64f5101ed..c0e317a97e5 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -220,11 +220,11 @@ radv_handle_env_var_force_family(struct radv_physical_device *device) if (i >= CHIP_VEGA10) device->rad_info.chip_class = GFX9; else if (i >= CHIP_TONGA) - device->rad_info.chip_class = VI; + device->rad_info.chip_class = GFX8; else if (i >= CHIP_BONAIRE) - device->rad_info.chip_class = CIK; + device->rad_info.chip_class = GFX7; else - device->rad_info.chip_class = SI; + device->rad_info.chip_class = GFX6; return; } @@ -332,7 +332,7 @@ radv_physical_device_init(struct radv_physical_device *device, disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags); - if (device->rad_info.chip_class < VI || + if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX9) fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); @@ -349,18 +349,18 @@ radv_physical_device_init(struct radv_physical_device *device, } /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs - * on SI. + * on GFX6. */ - device->has_clear_state = device->rad_info.chip_class >= CIK; + device->has_clear_state = device->rad_info.chip_class >= GFX7; - device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= VI; + device->cpdma_prefetch_writes_memory = device->rad_info.chip_class <= GFX8; /* Vega10/Raven need a special workaround for a hardware bug. */ device->has_scissor_bug = device->rad_info.family == CHIP_VEGA10 || device->rad_info.family == CHIP_RAVEN; /* Out-of-order primitive rasterization. */ - device->has_out_of_order_rast = device->rad_info.chip_class >= VI && + device->has_out_of_order_rast = device->rad_info.chip_class >= GFX8 && device->rad_info.max_se >= 2; device->out_of_order_rast_allowed = device->has_out_of_order_rast && !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER); @@ -368,9 +368,9 @@ radv_physical_device_init(struct radv_physical_device *device, device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA); - /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */ + /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */ device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 || - (device->rad_info.chip_class >= VI && + (device->rad_info.chip_class >= GFX8 && device->rad_info.me_fw_feature >= 41); radv_physical_device_init_mem_types(device); @@ -769,7 +769,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderTessellationAndGeometryPointSize = true, .shaderImageGatherExtended = true, .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= VI, + .shaderStorageImageMultisample = pdevice->rad_info.chip_class >= GFX8, .shaderUniformBufferArrayDynamicIndexing = true, .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, @@ -822,7 +822,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { VkPhysicalDevice16BitStorageFeatures *features = (VkPhysicalDevice16BitStorageFeatures*)ext; - bool enabled = pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= GFX8; features->storageBuffer16BitAccess = enabled; features->uniformAndStorageBuffer16BitAccess = enabled; features->storagePushConstant16 = enabled; @@ -884,7 +884,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: { VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features = (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext; - features->scalarBlockLayout = pdevice->rad_info.chip_class >= CIK; + features->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7; break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: { @@ -916,7 +916,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: { VkPhysicalDevice8BitStorageFeaturesKHR *features = (VkPhysicalDevice8BitStorageFeaturesKHR*)ext; - bool enabled = pdevice->rad_info.chip_class >= VI; + bool enabled = pdevice->rad_info.chip_class >= GFX8; features->storageBuffer8BitAccess = enabled; features->uniformAndStorageBuffer8BitAccess = enabled; features->storagePushConstant8 = enabled; @@ -925,7 +925,7 @@ void radv_GetPhysicalDeviceFeatures2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: { VkPhysicalDeviceFloat16Int8FeaturesKHR *features = (VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext; - features->shaderFloat16 = pdevice->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800; + features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800; features->shaderInt8 = true; break; } @@ -1087,7 +1087,7 @@ void radv_GetPhysicalDeviceProperties( .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, .sampledImageDepthSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = pdevice->rad_info.chip_class >= VI ? sample_counts : VK_SAMPLE_COUNT_1_BIT, + .storageImageSampleCounts = pdevice->rad_info.chip_class >= GFX8 ? sample_counts : VK_SAMPLE_COUNT_1_BIT, .maxSampleMaskWords = 1, .timestampComputeAndGraphics = true, .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq, @@ -1176,7 +1176,7 @@ void radv_GetPhysicalDeviceProperties2( VK_SUBGROUP_FEATURE_BALLOT_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT; - if (pdevice->rad_info.chip_class >= VI) { + if (pdevice->rad_info.chip_class >= GFX8) { properties->supportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT | @@ -1228,12 +1228,12 @@ void radv_GetPhysicalDeviceProperties2( properties->sgprsPerSimd = ac_get_num_physical_sgprs(pdevice->rad_info.chip_class); properties->minSgprAllocation = - pdevice->rad_info.chip_class >= VI ? 16 : 8; + pdevice->rad_info.chip_class >= GFX8 ? 16 : 8; properties->maxSgprAllocation = pdevice->rad_info.family == CHIP_TONGA || pdevice->rad_info.family == CHIP_ICELAND ? 96 : 104; properties->sgprAllocationGranularity = - pdevice->rad_info.chip_class >= VI ? 16 : 8; + pdevice->rad_info.chip_class >= GFX8 ? 16 : 8; /* VGPR. */ properties->vgprsPerSimd = RADV_NUM_PHYSICAL_VGPRS; @@ -1868,7 +1868,7 @@ VkResult radv_CreateDevice( device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. */ @@ -1880,7 +1880,7 @@ VkResult radv_CreateDevice( device->tess_offchip_block_dw_size = device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192; device->has_distributed_tess = - device->physical_device->rad_info.chip_class >= VI && + device->physical_device->rad_info.chip_class >= GFX8 && device->physical_device->rad_info.max_se >= 2; if (getenv("RADV_TRACE_FILE")) { @@ -1923,7 +1923,7 @@ VkResult radv_CreateDevice( device->ws->cs_finalize(device->empty_cs[family]); } - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cik_create_gfx_config(device); VkPipelineCacheCreateInfo ci; @@ -2223,7 +2223,7 @@ fill_geom_tess_rings(struct radv_queue *queue, static unsigned radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p) { - bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK && + bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 && device->physical_device->rad_info.family != CHIP_CARRIZO && device->physical_device->rad_info.family != CHIP_STONEY; unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; @@ -2234,7 +2234,7 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff /* * Per RadeonSI: * This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. + * Various hardware bugs need thGFX7 * * Per AMDVLK: * Vega10 should limit max_offchip_buffers to 508 (4 * 127). @@ -2244,8 +2244,8 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff * Follow AMDVLK here. */ if (device->physical_device->rad_info.family == CHIP_VEGA10 || - device->physical_device->rad_info.chip_class == CIK || - device->physical_device->rad_info.chip_class == SI) + device->physical_device->rad_info.chip_class == GFX7 || + device->physical_device->rad_info.chip_class == GFX6) --max_offchip_buffers_per_se; max_offchip_buffers = max_offchip_buffers_per_se * @@ -2263,11 +2263,11 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff } switch (device->physical_device->rad_info.chip_class) { - case SI: + case GFX6: max_offchip_buffers = MIN2(max_offchip_buffers, 126); break; - case CIK: - case VI: + case GFX7: + case GFX8: case GFX9: default: max_offchip_buffers = MIN2(max_offchip_buffers, 508); @@ -2275,8 +2275,8 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff } *max_offchip_buffers_p = max_offchip_buffers; - if (device->physical_device->rad_info.chip_class >= CIK) { - if (device->physical_device->rad_info.chip_class >= VI) + if (device->physical_device->rad_info.chip_class >= GFX7) { + if (device->physical_device->rad_info.chip_class >= GFX8) --max_offchip_buffers; hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -2304,7 +2304,7 @@ radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs, if (gsvs_ring_bo) radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo); - if (queue->device->physical_device->rad_info.chip_class >= CIK) { + if (queue->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); radeon_emit(cs, esgs_ring_size >> 8); radeon_emit(cs, gsvs_ring_size >> 8); @@ -2329,7 +2329,7 @@ radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs, radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo); - if (queue->device->physical_device->rad_info.chip_class >= CIK) { + if (queue->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4)); radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, @@ -2649,7 +2649,7 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->device->physical_device->rad_info.chip_class, NULL, 0, queue->queue_family_index == RING_COMPUTE && - queue->device->physical_device->rad_info.chip_class >= CIK, + queue->device->physical_device->rad_info.chip_class >= GFX7, (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | @@ -2661,7 +2661,7 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->device->physical_device->rad_info.chip_class, NULL, 0, queue->queue_family_index == RING_COMPUTE && - queue->device->physical_device->rad_info.chip_class >= CIK, + queue->device->physical_device->rad_info.chip_class >= GFX7, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 | RADV_CMD_FLAG_INV_VMEM_L1 | @@ -4274,13 +4274,13 @@ radv_initialise_color_surface(struct radv_device *device, cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); if (radv_image_has_fmask(iview->image)) { - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (device->physical_device->rad_info.chip_class >= CIK) + if (device->physical_device->rad_info.chip_class >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); @@ -4360,7 +4360,7 @@ radv_initialise_color_surface(struct radv_device *device, S_028C70_ENDIAN(endian); if (radv_image_has_fmask(iview->image)) { cb->cb_color_info |= S_028C70_COMPRESSION(1); - if (device->physical_device->rad_info.chip_class == SI) { + if (device->physical_device->rad_info.chip_class == GFX6) { unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } @@ -4377,7 +4377,7 @@ radv_initialise_color_surface(struct radv_device *device, /* This must be set for fast clear to work without FMASK. */ if (!radv_image_has_fmask(iview->image) && - device->physical_device->rad_info.chip_class == SI) { + device->physical_device->rad_info.chip_class == GFX6) { unsigned bankh = util_logbase2(surf->u.legacy.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -4548,7 +4548,7 @@ radv_initialise_ds_surface(struct radv_device *device, if (iview->image->info.samples > 1) ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples)); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { struct radeon_info *info = &device->physical_device->rad_info; unsigned tiling_index = surf->u.legacy.tiling_index[level]; unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level]; @@ -4807,7 +4807,7 @@ radv_init_sampler(struct radv_device *device, { uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); - bool is_vi = (device->physical_device->rad_info.chip_class >= VI); + bool is_vi = (device->physical_device->rad_info.chip_class >= GFX8); unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND; const struct VkSamplerReductionModeCreateInfoEXT *sampler_reduction = @@ -4835,7 +4835,7 @@ radv_init_sampler(struct radv_device *device, S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) | S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) | S_008F38_MIP_POINT_PRECLAMP(0) | - S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= VI) | + S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | S_008F38_ANISO_OVERRIDE(is_vi)); sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) | diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py index 576a21f4ca5..0b5af56a435 100644 --- a/src/amd/vulkan/radv_extensions.py +++ b/src/amd/vulkan/radv_extensions.py @@ -96,7 +96,7 @@ EXTENSIONS = [ Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'), Extension('VK_KHR_multiview', 1, True), Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'), - Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'), Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'), Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'), Extension('VK_EXT_buffer_device_address', 1, True), @@ -119,8 +119,8 @@ EXTENSIONS = [ Extension('VK_EXT_memory_priority', 1, True), Extension('VK_EXT_pci_bus_info', 2, True), Extension('VK_EXT_pipeline_creation_feedback', 1, True), - Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'), - Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= CIK'), + Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= GFX7'), + Extension('VK_EXT_scalar_block_layout', 1, 'device->rad_info.chip_class >= GFX7'), Extension('VK_EXT_shader_viewport_index_layer', 1, True), Extension('VK_EXT_shader_stencil_export', 1, True), Extension('VK_EXT_transform_feedback', 1, True), @@ -128,15 +128,15 @@ EXTENSIONS = [ Extension('VK_EXT_ycbcr_image_arrays', 1, True), Extension('VK_AMD_draw_indirect_count', 1, True), Extension('VK_AMD_gcn_shader', 1, True), - Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= VI && HAVE_LLVM >= 0x0800'), - Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX8 && HAVE_LLVM >= 0x0800'), + Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX8'), Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'), Extension('VK_AMD_shader_core_properties', 1, True), Extension('VK_AMD_shader_info', 1, True), Extension('VK_AMD_shader_trinary_minmax', 1, True), Extension('VK_GOOGLE_decorate_string', 1, True), Extension('VK_GOOGLE_hlsl_functionality1', 1, True), - Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= VI'), + Extension('VK_NV_compute_shader_derivatives', 1, 'device->rad_info.chip_class >= GFX8'), ] class VkVersion: diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 9883002fa42..d7b560082f6 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -761,7 +761,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical case VK_FORMAT_A2B10G10R10_SSCALED_PACK32: case VK_FORMAT_A2R10G10B10_SINT_PACK32: case VK_FORMAT_A2B10G10R10_SINT_PACK32: - if (physical_device->rad_info.chip_class <= VI && + if (physical_device->rad_info.chip_class <= GFX8 && physical_device->rad_info.family != CHIP_STONEY) { buffer &= ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT); diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 3ffb4e95749..161997ae196 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -47,7 +47,7 @@ radv_choose_tiling(struct radv_device *device, if (!vk_format_is_compressed(pCreateInfo->format) && !vk_format_is_depth_or_stencil(pCreateInfo->format) - && device->physical_device->rad_info.chip_class <= VI) { + && device->physical_device->rad_info.chip_class <= GFX8) { /* this causes hangs in some VK CTS tests on GFX9. */ /* Textures with a very small height are recommended to be linear. */ if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || @@ -69,7 +69,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo) { /* TC-compat HTILE is only available for GFX8+. */ - if (device->physical_device->rad_info.chip_class < VI) + if (device->physical_device->rad_info.chip_class < GFX8) return false; if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) || @@ -130,7 +130,7 @@ radv_use_dcc_for_image(struct radv_device *device, bool blendable; /* DCC (Delta Color Compression) is only available for GFX8+. */ - if (device->physical_device->rad_info.chip_class < VI) + if (device->physical_device->rad_info.chip_class < GFX8) return false; if (device->instance->debug_flags & RADV_DEBUG_NO_DCC) @@ -328,7 +328,7 @@ radv_make_buffer_descriptor(struct radv_device *device, state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - if (device->physical_device->rad_info.chip_class != VI && stride) { + if (device->physical_device->rad_info.chip_class != GFX8 && stride) { range /= stride; } @@ -370,12 +370,12 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, state[1] &= C_008F14_BASE_ADDRESS_HI; state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40); - if (chip_class >= VI) { + if (chip_class >= GFX8) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; if (!is_storage_image && radv_dcc_enabled(image, first_level)) { meta_va = gpu_address + image->dcc_offset; - if (chip_class <= VI) + if (chip_class <= GFX8) meta_va += base_level_info->dcc_offset; } else if (!is_storage_image && radv_image_is_tc_compat_htile(image)) { @@ -417,7 +417,7 @@ si_set_mutable_tex_desc_fields(struct radv_device *device, S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ unsigned pitch = base_level_info->nblk_x * block_width; unsigned index = si_tile_mode_index(plane, base_level, is_stencil); @@ -596,7 +596,7 @@ si_make_texture_descriptor(struct radv_device *device, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) { + if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -725,7 +725,7 @@ radv_query_opaque_metadata(struct radv_device *device, memcpy(&md->metadata[2], desc, sizeof(desc)); /* Dwords [10:..] contain the mipmap level offsets. */ - if (device->physical_device->rad_info.chip_class <= VI) { + if (device->physical_device->rad_info.chip_class <= GFX8) { for (i = 0; i <= image->info.levels - 1; i++) md->metadata[10+i] = image->planes[0].surface.u.legacy.level[i].offset >> 8; md->size_metadata = (11 + image->info.levels - 1) * 4; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index e8be058d3f7..341f6388f32 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -262,7 +262,7 @@ get_tcs_num_patches(struct radv_shader_context *ctx) * * Test: dEQP-VK.tessellation.shader_input_output.barrier */ - if (ctx->options->chip_class >= CIK && ctx->options->family != CHIP_STONEY) + if (ctx->options->chip_class >= GFX7 && ctx->options->family != CHIP_STONEY) hardware_lds_size = 65536; num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); @@ -273,8 +273,8 @@ get_tcs_num_patches(struct radv_shader_context *ctx) */ num_patches = MIN2(num_patches, 40); - /* SI bug workaround - limit LS-HS threadgroups to only one wave. */ - if (ctx->options->chip_class == SI) { + /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */ + if (ctx->options->chip_class == GFX6) { unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); num_patches = MIN2(num_patches, one_wave); } @@ -3276,7 +3276,7 @@ write_tess_factors(struct radv_shader_context *ctx) LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); unsigned tf_offset = 0; - if (ctx->options->chip_class <= VI) { + if (ctx->options->chip_class <= GFX8) { ac_nir_build_if(&inner_if_ctx, ctx, LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, "")); @@ -3518,7 +3518,7 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) static void ac_setup_rings(struct radv_shader_context *ctx) { - if (ctx->options->chip_class <= VI && + if (ctx->options->chip_class <= GFX8 && (ctx->stage == MESA_SHADER_GEOMETRY || ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) { unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS @@ -3568,7 +3568,7 @@ ac_setup_rings(struct radv_shader_context *ctx) stride = 4 * num_components * ctx->gs_max_out_vertices; - /* Limit on the stride field for <= CIK. */ + /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); ring = LLVMBuildBitCast(ctx->ac.builder, @@ -3616,7 +3616,7 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class, { switch (nir->info.stage) { case MESA_SHADER_TESS_CTRL: - return chip_class >= CIK ? 128 : 64; + return chip_class >= GFX7 ? 128 : 64; case MESA_SHADER_GEOMETRY: return chip_class >= GFX9 ? 128 : 64; case MESA_SHADER_COMPUTE: @@ -3961,7 +3961,7 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, * - Floating-point output modifiers would be ignored by the hw. * - Some opcodes don't support denormals, such as v_mad_f32. We would * have to stop using those. - * - SI & CI would be very slow. + * - GFX6 & GFX7 would be very slow. */ config->float_mode |= V_00B028_FP_64_DENORMS; } diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index f25a5f55bf5..c89a6f139ba 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -1558,11 +1558,11 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta unsigned num_se = device->physical_device->rad_info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. - * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). + /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. + * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). */ unsigned gs_vertex_reuse = - (device->physical_device->rad_info.chip_class >= VI ? 32 : 16) * num_se; + (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se; unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; @@ -1581,7 +1581,7 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline, const struct radv_gs_sta esgs_ring_size = align(esgs_ring_size, alignment); gsvs_ring_size = align(gsvs_ring_size, alignment); - if (pipeline->device->physical_device->rad_info.chip_class <= VI) + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size); @@ -1643,7 +1643,7 @@ calculate_tess_state(struct radv_pipeline *pipeline, lds_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.lds_size; - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { assert(lds_size <= 65536); lds_size = align(lds_size, 512) / 512; } else { @@ -1904,7 +1904,7 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, key.vertex_attribute_offsets[location] = desc->offset; key.vertex_attribute_strides[location] = radv_get_attrib_stride(input_state, desc->binding); - if (pipeline->device->physical_device->rad_info.chip_class <= VI && + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 && pipeline->device->physical_device->rad_info.family != CHIP_STONEY) { VkFormat format = input_state->pVertexAttributeDescriptions[i].format; uint64_t adjust; @@ -1962,7 +1962,7 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline, } key.col_format = blend->spi_shader_col_format; - if (pipeline->device->physical_device->rad_info.chip_class < VI) + if (pipeline->device->physical_device->rad_info.chip_class < GFX8) radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.is_int8, &key.is_int10); return key; @@ -2918,7 +2918,7 @@ radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs, * if no sample lies on the pixel boundary (-8 sample offset). It's * currently always TRUE because the driver doesn't support 16 samples. */ - bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= CIK; + bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7; radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); @@ -3003,7 +3003,7 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, cull_dist_mask << 8 | clip_dist_mask); - if (pipeline->device->physical_device->rad_info.chip_class <= VI) + if (pipeline->device->physical_device->rad_info.chip_class <= GFX8) radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); } @@ -3036,7 +3036,7 @@ radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, radeon_emit(cs, S_00B524_MEM_BASE(va >> 40)); rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size); - if (pipeline->device->physical_device->rad_info.chip_class == CIK && + if (pipeline->device->physical_device->rad_info.chip_class == GFX7 && pipeline->device->physical_device->rad_info.family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); @@ -3118,7 +3118,7 @@ radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, tess->tf_param); - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, tess->ls_hs_config); else @@ -3490,7 +3490,7 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline, radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, radv_compute_vgt_shader_stages_en(pipeline)); - if (pipeline->device->physical_device->rad_info.chip_class >= CIK) { + if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) { radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); } else { radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); @@ -3522,12 +3522,12 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, /* GS requirement. */ ia_multi_vgt_param.partial_es_wave = false; - if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI) + if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8) if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3) ia_multi_vgt_param.partial_es_wave = true; ia_multi_vgt_param.wd_switch_on_eop = false; - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. */ @@ -3567,7 +3567,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ if (device->has_distributed_tess) { if (radv_pipeline_has_gs(pipeline)) { - if (device->physical_device->rad_info.chip_class <= VI) + if (device->physical_device->rad_info.chip_class <= GFX8) ia_multi_vgt_param.partial_es_wave = true; } else { ia_multi_vgt_param.partial_vs_wave = true; @@ -3609,7 +3609,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline, ia_multi_vgt_param.base = S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == VI ? 2 : 0) | + S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) | S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) | S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9); @@ -3885,7 +3885,7 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline) compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (device->physical_device->rad_info.chip_class >= CIK) { + if (device->physical_device->rad_info.chip_class >= GFX7) { unsigned num_cu_per_se = device->physical_device->rad_info.num_good_compute_units / device->physical_device->rad_info.max_se; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index aa25e8f9805..a88c0f31ad3 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -703,7 +703,7 @@ struct radv_device { float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; - /* CIK and later */ + /* GFX7 and later */ uint32_t gfx_init_size_dw; struct radeon_winsys_bo *gfx_init; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 17d6c5bc33a..dfa50155c06 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -773,7 +773,7 @@ generate_shader_stats(struct radv_device *device, struct _mesa_string_buffer *buf) { enum chip_class chip_class = device->physical_device->rad_info.chip_class; - unsigned lds_increment = chip_class >= CIK ? 512 : 256; + unsigned lds_increment = chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf; unsigned max_simd_waves; unsigned lds_per_wave = 0; @@ -875,7 +875,7 @@ radv_GetShaderInfoAMD(VkDevice _device, if (!pInfo) { *pInfoSize = sizeof(VkShaderStatisticsInfoAMD); } else { - unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256; + unsigned lds_multiplier = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256; struct ac_shader_config *conf = &variant->config; VkShaderStatisticsInfoAMD statistics = {}; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e73c13762e5..0f4bdadc3d2 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -25,7 +25,7 @@ * IN THE SOFTWARE. */ -/* command buffer handling for SI */ +/* command buffer handling for AMD GCN */ #include "radv_private.h" #include "radv_shader.h" @@ -51,8 +51,8 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, raster_config_se); for (se = 0; se < num_se; se++) { - /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ - if (physical_device->rad_info.chip_class < CIK) + /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) | @@ -64,8 +64,8 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); } - /* GRBM_GFX_INDEX has a different offset on SI and CI+ */ - if (physical_device->rad_info.chip_class < CIK) + /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */ + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX, S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) | @@ -75,7 +75,7 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device, S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1)); - if (physical_device->rad_info.chip_class >= CIK) + if (physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } @@ -93,7 +93,7 @@ si_emit_compute(struct radv_physical_device *physical_device, radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (physical_device->rad_info.chip_class >= CIK) { + if (physical_device->rad_info.chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -108,7 +108,7 @@ si_emit_compute(struct radv_physical_device *physical_device, * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (physical_device->rad_info.chip_class <= SI) { + if (physical_device->rad_info.chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -142,7 +142,7 @@ si_set_raster_config(struct radv_physical_device *physical_device, if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config); - if (physical_device->rad_info.chip_class >= CIK) + if (physical_device->rad_info.chip_class >= GFX7) radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { @@ -158,9 +158,9 @@ si_emit_graphics(struct radv_physical_device *physical_device, { int i; - /* Only SI can disable CLEAR_STATE for now. */ + /* Only GFX6 can disable CLEAR_STATE for now. */ assert(physical_device->has_clear_state || - physical_device->rad_info.chip_class == SI); + physical_device->rad_info.chip_class == GFX6); radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1)); @@ -171,7 +171,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_emit(cs, 0); } - if (physical_device->rad_info.chip_class <= VI) + if (physical_device->rad_info.chip_class <= GFX8) si_set_raster_config(physical_device, cs); radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); @@ -179,7 +179,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ - if (physical_device->rad_info.chip_class <= VI) { + if (physical_device->rad_info.chip_class <= GFX8) { radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40); } @@ -193,7 +193,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!physical_device->has_clear_state) radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0); - if (physical_device->rad_info.chip_class < CIK) + if (physical_device->rad_info.chip_class < GFX7) radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); @@ -206,7 +206,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - if (physical_device->rad_info.chip_class <= CIK) { + if (physical_device->rad_info.chip_class <= GFX7) { radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); @@ -229,7 +229,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, if (!physical_device->has_clear_state) { radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF); radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); - /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */ + /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0); radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0); radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); @@ -256,7 +256,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); } - if (physical_device->rad_info.chip_class >= CIK) { + if (physical_device->rad_info.chip_class >= GFX7) { if (physical_device->rad_info.chip_class >= GFX9) { radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); @@ -303,7 +303,7 @@ si_emit_graphics(struct radv_physical_device *physical_device, S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); } - if (physical_device->rad_info.chip_class >= VI) { + if (physical_device->rad_info.chip_class >= GFX8) { uint32_t vgt_tess_distribution; vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | @@ -586,7 +586,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi; partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave; - if (chip_class >= CIK) { + if (chip_class >= GFX7) { wd_switch_on_eop = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.wd_switch_on_eop; /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. @@ -601,19 +601,19 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (chip_class <= VI && + if (chip_class <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; - /* Required on CIK and later. */ + /* Required on GFX7 and later. */ if (info->max_se > 2 && !wd_switch_on_eop) ia_switch_on_eoi = true; - /* Required by Hawaii and, for some special cases, by VI. */ + /* Required by Hawaii and, for some special cases, by GFX8. */ if (ia_switch_on_eoi && (family == CHIP_HAWAII || - (chip_class == VI && + (chip_class == GFX8 && /* max primgroup in wave is always 2 - leave this for documentation */ (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2)))) partial_vs_wave = true; @@ -633,7 +633,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, assert(wd_switch_on_eop || !ia_switch_on_eop); } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if (chip_class <= VI && ia_switch_on_eoi) + if (chip_class <= GFX8 && ia_switch_on_eoi) partial_es_wave = true; if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) { @@ -658,7 +658,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0); + S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0); } @@ -704,8 +704,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, if (!is_gfx8_mec) radeon_emit(cs, 0); /* unused */ } else { - if (chip_class == CIK || - chip_class == VI) { + if (chip_class == GFX7 || + chip_class == GFX8) { /* Two EOP events are required to make all engines go idle * (and optional cache flushes executed) before the timestamp * is written. @@ -788,7 +788,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (chip_class <= VI) { + if (chip_class <= GFX8) { if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -801,7 +801,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (chip_class >= VI) { + if (chip_class >= GFX8) { si_cs_emit_write_event_eop(cs, chip_class, is_mec, @@ -911,12 +911,12 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, } if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) || - (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { + (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | - S_0301F0_TC_WB_ACTION_ENA(chip_class >= VI)); + S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8)); cp_coher_cntl = 0; } else { if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) { @@ -1099,7 +1099,7 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, else if (flags & CP_DMA_USE_L2) header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); - if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -1281,7 +1281,7 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK) + if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7) return; if (!cmd_buffer->state.dma_is_busy) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 70f81119c02..0c521917027 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -1037,7 +1037,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, uint32_t pad_word = 0xffff1000U; bool emit_signal_sem = sem_info->cs_emit_signal; - if (radv_amdgpu_winsys(ws)->info.chip_class == SI) + if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6) pad_word = 0x80000000; assert(cs_count); diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c index 35a585a5693..649a7698069 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c @@ -58,7 +58,7 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd) ws->info.num_sdma_rings = MIN2(ws->info.num_sdma_rings, MAX_RINGS_PER_TYPE); ws->info.num_compute_rings = MIN2(ws->info.num_compute_rings, MAX_RINGS_PER_TYPE); - ws->use_ib_bos = ws->info.chip_class >= CIK; + ws->use_ib_bos = ws->info.chip_class >= GFX7; return true; } diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 27565e0aa0c..497da0c3dfa 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -366,7 +366,7 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, templ.bind |= new_bind_flag; /* r600g doesn't react to dirty_tex_descriptor_counter */ - if (rctx->chip_class < SI) + if (rctx->chip_class < GFX6) return; if (rtex->resource.b.is_shared) @@ -1264,7 +1264,7 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, const struct pipe_box *box) { /* r600g doesn't react to dirty_tex_descriptor_counter */ - return rscreen->chip_class >= SI && + return rscreen->chip_class >= GFX6 && !rtex->resource.b.is_shared && !(transfer_usage & PIPE_TRANSFER_READ) && rtex->resource.b.b.last_level == 0 && diff --git a/src/gallium/drivers/r600/r600d_common.h b/src/gallium/drivers/r600/r600d_common.h index b06f90f8edd..979f26bc7da 100644 --- a/src/gallium/drivers/r600/r600d_common.h +++ b/src/gallium/drivers/r600/r600d_common.h @@ -85,7 +85,7 @@ #define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1) #define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x)) #define PKT3_SET_SH_REG 0x76 /* SI and later */ -#define PKT3_SET_UCONFIG_REG 0x79 /* CIK and later */ +#define PKT3_SET_UCONFIG_REG 0x79 /* GFX7 and later */ #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS1 0x1 /* EG and later */ #define EVENT_TYPE_SAMPLE_STREAMOUTSTATS2 0x2 /* EG and later */ diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c index da9b25a442d..2728541dd29 100644 --- a/src/gallium/drivers/radeonsi/cik_sdma.c +++ b/src/gallium/drivers/radeonsi/cik_sdma.c @@ -180,12 +180,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_width <= (1 << 14) && copy_height <= (1 << 14) && copy_depth <= (1 << 11) && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width < (1 << 14) && copy_height < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI) || (srcx + copy_width != (1 << 14) && @@ -207,7 +207,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstx | (dsty << 16)); radeon_emit(cs, dstz | ((dst_pitch - 1) << 16)); radeon_emit(cs, dst_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -264,7 +264,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, bpp == 16) return false; - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (copy_width_aligned == (1 << 14) || copy_height == (1 << 14) || copy_depth == (1 << 11))) @@ -371,7 +371,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, linear_x | (linear_y << 16)); radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16)); radeon_emit(cs, linear_slice_pitch - 1); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height << 16)); radeon_emit(cs, copy_depth); } else { @@ -394,9 +394,9 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, dsty % 8 == 0 && srcx % 8 == 0 && srcy % 8 == 0 && - /* this can either be equal, or display->rotated (VI+ only) */ + /* this can either be equal, or display->rotated (GFX8+ only) */ (src_micro_mode == dst_micro_mode || - (sctx->chip_class >= VI && + (sctx->chip_class >= GFX8 && src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING && dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) { assert(src_pitch % 8 == 0); @@ -434,12 +434,12 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, copy_depth <= (1 << 11) && copy_width_aligned % 8 == 0 && copy_height_aligned % 8 == 0 && - /* HW limitation - CIK: */ - (sctx->chip_class != CIK || + /* HW limitation - GFX7: */ + (sctx->chip_class != GFX7 || (copy_width_aligned < (1 << 14) && copy_height_aligned < (1 << 14) && copy_depth < (1 << 11))) && - /* HW limitation - some CIK parts: */ + /* HW limitation - some GFX7 parts: */ ((sctx->family != CHIP_BONAIRE && sctx->family != CHIP_KAVERI && sctx->family != CHIP_KABINI && @@ -465,7 +465,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx, radeon_emit(cs, dstz | (dst_pitch_tile_max << 16)); radeon_emit(cs, dst_slice_tile_max); radeon_emit(cs, encode_tile_info(sctx, sdst, dst_level, false)); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { radeon_emit(cs, copy_width_aligned | (copy_height_aligned << 16)); radeon_emit(cs, copy_depth); @@ -502,7 +502,7 @@ static void cik_sdma_copy(struct pipe_context *ctx, return; } - if ((sctx->chip_class == CIK || sctx->chip_class == VI) && + if ((sctx->chip_class == GFX7 || sctx->chip_class == GFX8) && cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box)) return; diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 9d3d7d3d27a..5806342cca9 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -1151,7 +1151,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx, goto resolve_to_temp; /* This can happen with mipmapping. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !dst->surface.u.legacy.level[info->dst.level].dcc_fast_clear_size) goto resolve_to_temp; diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c index d294f236914..d0094031a95 100644 --- a/src/gallium/drivers/radeonsi/si_clear.c +++ b/src/gallium/drivers/radeonsi/si_clear.c @@ -313,7 +313,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else if (sscreen->info.chip_class >= CIK) { + } else if (sscreen->info.chip_class >= GFX7) { /* These magic numbers were copied from addrlib. It doesn't use * any definitions for them either. They are all 2D_TILED_THIN1 * modes with different bpp and micro tile mode. @@ -332,7 +332,7 @@ static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, assert(!"unexpected micro mode"); return; } - } else { /* SI */ + } else { /* GFX6 */ switch (tex->last_msaa_resolve_target_micro_mode) { case RADEON_MICRO_MODE_DISPLAY: switch (tex->surface.bpe) { @@ -434,7 +434,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) continue; - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && !sctx->screen->info.htile_cmask_support_1d_tiling) continue; @@ -474,7 +474,7 @@ static void si_do_fast_color_clear(struct si_context *sctx, continue; /* This can happen with mipmapping or MSAA. */ - if (sctx->chip_class == VI && + if (sctx->chip_class == GFX8 && !tex->surface.u.legacy.level[level].dcc_fast_clear_size) continue; diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index f1a433b72df..46a0ba76ed5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -327,7 +327,7 @@ static void si_initialize_compute(struct si_context *sctx) radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff)); radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff)); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); @@ -342,7 +342,7 @@ static void si_initialize_compute(struct si_context *sctx) * kernel if we want to use something other than the default value, * which is now 0x22f. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { /* XXX: This should be: * (number of compute units) * 4 * (waves per simd) - 1 */ @@ -353,7 +353,7 @@ static void si_initialize_compute(struct si_context *sctx) /* Set the pointer to border colors. */ bc_va = sctx->border_color_buffer->gpu_address; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2); radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */ @@ -434,12 +434,12 @@ static bool si_switch_compute_shader(struct si_context *sctx, } lds_blocks = config->lds_size; - /* XXX: We are over allocating LDS. For SI, the shader reports + /* XXX: We are over allocating LDS. For GFX6, the shader reports * LDS in blocks of 256 bytes, so if there are 4 bytes lds * allocated in the shader and 4 bytes allocated by the state * tracker, then we will set LDS_SIZE to 512 bytes rather than 256. */ - if (sctx->chip_class <= SI) { + if (sctx->chip_class <= GFX6) { lds_blocks += align(program->local_size, 256) >> 8; } else { lds_blocks += align(program->local_size, 512) >> 9; @@ -474,7 +474,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, * command. However, that would add more complexity and we're likely * to get a shader state change in that case anyway. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b, 0, program->shader.bo->b.b.width0); } @@ -539,7 +539,7 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx, } else { scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size); - if (sctx->chip_class < VI) { + if (sctx->chip_class < GFX8) { /* BUF_DATA_FORMAT is ignored, but it cannot be * BUF_DATA_FORMAT_INVALID. */ scratch_dword3 |= @@ -764,7 +764,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned num_cu_per_se = sscreen->info.num_good_compute_units / sscreen->info.max_se; @@ -777,7 +777,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, compute_resource_limits |= S_00B854_WAVES_PER_SH(sctx->cs_max_waves_per_sh); } else { - /* SI */ + /* GFX6 */ if (sctx->cs_max_waves_per_sh) { unsigned limit_div16 = DIV_ROUND_UP(sctx->cs_max_waves_per_sh, 16); compute_resource_limits |= S_00B854_WAVES_PER_SH_SI(limit_div16); @@ -792,7 +792,7 @@ static void si_emit_dispatch_packets(struct si_context *sctx, S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. (same as Vulkan) */ - S_00B800_ORDER_MODE(sctx->chip_class >= CIK); + S_00B800_ORDER_MODE(sctx->chip_class >= GFX7); const uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; @@ -861,10 +861,10 @@ static void si_launch_grid( * compute isn't used, i.e. only one compute job can run at a time. * If async compute is possible, the threadgroup size must be limited * to 256 threads on all queues to avoid the bug. - * Only SI and certain CIK chips are affected. + * Only GFX6 and certain GFX7 chips are affected. */ bool cs_regalloc_hang = - (sctx->chip_class == SI || + (sctx->chip_class == GFX6 || sctx->family == CHIP_BONAIRE || sctx->family == CHIP_KABINI) && info->block[0] * info->block[1] * info->block[2] > 256; @@ -894,7 +894,7 @@ static void si_launch_grid( si_context_add_resource_size(sctx, info->indirect); /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && si_resource(info->indirect)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(info->indirect)->TC_L2_dirty = false; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index fb0d8d2f1b6..1cfdc9b62c6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -36,7 +36,7 @@ static enum si_cache_policy get_cache_policy(struct si_context *sctx, { if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || coher == SI_COHERENCY_CP)) || - (sctx->chip_class >= CIK && coher == SI_COHERENCY_SHADER)) + (sctx->chip_class >= GFX7 && coher == SI_COHERENCY_SHADER)) return size <= 256 * 1024 ? L2_LRU : L2_STREAM; return L2_BYPASS; @@ -254,7 +254,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, (!force_cpdma && clear_value_size == 4 && offset % 4 == 0 && - (size > 32*1024 || sctx->chip_class <= VI))) { + (size > 32*1024 || sctx->chip_class <= GFX8))) { si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0, aligned_size, clear_value, clear_value_size, coher); @@ -418,7 +418,7 @@ void si_compute_copy_image(struct si_context *sctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); @@ -597,7 +597,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx, ctx->launch_grid(ctx, &info); sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | + (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image); diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 404117d1813..f5c54ca0d52 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -61,7 +61,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(sctx)); - assert(sctx->chip_class != SI || cache_policy == L2_BYPASS); + assert(sctx->chip_class != GFX6 || cache_policy == L2_BYPASS); if (sctx->chip_class >= GFX9) command |= S_414_BYTE_COUNT_GFX9(size); @@ -90,7 +90,7 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* GDS increments the address, not CP. */ command |= S_414_DAS(V_414_REGISTER) | S_414_DAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2) | S_500_DST_CACHE_POLICY(cache_policy == L2_STREAM); } @@ -102,12 +102,12 @@ static void si_emit_cp_dma(struct si_context *sctx, struct radeon_cmdbuf *cs, /* Both of these are required for GDS. It does increment the address. */ command |= S_414_SAS(V_414_REGISTER) | S_414_SAIC(V_414_NO_INCREMENT); - } else if (sctx->chip_class >= CIK && cache_policy != L2_BYPASS) { + } else if (sctx->chip_class >= GFX7 && cache_policy != L2_BYPASS) { header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) | S_500_SRC_CACHE_POLICY(cache_policy == L2_STREAM); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -412,7 +412,7 @@ void si_cp_dma_copy_buffer(struct si_context *sctx, void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size) { - assert(sctx->chip_class >= CIK); + assert(sctx->chip_class >= GFX7); si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU); @@ -491,7 +491,7 @@ void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only) } } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ /* Choose the right spot for the VBO prefetch. */ if (sctx->tes_shader.cso) { if (mask & SI_PREFETCH_LS) @@ -591,7 +591,7 @@ void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, assert(offset % 4 == 0); assert(size % 4 == 0); - if (sctx->chip_class == SI && dst_sel == V_370_MEM) + if (sctx->chip_class == GFX6 && dst_sel == V_370_MEM) dst_sel = V_370_MEM_GRBM; radeon_add_to_buffer_list(sctx, cs, buf, diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 9a4494a98fe..bd85fc49387 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -314,7 +314,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f) si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index f795c33cf26..5b812149754 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -347,7 +347,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, base_level_info->mode == RADEON_SURF_MODE_2D) state[0] |= tex->surface.tile_swizzle; - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { state[6] &= C_008F28_COMPRESSION_EN; state[7] = 0; @@ -355,7 +355,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, meta_va = (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + tex->dcc_offset; - if (sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX8) { meta_va += base_level_info->dcc_offset; assert(base_level_info->mode == RADEON_SURF_MODE_2D); } @@ -399,7 +399,7 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, S_008F24_META_RB_ALIGNED(meta.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ unsigned pitch = base_level_info->nblk_x * block_width; unsigned index = si_tile_mode_index(tex, base_level, is_stencil); @@ -1141,7 +1141,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) uint64_t va = buf->gpu_address + offset; int64_t num_records = (int64_t)buf->b.b.width0 - offset; - if (sctx->chip_class != VI && vb->stride) { + if (sctx->chip_class != GFX8 && vb->stride) { /* Round up by rounding down and adding 1 */ num_records = (num_records - velems->format_size[i]) / vb->stride + 1; @@ -1210,9 +1210,9 @@ static void si_set_constant_buffer(struct si_context *sctx, assert(slot < descs->num_elements); pipe_resource_reference(&buffers->buffers[slot], NULL); - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy * with a NULL buffer). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK && + if (sctx->chip_class == GFX7 && (!input || (!input->buffer && !input->user_buffer))) input = &sctx->null_const_buf; @@ -1467,7 +1467,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot, break; } - if (sctx->chip_class >= VI && stride) + if (sctx->chip_class >= GFX8 && stride) num_records *= stride; /* Set the descriptor. */ diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c index bba1bd95826..8f2e15833b6 100644 --- a/src/gallium/drivers/radeonsi/si_dma_cs.c +++ b/src/gallium/drivers/radeonsi/si_dma_cs.c @@ -30,7 +30,7 @@ static void si_dma_emit_wait_idle(struct si_context *sctx) struct radeon_cmdbuf *cs = sctx->dma_cs; /* NOP waits for idle. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_emit(cs, 0x00000000); /* NOP */ else radeon_emit(cs, 0xf0000000); /* NOP */ @@ -42,7 +42,7 @@ void si_dma_emit_timestamp(struct si_context *sctx, struct si_resource *dst, struct radeon_cmdbuf *cs = sctx->dma_cs; uint64_t va = dst->gpu_address + offset; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { unreachable("SI DMA doesn't support the timestamp packet."); return; } @@ -87,7 +87,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, offset += sdst->gpu_address; - if (sctx->chip_class == SI) { + if (sctx->chip_class == GFX6) { /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE); si_need_dma_space(sctx, ncopy * 4, sdst, NULL); @@ -105,7 +105,7 @@ void si_sdma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, return; } - /* The following code is for CI, VI, Vega/Raven, etc. */ + /* The following code is for Sea Islands and later. */ /* the same maximum size as for copying */ ncopy = DIV_ROUND_UP(size, CIK_SDMA_COPY_MAX_SIZE); si_need_dma_space(sctx, ncopy * 5, sdst, NULL); diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c index ffda98d2834..b3212c1db35 100644 --- a/src/gallium/drivers/radeonsi/si_fence.c +++ b/src/gallium/drivers/radeonsi/si_fence.c @@ -115,8 +115,8 @@ void si_cp_release_mem(struct si_context *ctx, radeon_emit(cs, 0); /* immediate data hi */ radeon_emit(cs, 0); /* unused */ } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { + if (ctx->chip_class == GFX7 || + ctx->chip_class == GFX8) { struct si_resource *scratch = ctx->eop_bug_scratch; uint64_t va = scratch->gpu_address; @@ -153,8 +153,8 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen) { unsigned dwords = 6; - if (screen->info.chip_class == CIK || - screen->info.chip_class == VI) + if (screen->info.chip_class == GFX7 || + screen->info.chip_class == GFX8) dwords *= 2; return dwords; diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index d97aca1de23..71350661c2b 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -254,7 +254,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 32; case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK: - return sscreen->info.chip_class <= VI ? + return sscreen->info.chip_class <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0; /* Stream output. */ diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index d0d405c473f..c81718950a4 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -82,7 +82,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_GLOBAL_L2; - } else if (ctx->chip_class == SI) { + } else if (ctx->chip_class == GFX6) { /* The kernel flushes L2 before shaders are finished. */ wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH; @@ -147,7 +147,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, /* Make sure CP DMA is idle at the end of IBs after L2 prefetches * because the kernel doesn't wait for it. */ - if (ctx->chip_class >= CIK) + if (ctx->chip_class >= GFX7) si_cp_dma_wait_for_idle(ctx); /* Wait for draw calls to finish if needed. */ @@ -407,7 +407,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx) ctx->tracked_regs.reg_value[SI_TRACKED_SPI_SHADER_COL_FORMAT] = 0x00000000; ctx->tracked_regs.reg_value[SI_TRACKED_CB_SHADER_MASK] = 0xffffffff; ctx->tracked_regs.reg_value[SI_TRACKED_VGT_TF_PARAM] = 0x00000000; - ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From VI */ + ctx->tracked_regs.reg_value[SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL] = 0x0000001e; /* From GFX8 */ /* Set all saved registers state to saved. */ ctx->tracked_regs.reg_saved = 0xffffffffffffffff; diff --git a/src/gallium/drivers/radeonsi/si_gpu_load.c b/src/gallium/drivers/radeonsi/si_gpu_load.c index 481438f37bb..7c2e43b3fdd 100644 --- a/src/gallium/drivers/radeonsi/si_gpu_load.c +++ b/src/gallium/drivers/radeonsi/si_gpu_load.c @@ -102,7 +102,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, UPDATE_COUNTER(gui, GUI_ACTIVE); gui_busy = GUI_ACTIVE(value); - if (sscreen->info.chip_class == CIK || sscreen->info.chip_class == VI) { + if (sscreen->info.chip_class == GFX7 || sscreen->info.chip_class == GFX8) { /* SRBM_STATUS2 */ sscreen->ws->read_registers(sscreen->ws, SRBM_STATUS2, 1, &value); @@ -110,7 +110,7 @@ static void si_update_mmio_counters(struct si_screen *sscreen, sdma_busy = SDMA_BUSY(value); } - if (sscreen->info.chip_class >= VI) { + if (sscreen->info.chip_class >= GFX8) { /* CP_STAT */ sscreen->ws->read_registers(sscreen->ws, CP_STAT, 1, &value); diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index c15c444cc40..322950557e3 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -1284,11 +1284,11 @@ void si_init_perfcounters(struct si_screen *screen) unsigned i; switch (screen->info.chip_class) { - case CIK: + case GFX7: blocks = groups_CIK; num_blocks = ARRAY_SIZE(groups_CIK); break; - case VI: + case GFX8: blocks = groups_VI; num_blocks = ARRAY_SIZE(groups_VI); break; @@ -1296,13 +1296,13 @@ void si_init_perfcounters(struct si_screen *screen) blocks = groups_gfx9; num_blocks = ARRAY_SIZE(groups_gfx9); break; - case SI: + case GFX6: default: return; /* not implemented */ } if (screen->info.max_sh_per_se != 1) { - /* This should not happen on non-SI chips. */ + /* This should not happen on non-GFX6 chips. */ fprintf(stderr, "si_init_perfcounters: max_sh_per_se = %d not " "supported (inaccurate performance counters)\n", screen->info.max_sh_per_se); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4d36fd46a9b..d9dae8363f0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -115,7 +115,7 @@ static void si_init_compiler(struct si_screen *sscreen, /* Only create the less-optimizing version of the compiler on APUs * predating Ryzen (Raven). */ bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && - sscreen->info.chip_class <= VI; + sscreen->info.chip_class <= GFX8; enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | @@ -394,7 +394,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (!sctx) return NULL; - sctx->has_graphics = sscreen->info.chip_class == SI || + sctx->has_graphics = sscreen->info.chip_class == GFX6 || !(flags & PIPE_CONTEXT_COMPUTE_ONLY); if (flags & PIPE_CONTEXT_DEBUG) @@ -419,8 +419,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } - if (sctx->chip_class == CIK || - sctx->chip_class == VI || + if (sctx->chip_class == GFX7 || + sctx->chip_class == GFX8 || sctx->chip_class == GFX9) { sctx->eop_bug_scratch = si_resource( pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT, @@ -536,7 +536,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } /* Initialize SDMA functions. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); @@ -563,9 +563,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, V_370_MEM, V_370_ME, &sctx->wait_mem_number); } - /* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads + /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */ - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { sctx->null_const_buf.buffer = pipe_aligned_buffer_create(screen, SI_RESOURCE_FLAG_32BIT, @@ -638,7 +638,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, /* this must be last */ si_begin_new_gfx_cs(sctx); - if (sctx->chip_class == CIK) { + if (sctx->chip_class == GFX7) { /* Clear the NULL constant buffer, because loads should return zeros. * Note that this forces CP DMA to be used, because clover deadlocks * for some reason when the compute codepath is used. @@ -1017,11 +1017,11 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, si_init_perfcounters(sscreen); /* Determine tessellation ring info. */ - bool double_offchip_buffers = sscreen->info.chip_class >= CIK && + bool double_offchip_buffers = sscreen->info.chip_class >= GFX7 && sscreen->info.family != CHIP_CARRIZO && sscreen->info.family != CHIP_STONEY; /* This must be one less than the maximum number due to a hw limitation. - * Various hardware bugs in SI, CIK, and GFX9 need this. + * Various hardware bugs need this. */ unsigned max_offchip_buffers_per_se; @@ -1052,8 +1052,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->tess_offchip_ring_size = max_offchip_buffers * sscreen->tess_offchip_block_dw_size * 4; - if (sscreen->info.chip_class >= CIK) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX7) { + if (sscreen->info.chip_class >= GFX8) --max_offchip_buffers; sscreen->vgt_hs_offchip_param = S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) | @@ -1065,28 +1065,28 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs - * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. - * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ - sscreen->has_clear_state = sscreen->info.chip_class >= CIK && + * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc. + * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.*/ + sscreen->has_clear_state = sscreen->info.chip_class >= GFX7 && sscreen->info.drm_major == 3; sscreen->has_distributed_tess = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2; sscreen->has_draw_indirect_multi = (sscreen->info.family >= CHIP_POLARIS10) || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && sscreen->info.pfp_fw_version >= 121 && sscreen->info.me_fw_version >= 87) || - (sscreen->info.chip_class == CIK && + (sscreen->info.chip_class == GFX7 && sscreen->info.pfp_fw_version >= 211 && sscreen->info.me_fw_version >= 173) || - (sscreen->info.chip_class == SI && + (sscreen->info.chip_class == GFX6 && sscreen->info.pfp_fw_version >= 79 && sscreen->info.me_fw_version >= 142); - sscreen->has_out_of_order_rast = sscreen->info.chip_class >= VI && + sscreen->has_out_of_order_rast = sscreen->info.chip_class >= GFX8 && sscreen->info.max_se >= 2 && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER)); sscreen->assume_no_z_fights = @@ -1137,7 +1137,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, * by the reality that LLVM 5.0 doesn't have working VGPR indexing * on GFX9. */ - sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= VI; + sscreen->llvm_has_working_vgpr_indexing = sscreen->info.chip_class <= GFX8; /* Some chips have RB+ registers, but don't support RB+. Those must * always disable it. @@ -1157,7 +1157,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->dcc_msaa_allowed = !(sscreen->debug_flags & DBG(NO_DCC_MSAA)); - sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI; + sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= GFX8; (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = @@ -1165,7 +1165,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_VMEM_L1; - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 695827c9dd7..1c98f41b5f3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -72,7 +72,7 @@ /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ #define SI_CONTEXT_INV_GLOBAL_L2 (1 << 6) /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't - * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */ + * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */ #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (1 << 7) /* Writeback & invalidate the L2 metadata cache. It can only be coupled with * a CB or DB flush. */ @@ -366,7 +366,7 @@ struct si_surface { unsigned cb_color_view; unsigned cb_color_attrib; unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ + unsigned cb_dcc_control; /* GFX8 and later */ unsigned spi_shader_col_format:8; /* no blending, no alpha-to-coverage. */ unsigned spi_shader_col_format_alpha:8; /* alpha-to-coverage */ unsigned spi_shader_col_format_blend:8; /* blending without alpha. */ @@ -923,7 +923,7 @@ struct si_context { bool bo_list_add_all_compute_resources; /* other shader resources */ - struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on CIK */ + struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */ struct pipe_resource *esgs_ring; struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; @@ -1572,7 +1572,7 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } @@ -1594,7 +1594,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples, else if (shaders_read_metadata) sctx->flags |= SI_CONTEXT_INV_L2_METADATA; } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2; } } diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 22c4a5b6e6e..0b7d53e745d 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -161,8 +161,8 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx, struct pipe_screen *screen = sctx->b.screen; unsigned aligned_ndw = align(state->ndw, 8); - /* only supported on CIK and later */ - if (sctx->chip_class < CIK) + /* only supported on GFX7 and later */ + if (sctx->chip_class < GFX7) return; assert(state->ndw); diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index d98bea2eeb3..cb42ebb92ce 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1019,7 +1019,7 @@ static void si_emit_query_predication(struct si_context *ctx) /* Use the value written by compute shader as a workaround. Note that * the wait flag does not apply in this predication mode. * - * The shader outputs the result value to L2. Workarounds only affect VI + * The shader outputs the result value to L2. Workarounds only affect GFX8 * and later, where the CP reads data from L2, so we don't need an * additional flush. */ @@ -1608,11 +1608,11 @@ static void si_render_condition(struct pipe_context *ctx, if (query) { bool needs_workaround = false; - /* There was a firmware regression in VI which causes successive + /* There was a firmware regression in GFX8 which causes successive * SET_PREDICATION packets to give the wrong answer for * non-inverted stream overflow predication. */ - if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) || + if (((sctx->chip_class == GFX8 && sctx->screen->info.pfp_fw_feature < 49) || (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) && !condition && (squery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || @@ -1786,7 +1786,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) { /* amdgpu */ if (sscreen->info.drm_major == 3) { - if (sscreen->info.chip_class >= VI) + if (sscreen->info.chip_class >= GFX8) return ARRAY_SIZE(si_driver_query_list); else return ARRAY_SIZE(si_driver_query_list) - 7; @@ -1794,7 +1794,7 @@ static unsigned si_get_num_queries(struct si_screen *sscreen) /* radeon */ if (sscreen->info.has_read_registers_query) { - if (sscreen->info.chip_class == CIK) + if (sscreen->info.chip_class == GFX7) return ARRAY_SIZE(si_driver_query_list) - 6; else return ARRAY_SIZE(si_driver_query_list) - 7; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index f6d882cf583..98c11e1c98d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -105,7 +105,7 @@ static bool llvm_type_is_64bit(struct si_shader_context *ctx, static bool is_merged_shader(struct si_shader_context *ctx) { - if (ctx->screen->info.chip_class <= VI) + if (ctx->screen->info.chip_class <= GFX8) return false; return ctx->shader->key.as_ls || @@ -3082,7 +3082,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Store the dynamic HS control word. */ offset = 0; - if (ctx->screen->info.chip_class <= VI) { + if (ctx->screen->info.chip_class <= GFX8) { ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->i32, 0x80000000, 0), 1, ctx->i32_0, tf_base, @@ -3719,7 +3719,7 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, addrs[4 * i + 0], ""); break; default: - fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", + fprintf(stderr, "Warning: GFX6 unhandled fs output type:%d\n", semantic_name); } } @@ -4215,11 +4215,11 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, { struct si_shader_context *ctx = si_shader_context(bld_base); - /* SI only (thanks to a hw bug workaround): + /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. */ - if (ctx->screen->info.chip_class == SI && + if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) { ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT); return; @@ -4327,7 +4327,7 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader) case PIPE_SHADER_TESS_CTRL: /* Return this so that LLVM doesn't remove s_barrier * instructions on chips where we use s_barrier. */ - return shader->selector->screen->info.chip_class >= CIK ? 128 : 64; + return shader->selector->screen->info.chip_class >= GFX7 ? 128 : 64; case PIPE_SHADER_GEOMETRY: return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64; @@ -4536,7 +4536,7 @@ static void create_function(struct si_shader_context *ctx) declare_vs_input_vgprs(ctx, &fninfo, &num_prolog_vgprs); break; - case PIPE_SHADER_TESS_CTRL: /* SI-CI-VI */ + case PIPE_SHADER_TESS_CTRL: /* GFX6-GFX8 */ declare_global_desc_pointers(ctx, &fninfo); declare_per_stage_desc_pointers(ctx, &fninfo, true); ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32); @@ -4851,7 +4851,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) { unsigned ring = ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS @@ -4897,7 +4897,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx) stride = 4 * num_components * sel->gs_max_out_vertices; - /* Limit on the stride field for <= CIK. */ + /* Limit on the stride field for <= GFX7. */ assert(stride < (1 << 14)); num_records = 64; @@ -5222,7 +5222,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) struct si_screen *sscreen = shader->selector->screen; struct si_shader_config *conf = &shader->config; unsigned num_inputs = shader->selector->info.num_inputs; - unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256; + unsigned lds_increment = sscreen->info.chip_class >= GFX7 ? 512 : 256; unsigned lds_per_wave = 0; unsigned max_simd_waves; @@ -5452,7 +5452,7 @@ static int si_compile_llvm(struct si_screen *sscreen, * - Floating-point output modifiers would be ignored by the hw. * - Some opcodes don't support denormals, such as v_mad_f32. We would * have to stop using those. - * - SI & CI would be very slow. + * - GFX6 & GFX7 would be very slow. */ conf->float_mode |= V_00B028_FP_64_DENORMS; @@ -6576,7 +6576,7 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler, /* Assume a slow CPU. */ assert(!sel->screen->info.has_dedicated_vram && - sel->screen->info.chip_class <= VI); + sel->screen->info.chip_class <= GFX8); /* For a crazy dEQP test containing 2597 memory opcodes, mostly * buffer stores. */ @@ -6831,7 +6831,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, if (sel->type == PIPE_SHADER_COMPUTE) { unsigned wave_size = 64; unsigned max_vgprs = 256; - unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512; + unsigned max_sgprs = sscreen->info.chip_class >= GFX8 ? 800 : 512; unsigned max_sgprs_per_wave = 128; unsigned max_block_threads = si_get_max_workgroup_size(shader); unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size); @@ -7263,7 +7263,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, /* Create the function. */ si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo, - ctx->screen->info.chip_class >= CIK ? 128 : 64); + ctx->screen->info.chip_class >= GFX7 ? 128 : 64); ac_declare_lds_as_pointer(&ctx->ac); func = ctx->main_fn; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 82c521efcb7..16b78fbf43e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -247,7 +247,7 @@ enum { #define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24) #define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF -/* SI-specific system values. */ +/* Driver-specific system values. */ enum { /* Values from set_tess_state. */ TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 5e540fc5098..be0cb89f722 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -48,8 +48,8 @@ static LLVMValueRef get_buffer_size( LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->i32, 2, 0), ""); - if (ctx->screen->info.chip_class == VI) { - /* On VI, the descriptor contains the size in bytes, + if (ctx->screen->info.chip_class == GFX8) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ @@ -132,7 +132,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t /* Match the resource type set in the descriptor. */ if (dim == ac_image_cube || - (screen->info.chip_class <= VI && dim == ac_image_3d)) + (screen->info.chip_class <= GFX8 && dim == ac_image_3d)) dim = ac_image_2darray; else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) { /* When a single layer of a 3D texture is bound, the shader @@ -161,7 +161,7 @@ ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type t static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc) { - if (ctx->screen->info.chip_class <= CIK) { + if (ctx->screen->info.chip_class <= GFX7) { return rsrc; } else { LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); @@ -327,11 +327,11 @@ static unsigned get_cache_policy(struct si_shader_context *ctx, unsigned cache_policy = 0; if (!atomic && - /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. * All store opcodes not aligned to a dword are affected. * The only way to get unaligned stores in radeonsi is through * shader images. */ - ((may_store_unaligned && ctx->screen->info.chip_class == SI) || + ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) || /* If this is write-only, don't keep data in L1 to prevent * evicting L1 cache lines that may be needed by other * instructions. */ @@ -1099,13 +1099,13 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, @@ -1113,7 +1113,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, { LLVMValueRef img7, samp0; - if (ctx->screen->info.chip_class >= VI) + if (ctx->screen->info.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1446,7 +1446,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, * so the depth comparison value isn't clamped for Z16 and * Z24 anymore. Do it manually here. */ - if (ctx->screen->info.chip_class >= VI) { + if (ctx->screen->info.chip_class >= GFX8) { LLVMValueRef upgraded; LLVMValueRef clamped; upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, @@ -1530,7 +1530,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->info.chip_class <= VI) { + ctx->screen->info.chip_class <= GFX8) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]); } @@ -1687,7 +1687,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* The hardware needs special lowering for Gather4 with integer formats. */ LLVMValueRef gather4_int_result_workaround = NULL; - if (ctx->screen->info.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && opcode == TGSI_OPCODE_TG4) { assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 55965bc86a1..bc91e6f5148 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -103,12 +103,12 @@ static void si_emit_cb_render_state(struct si_context *sctx) radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, cb_target_mask); - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { /* DCC MSAA workaround for blending. * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- * COMBINER_DISABLE, but that would be more complicated. */ - bool oc_disable = (sctx->chip_class == VI || + bool oc_disable = (sctx->chip_class == GFX8 || sctx->chip_class == GFX9) && blend && blend->blend_enable_4bit & cb_target_mask && @@ -1391,7 +1391,7 @@ static void si_emit_db_render_state(struct si_context *sctx) !sctx->occlusion_queries_disabled) { bool perfect = sctx->num_perfect_occlusion_queries > 0; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { unsigned log_sample_rate = sctx->framebuffer.log_samples; /* Stoney doesn't increment occlusion query counters @@ -1413,7 +1413,7 @@ static void si_emit_db_render_state(struct si_context *sctx) } } else { /* Disable occlusion queries. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { db_count_control = 0; } else { db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); @@ -1433,8 +1433,8 @@ static void si_emit_db_render_state(struct si_context *sctx) db_shader_control = sctx->ps_db_shader_control; - /* Bug workaround for smoothing (overrasterization) on SI. */ - if (sctx->chip_class == SI && sctx->smoothing_enabled) { + /* Bug workaround for smoothing (overrasterization) on GFX6. */ + if (sctx->chip_class == GFX6 && sctx->smoothing_enabled) { db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); } @@ -1589,7 +1589,7 @@ static uint32_t si_translate_dbformat(enum pipe_format format) case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: - return V_028040_Z_24; /* deprecated on SI */ + return V_028040_Z_24; /* deprecated on AMD GCN */ case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: return V_028040_Z_32_FLOAT; @@ -1623,9 +1623,9 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, /* * Implemented as an 8_8_8_8 data format to fix texture * gathers in stencil sampling. This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. */ - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return V_008F14_IMG_DATA_FORMAT_8_8_8_8; if (format == PIPE_FORMAT_X24S8_UINT) @@ -2461,14 +2461,14 @@ static void si_initialize_color_surface(struct si_context *sctx, color_info |= S_028C70_COMPRESSION(1); unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh); - if (sctx->chip_class == SI) { - /* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */ + if (sctx->chip_class == GFX6) { + /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } } } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B; @@ -2492,7 +2492,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* This must be set for fast clear to work without FMASK. */ - if (!tex->surface.fmask_size && sctx->chip_class == SI) { + if (!tex->surface.fmask_size && sctx->chip_class == GFX6) { unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } @@ -2576,7 +2576,7 @@ static void si_init_depth_surface(struct si_context *sctx, } if (tex->surface.has_stencil) { - /* Stencil buffer workaround ported from the SI-CI-VI code. + /* Stencil buffer workaround ported from the GFX6-GFX8 code. * See that for explanation. */ s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); @@ -2592,7 +2592,7 @@ static void si_init_depth_surface(struct si_context *sctx, S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned); } } else { - /* SI-CI-VI */ + /* GFX6-GFX8 */ struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); @@ -2607,7 +2607,7 @@ static void si_init_depth_surface(struct si_context *sctx, s_info = S_028044_FORMAT(stencil_format); surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { struct radeon_info *info = &sctx->screen->info; unsigned index = tex->surface.u.legacy.tiling_index[level]; unsigned stencil_index = tex->surface.u.legacy.stencil_tiling_index[level]; @@ -2746,7 +2746,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, bool unbound = false; int i; - /* Reject zero-sized framebuffers due to a hw bug on SI that occurs + /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. * We could implement the full workaround here, but it's a useless case. */ @@ -2935,7 +2935,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, } /* For optimal DCC performance. */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) sctx->framebuffer.dcc_overwrite_combiner_watermark = 4; else if (num_bpp64_colorbufs >= 5) sctx->framebuffer.dcc_overwrite_combiner_watermark = 8; @@ -3139,7 +3139,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_set_context_reg(cs, R_0287A0_CB_MRT0_EPITCH + i * 4, S_0287A0_EPITCH(tex->surface.u.gfx9.surf.epitch)); } else { - /* Compute mutable surface parameters (SI-CI-VI). */ + /* Compute mutable surface parameters (GFX6-GFX8). */ const struct legacy_surf_level *level_info = &tex->surface.u.legacy.level[cb->base.u.tex.level]; unsigned pitch_tile_max, slice_tile_max, tile_mode_index; @@ -3167,20 +3167,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); if (tex->surface.fmask_size) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index); cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); } radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, - sctx->chip_class >= VI ? 14 : 13); + sctx->chip_class >= GFX8 ? 14 : 13); radeon_emit(cs, cb_color_base); /* CB_COLOR0_BASE */ radeon_emit(cs, cb_color_pitch); /* CB_COLOR0_PITCH */ radeon_emit(cs, cb_color_slice); /* CB_COLOR0_SLICE */ @@ -3195,7 +3195,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ radeon_emit(cs, tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ - if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */ + if (sctx->chip_class >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ radeon_emit(cs, cb_dcc_base); } } @@ -3328,7 +3328,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx) /* The exclusion bits can be set to improve rasterization efficiency * if no sample lies on the pixel boundary (-8 sample offset). */ - bool exclusion = sctx->chip_class >= CIK && + bool exclusion = sctx->chip_class >= GFX7 && (!rs->multisample_enable || nr_samples != 16); radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, @@ -3606,11 +3606,11 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, /* The NUM_RECORDS field has a different meaning depending on the chip, * instruction type, STRIDE, and SWIZZLE_ENABLE. * - * SI-CIK: + * GFX6-GFX7: * - If STRIDE == 0, it's in byte units. * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. * - * VI: + * GFX8: * - For SMEM and STRIDE == 0, it's in byte units. * - For SMEM and STRIDE != 0, it's in units of STRIDE. * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. @@ -3633,7 +3633,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, * the first element is readable when IDXEN == 0. */ num_records = num_records ? MAX2(num_records, stride) : 0; - else if (screen->info.chip_class == VI) + else if (screen->info.chip_class == GFX8) num_records *= stride; state[4] = 0; @@ -3720,9 +3720,9 @@ si_make_texture_descriptor(struct si_screen *screen, /* * X24S8 is implemented as an 8_8_8_8 data format, to * fix texture gathers. This affects at least - * GL45-CTS.texture_cube_map_array.sampling on VI. + * GL45-CTS.texture_cube_map_array.sampling on GFX8. */ - if (screen->info.chip_class <= VI) + if (screen->info.chip_class <= GFX8) util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); else util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); @@ -3816,7 +3816,7 @@ si_make_texture_descriptor(struct si_screen *screen, if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || - (screen->info.chip_class <= VI && + (screen->info.chip_class <= GFX8 && res->target == PIPE_TEXTURE_3D))) { /* For the purpose of shader images, treat cube maps and 3D * textures as 2D arrays. For 3D textures, the address @@ -3887,7 +3887,7 @@ si_make_texture_descriptor(struct si_screen *screen, /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (screen->info.chip_class <= CIK && res->nr_samples <= 1) { + if (screen->info.chip_class <= GFX7 && res->nr_samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -4094,7 +4094,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, height = height0; depth = texture->depth0; - if (sctx->chip_class <= VI && force_level) { + if (sctx->chip_class <= GFX8 && force_level) { assert(force_level == first_level && force_level == last_level); base_level = force_level; @@ -4331,7 +4331,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | - S_008F30_COMPAT_MODE(sctx->chip_class >= VI)); + S_008F30_COMPAT_MODE(sctx->chip_class >= GFX8)); rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0)); @@ -4340,9 +4340,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) | S_008F38_MIP_POINT_PRECLAMP(0) | - S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) | + S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | - S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI)); + S_008F38_ANISO_OVERRIDE(sctx->chip_class >= GFX8)); rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false); /* Create sampler resource for integer textures. */ @@ -4537,9 +4537,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, /* The hardware always treats the 2-bit alpha channel as * unsigned, so a shader workaround is needed. The affected - * chips are VI and older except Stoney (GFX8.1). + * chips are GFX8 and older except Stoney (GFX8.1). */ - always_fix = sscreen->info.chip_class <= VI && + always_fix = sscreen->info.chip_class <= GFX8 && sscreen->info.family != CHIP_STONEY && channel->type == UTIL_FORMAT_TYPE_SIGNED; } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { @@ -4585,7 +4585,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, * into account would complicate the fast path (where everything * is nicely aligned). */ - bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == SI; + bool check_alignment = log_hw_load_size >= 1 && sscreen->info.chip_class == GFX6; bool opencode = sscreen->options.vs_fetch_always_opencode; if (check_alignment && @@ -4810,10 +4810,10 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) } if (flags & PIPE_BARRIER_INDEX_BUFFER) { - /* Indices are read through TC L2 since VI. + /* Indices are read through TC L2 since GFX8. * L1 isn't used. */ - if (sctx->screen->info.chip_class <= CIK) + if (sctx->screen->info.chip_class <= GFX7) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4824,12 +4824,12 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) sctx->framebuffer.uncompressed_cb_mask) { sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->screen->info.chip_class <= VI && + if (sctx->screen->info.chip_class <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } @@ -4917,7 +4917,7 @@ void si_init_screen_state_functions(struct si_screen *sscreen) static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) { - unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX : + unsigned reg = sctx->chip_class >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; si_pm4_set_reg(pm4, reg, value); } @@ -4954,7 +4954,7 @@ si_write_harvested_raster_configs(struct si_context *sctx, } si_set_grbm_gfx_index(sctx, pm4, ~0); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } } @@ -4973,7 +4973,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p */ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { @@ -4988,8 +4988,8 @@ static void si_init_config(struct si_context *sctx) bool has_clear_state = sscreen->has_clear_state; struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state); - /* SI, radeon kernel disabled CLEAR_STATE. */ - assert(has_clear_state || sscreen->info.chip_class == SI || + /* GFX6, radeon kernel disabled CLEAR_STATE. */ + assert(has_clear_state || sscreen->info.chip_class == GFX6 || sscreen->info.drm_major != 3); if (!pm4) @@ -5006,7 +5006,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_cmd_end(pm4, false); } - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_set_raster_config(sctx, pm4); si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); @@ -5014,7 +5014,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); /* FIXME calculate these values somehow ??? */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); } @@ -5028,14 +5028,14 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!has_clear_state) si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); - if (sctx->chip_class < CIK) + if (sctx->chip_class < GFX7) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - if (sctx->chip_class <= CIK) { + if (sctx->chip_class <= GFX7) { si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); @@ -5077,7 +5077,7 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F)); @@ -5136,7 +5136,7 @@ static void si_init_config(struct si_context *sctx) S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F)); } - if (sctx->chip_class >= VI) { + if (sctx->chip_class >= GFX8) { unsigned vgt_tess_distribution; vgt_tess_distribution = @@ -5159,7 +5159,7 @@ static void si_init_config(struct si_context *sctx) } si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8e01e1b35e1..7bbe66d46ae 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -78,7 +78,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, struct si_shader_selector *tcs = sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id; - bool has_primid_instancing_bug = sctx->chip_class == SI && + bool has_primid_instancing_bug = sctx->chip_class == GFX6 && sctx->screen->info.max_se == 1; unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; unsigned num_tcs_input_cp = info->vertices_per_patch; @@ -152,7 +152,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Make sure that the data fits in LDS. This assumes the shaders only * use LDS for the inputs and outputs. * - * While CIK can use 64K per threadgroup, there is a hang on Stoney + * While GFX7 can use 64K per threadgroup, there is a hang on Stoney * with 2 CUs if we use more than 32K. The closed Vulkan driver also * uses 32K at most on all GCN chips. */ @@ -185,8 +185,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (temp_verts_per_tg > 64 && temp_verts_per_tg % 64 < 48) *num_patches = (temp_verts_per_tg & ~63) / max_verts_per_patch; - if (sctx->chip_class == SI) { - /* SI bug workaround, related to power management. Limit LS-HS + if (sctx->chip_class == GFX6) { + /* GFX6 bug workaround, related to power management. Limit LS-HS * threadgroups to only one wave. */ unsigned one_wave = 64 / max_verts_per_patch; @@ -200,7 +200,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, * The intended solution is to restrict threadgroups to * a single instance by setting SWITCH_ON_EOI, which * should cause IA to split instances up. However, this - * doesn't work correctly on SI when there is no other + * doesn't work correctly on GFX6 when there is no other * SE to switch to. */ if (has_primid_instancing_bug && tess_uses_primid) @@ -238,7 +238,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Compute the LDS size. */ lds_size = output_patch0_offset + output_patch_size * *num_patches; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { assert(lds_size <= 65536); lds_size = align(lds_size, 512) / 512; } else { @@ -272,7 +272,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */ - if (sctx->chip_class == CIK && sctx->family != CHIP_HAWAII) + if (sctx->chip_class == GFX7 && sctx->family != CHIP_HAWAII) radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); radeon_emit(cs, ls_current->config.rsrc1); @@ -297,7 +297,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); if (sctx->last_ls_hs_config != ls_hs_config) { - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); } else { @@ -349,10 +349,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, key->u.uses_gs) partial_vs_wave = true; - /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= VI) */ + /* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= GFX8) */ if (sscreen->has_distributed_tess) { if (key->u.uses_gs) { - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) partial_es_wave = true; } else { partial_vs_wave = true; @@ -367,7 +367,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, wd_switch_on_eop = true; } - if (sscreen->info.chip_class >= CIK) { + if (sscreen->info.chip_class >= GFX7) { /* WD_SWITCH_ON_EOP has no effect on GPUs with less than * 4 shader engines. Set 1 to pass the assertion below. * The other cases are hardware requirements. @@ -400,12 +400,12 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, * Assume indirect draws always use small instances. * This is needed for good VS wave utilization. */ - if (sscreen->info.chip_class <= VI && + if (sscreen->info.chip_class <= GFX8 && sscreen->info.max_se == 4 && key->u.multi_instances_smaller_than_primgroup) wd_switch_on_eop = true; - /* Required on CIK and later. */ + /* Required on GFX7 and later. */ if (sscreen->info.max_se == 4 && !wd_switch_on_eop) ia_switch_on_eoi = true; @@ -421,10 +421,10 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, sscreen->info.family == CHIP_VEGAM)) partial_vs_wave = true; - /* Required by Hawaii and, for some special cases, by VI. */ + /* Required by Hawaii and, for some special cases, by GFX8. */ if (ia_switch_on_eoi && (sscreen->info.family == CHIP_HAWAII || - (sscreen->info.chip_class == VI && + (sscreen->info.chip_class == GFX8 && (key->u.uses_gs || max_primgroup_in_wave != 2)))) partial_vs_wave = true; @@ -444,16 +444,16 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen, } /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ - if (sscreen->info.chip_class <= VI && ia_switch_on_eoi) + if (sscreen->info.chip_class <= GFX8 && ia_switch_on_eoi) partial_es_wave = true; return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | - S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= CIK ? wd_switch_on_eop : 0) | + S_028AA8_WD_SWITCH_ON_EOP(sscreen->info.chip_class >= GFX7 ? wd_switch_on_eop : 0) | /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ - S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == VI ? + S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->info.chip_class == GFX8 ? max_primgroup_in_wave : 0) | S_030960_EN_INST_OPT_BASIC(sscreen->info.chip_class >= GFX9) | S_030960_EN_INST_OPT_ADV(sscreen->info.chip_class >= GFX9); @@ -519,7 +519,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, if (sctx->gs_shader.cso) { /* GS requirement. */ - if (sctx->chip_class <= VI && + if (sctx->chip_class <= GFX8 && SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1); @@ -625,7 +625,7 @@ static void si_emit_draw_registers(struct si_context *sctx, radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); - else if (sctx->chip_class >= CIK) + else if (sctx->chip_class >= GFX7) radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); else radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); @@ -633,7 +633,7 @@ static void si_emit_draw_registers(struct si_context *sctx, sctx->last_multi_vgt_param = ia_multi_vgt_param; } if (prim != sctx->last_prim) { - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); else @@ -700,12 +700,12 @@ static void si_emit_draw_packets(struct si_context *sctx, break; case 2: index_type = V_028A7C_VGT_INDEX_16 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_16_BIT : 0); break; case 4: index_type = V_028A7C_VGT_INDEX_32 | - (SI_BIG_ENDIAN && sctx->chip_class <= CIK ? + (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_32_BIT : 0); break; default: @@ -733,10 +733,10 @@ static void si_emit_draw_packets(struct si_context *sctx, si_resource(indexbuf), RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { - /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, + /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed draw. */ - if (sctx->chip_class >= CIK) + if (sctx->chip_class >= GFX7) sctx->last_index_size = -1; } @@ -918,7 +918,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_FLUSH_AND_INV_DB) sctx->num_db_cache_flushes++; - /* SI has a bug that it always flushes ICACHE and KCACHE if either + /* GFX6 has a bug that it always flushes ICACHE and KCACHE if either * bit is set. An alternative way is to write SQC_CACHES, but that * doesn't seem to work reliably. Since the bug doesn't affect * correctness (it only does more work than necessary) and @@ -931,7 +931,7 @@ void si_emit_cache_flush(struct si_context *sctx) if (flags & SI_CONTEXT_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -944,7 +944,7 @@ void si_emit_cache_flush(struct si_context *sctx) S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (sctx->chip_class == VI) + if (sctx->chip_class == GFX8) si_cp_release_mem(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE, @@ -1085,25 +1085,25 @@ void si_emit_cache_flush(struct si_context *sctx) radeon_emit(cs, 0); } - /* SI-CI-VI only: + /* GFX6-GFX8 only: * When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC * waits for idle, so it should be last. SURFACE_SYNC is done in PFP. * * cp_coher_cntl should contain all necessary flags except TC flags * at this point. * - * SI-CIK don't support L2 write-back. + * GFX6-GFX7 don't support L2 write-back. */ if (flags & SI_CONTEXT_INV_GLOBAL_L2 || - (sctx->chip_class <= CIK && + (sctx->chip_class <= GFX7 && (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { - /* Invalidate L1 & L2. (L1 is always invalidated on SI) - * WB must be set on VI+ when TC_ACTION is set. + /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) + * WB must be set on GFX8+ when TC_ACTION is set. */ si_emit_surface_sync(sctx, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | - S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= VI)); + S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); cp_coher_cntl = 0; sctx->num_L2_invalidates++; } else { @@ -1260,7 +1260,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i unsigned index_offset = info->indirect ? info->start * index_size : 0; if (likely(!info->indirect)) { - /* SI-CI treat instance_count==0 as instance_count==1. There is + /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip * direct draws. */ @@ -1362,8 +1362,8 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i if (index_size) { /* Translate or upload, if needed. */ - /* 8-bit indices are supported on VI. */ - if (sctx->chip_class <= CIK && index_size == 1) { + /* 8-bit indices are supported on GFX8. */ + if (sctx->chip_class <= GFX7 && index_size == 1) { unsigned start, count, start_offset, size, offset; void *ptr; @@ -1403,9 +1403,9 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* info->start will be added by the drawing code */ index_offset -= start_offset; - } else if (sctx->chip_class <= CIK && + } else if (sctx->chip_class <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) { - /* VI reads index buffers through TC L2, so it doesn't + /* GFX8 reads index buffers through TC L2, so it doesn't * need this. */ sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indexbuf)->TC_L2_dirty = false; @@ -1419,7 +1419,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_context_add_resource_size(sctx, indirect->buffer); /* Indirect buffers use TC L2 on GFX9, but not older hw. */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { if (si_resource(indirect->buffer)->TC_L2_dirty) { sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; si_resource(indirect->buffer)->TC_L2_dirty = false; @@ -1498,7 +1498,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Start prefetches after the draw has been started. Both will run * in parallel, but starting the draw first is more important. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } else { /* If we don't wait for idle, start prefetches first, then set @@ -1508,7 +1508,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i si_emit_cache_flush(sctx); /* Only prefetch the API VS and VBO descriptors. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, true); if (!si_upload_graphics_shader_descriptors(sctx)) @@ -1527,7 +1527,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i /* Prefetch the remaining shaders after the draw has been * started. */ - if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask) + if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask) cik_emit_prefetch_L2(sctx, false); } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 51a3af92d0c..10677f175de 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -473,7 +473,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) unsigned vgpr_comp_cnt; uint64_t va; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -547,7 +547,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) S_00B428_FLOAT_MODE(shader->config.float_mode) | S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt)); - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); } @@ -587,7 +587,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) uint64_t va; unsigned oc_lds_en; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); pm4 = si_get_shader_pm4_state(shader); if (!pm4) @@ -973,7 +973,7 @@ static void si_emit_shader_vs(struct si_context *sctx) SI_TRACKED_VGT_PRIMITIVEID_EN, shader->ctx_reg.vs.vgt_primitiveid_en); - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF, SI_TRACKED_VGT_REUSE_OFF, shader->ctx_reg.vs.vgt_reuse_off); @@ -1052,7 +1052,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, shader->ctx_reg.vs.vgt_primitiveid_en = 0; } - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { /* Reuse needs to be set off if we write oViewport. */ shader->ctx_reg.vs.vgt_reuse_off = S_028AB4_REUSE_OFF(info->writes_viewport_index); @@ -1602,11 +1602,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, blend && blend->alpha_to_coverage) key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; - /* On SI and CIK except Hawaii, the CB doesn't clamp outputs + /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ - if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) { + if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) { key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10; } @@ -2706,10 +2706,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) switch (shader->selector->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, ls, shader->pm4); } else if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2720,7 +2720,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader) break; case PIPE_SHADER_TESS_EVAL: if (shader->key.as_es) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_delete_state(sctx, es, shader->pm4); } else { si_pm4_delete_state(sctx, vs, shader->pm4); @@ -2937,10 +2937,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) unsigned num_se = sctx->screen->info.max_se; unsigned wave_size = 64; unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ - /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16. - * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). + /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. + * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2). */ - unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se; + unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se; unsigned alignment = 256 * num_se; /* The maximum size is 63.999 MB per SE. */ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; @@ -2967,7 +2967,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) * * GFX9 doesn't have the ESGS ring. */ - bool update_esgs = sctx->chip_class <= VI && + bool update_esgs = sctx->chip_class <= GFX8 && esgs_ring_size && (!sctx->esgs_ring || sctx->esgs_ring->width0 < esgs_ring_size); @@ -3005,9 +3005,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) if (!pm4) return false; - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256); } @@ -3039,7 +3039,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) /* Set ring bindings. */ if (sctx->esgs_ring) { - assert(sctx->chip_class <= VI); + assert(sctx->chip_class <= GFX8); si_set_ring_buffer(sctx, SI_ES_RING_ESGS, sctx->esgs_ring, 0, sctx->esgs_ring->width0, true, true, 4, 64, 0); @@ -3288,7 +3288,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) sctx->screen->tess_offchip_ring_size; /* Append these registers to the init config state. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, @@ -3376,7 +3376,7 @@ bool si_update_shaders(struct si_context *sctx) } /* VS as LS */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); if (r) @@ -3408,7 +3408,7 @@ bool si_update_shaders(struct si_context *sctx) if (sctx->gs_shader.cso) { /* TES as ES */ - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { r = si_shader_select(ctx, &sctx->tes_shader, &compiler_state); if (r) @@ -3424,7 +3424,7 @@ bool si_update_shaders(struct si_context *sctx) si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4); } } else if (sctx->gs_shader.cso) { - if (sctx->chip_class <= VI) { + if (sctx->chip_class <= GFX8) { /* VS as ES */ r = si_shader_select(ctx, &sctx->vs_shader, &compiler_state); @@ -3457,7 +3457,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } else { si_pm4_bind_state(sctx, gs, NULL); - if (sctx->chip_class <= VI) + if (sctx->chip_class <= GFX8) si_pm4_bind_state(sctx, es, NULL); } @@ -3504,7 +3504,7 @@ bool si_update_shaders(struct si_context *sctx) sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); if (sctx->framebuffer.nr_samples <= 1) @@ -3522,7 +3522,7 @@ bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { if (si_pm4_state_enabled_and_changed(sctx, ls)) sctx->prefetch_L2_mask |= SI_PREFETCH_LS; else if (!sctx->queued.named.ls) diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c index 2a0a4bef9a2..e7058f19a8a 100644 --- a/src/gallium/drivers/radeonsi/si_state_streamout.c +++ b/src/gallium/drivers/radeonsi/si_state_streamout.c @@ -103,7 +103,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, * to flush it. * * The only cases which requires flushing it is VGT DMA index - * fetching (on <= CIK) and indirect draw data, which are rare + * fetching (on <= GFX7) and indirect draw data, which are rare * cases. Thus, flag the TC L2 dirtiness in the resource and * handle it at draw call time. */ @@ -195,7 +195,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx) unsigned reg_strmout_cntl; /* The register is at different places on different ASICs. */ - if (sctx->chip_class >= CIK) { + if (sctx->chip_class >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -230,7 +230,7 @@ static void si_emit_streamout_begin(struct si_context *sctx) t[i]->stride_in_dw = stride_in_dw[i]; - /* SI binds streamout buffers as shader resources. + /* AMD GCN binds streamout buffers as shader resources. * VGT only counts primitives and tells the shader * through SGPRs what to do. */ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c index 792d1c4efd1..a144d7b661c 100644 --- a/src/gallium/drivers/radeonsi/si_state_viewport.c +++ b/src/gallium/drivers/radeonsi/si_state_viewport.c @@ -126,10 +126,10 @@ static void si_emit_one_scissor(struct si_context *ctx, if (scissor) si_clip_scissor(&final, scissor); - /* Workaround for a hw bug on SI that occurs when PA_SU_HARDWARE_- + /* Workaround for a hw bug on GFX6 that occurs when PA_SU_HARDWARE_- * SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. */ - if (ctx->chip_class == SI && (final.maxx == 0 || final.maxy == 0)) { + if (ctx->chip_class == GFX6 && (final.maxx == 0 || final.maxy == 0)) { radeon_emit(cs, S_028250_TL_X(1) | S_028250_TL_Y(1) | S_028250_WINDOW_OFFSET_DISABLE(1)); @@ -180,9 +180,9 @@ static void si_emit_guardband(struct si_context *ctx) int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2; int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2; - /* SI-CI need to align the offset to an ubertile consisting of all SEs. */ + /* GFX6-GFX7 need to align the offset to an ubertile consisting of all SEs. */ const unsigned hw_screen_offset_alignment = - ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); + ctx->chip_class >= GFX8 ? 16 : MAX2(ctx->screen->se_tile_repeat, 16); /* Indexed by quantization modes */ static int max_viewport_size[] = {65535, 16383, 4095}; diff --git a/src/gallium/drivers/radeonsi/si_test_dma_perf.c b/src/gallium/drivers/radeonsi/si_test_dma_perf.c index 124f5bb5c12..263187d683f 100644 --- a/src/gallium/drivers/radeonsi/si_test_dma_perf.c +++ b/src/gallium/drivers/radeonsi/si_test_dma_perf.c @@ -112,11 +112,11 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned cs_dwords_per_thread = test_cs ? cs_dwords_per_thread_list[cs_method % NUM_SHADERS] : 0; - if (sctx->chip_class == SI) { - /* SI doesn't support CP DMA operations through L2. */ + if (sctx->chip_class == GFX6) { + /* GFX6 doesn't support CP DMA operations through L2. */ if (test_cp && cache_policy != L2_BYPASS) continue; - /* WAVES_PER_SH is in multiples of 16 on SI. */ + /* WAVES_PER_SH is in multiples of 16 on GFX6. */ if (test_cs && cs_waves_per_sh % 16 != 0) continue; } @@ -151,7 +151,7 @@ void si_test_dma_perf(struct si_screen *sscreen) unsigned query_type = PIPE_QUERY_TIME_ELAPSED; if (test_sdma) { - if (sctx->chip_class == SI) + if (sctx->chip_class == GFX6) query_type = SI_QUERY_TIME_ELAPSED_SDMA_SI; else query_type = SI_QUERY_TIME_ELAPSED_SDMA; @@ -346,10 +346,10 @@ void si_test_dma_perf(struct si_screen *sscreen) if (!r->is_valid) continue; - /* Ban CP DMA clears via MC on <= VI. They are super slow + /* Ban CP DMA clears via MC on <= GFX8. They are super slow * on GTT, which we can get due to BO evictions. */ - if (sctx->chip_class <= VI && placement == 1 && + if (sctx->chip_class <= GFX8 && placement == 1 && r->is_cp && r->cache_policy == L2_BYPASS) continue; diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 59d50376438..74c9cf9d7bf 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -254,10 +254,10 @@ static int si_init_surface(struct si_screen *sscreen, array_mode == RADEON_SURF_MODE_2D)) { /* TC-compatible HTILE only supports Z32_FLOAT. * GFX9 also supports Z16_UNORM. - * On VI, promote Z16 to Z32. DB->CB copies will convert + * On GFX8, promote Z16 to Z32. DB->CB copies will convert * the format for transfers. */ - if (sscreen->info.chip_class == VI) + if (sscreen->info.chip_class == GFX8) bpe = 4; flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; @@ -267,7 +267,7 @@ static int si_init_surface(struct si_screen *sscreen, flags |= RADEON_SURF_SBUFFER; } - if (sscreen->info.chip_class >= VI && + if (sscreen->info.chip_class >= GFX8 && (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT || (ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed))) @@ -278,8 +278,8 @@ static int si_init_surface(struct si_screen *sscreen, bpe == 16 && ptex->nr_samples >= 2) flags |= RADEON_SURF_DISABLE_DCC; - /* VI: DCC clear for 4x and 8x MSAA array textures unimplemented. */ - if (sscreen->info.chip_class == VI && + /* GFX8: DCC clear for 4x and 8x MSAA array textures unimplemented. */ + if (sscreen->info.chip_class == GFX8 && ptex->nr_storage_samples >= 4 && ptex->array_size > 1) flags |= RADEON_SURF_DISABLE_DCC; @@ -700,7 +700,7 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, md.size_metadata = 10 * 4; /* Dwords [10:..] contain the mipmap level offsets. */ - if (sscreen->info.chip_class <= VI) { + if (sscreen->info.chip_class <= GFX8) { for (unsigned i = 0; i <= res->last_level; i++) md.metadata[10+i] = tex->surface.u.legacy.level[i].offset >> 8; @@ -716,7 +716,7 @@ static void si_get_opaque_metadata(struct si_screen *sscreen, { uint32_t *desc = &md->metadata[2]; - if (sscreen->info.chip_class < VI) + if (sscreen->info.chip_class < GFX8) return; /* Return if DCC is enabled. The texture should be set up with it @@ -757,7 +757,7 @@ static bool si_has_displayable_dcc(struct si_texture *tex) { struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen; - if (sscreen->info.chip_class <= VI) + if (sscreen->info.chip_class <= GFX8) return false; /* This needs a cache flush before scanout. @@ -849,7 +849,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen, assert(tex->surface.tile_swizzle == 0); } - /* Since shader image stores don't support DCC on VI, + /* Since shader image stores don't support DCC on GFX8, * disable it for external clients that want write * access. */ @@ -974,7 +974,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = sscreen->info.num_tile_pipes; - assert(sscreen->info.chip_class <= VI); + assert(sscreen->info.chip_class <= GFX8); tex->surface.htile_size = 0; @@ -989,7 +989,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, * are always reproducible. I think I have seen the test hang * on Carrizo too, though it was very rare there. */ - if (sscreen->info.chip_class >= CIK && num_pipes < 4) + if (sscreen->info.chip_class >= GFX7 && num_pipes < 4) num_pipes = 4; switch (num_pipes) { @@ -1036,7 +1036,7 @@ static void si_texture_get_htile_size(struct si_screen *sscreen, static void si_texture_allocate_htile(struct si_screen *sscreen, struct si_texture *tex) { - if (sscreen->info.chip_class <= VI && !tex->tc_compatible_htile) + if (sscreen->info.chip_class <= GFX8 && !tex->tc_compatible_htile) si_texture_get_htile_size(sscreen, tex); if (!tex->surface.htile_size) @@ -1229,7 +1229,7 @@ si_texture_create_object(struct pipe_screen *screen, RADEON_SURF_TC_COMPATIBLE_HTILE); /* TC-compatible HTILE: - * - VI only supports Z32_FLOAT. + * - GFX8 only supports Z32_FLOAT. * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ if (tex->tc_compatible_htile) { if (sscreen->info.chip_class >= GFX9 && @@ -1506,10 +1506,10 @@ si_choose_tiling(struct si_screen *sscreen, if (templ->flags & SI_RESOURCE_FLAG_TRANSFER) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, + /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8, * which requires 2D tiling. */ - if (sscreen->info.chip_class == VI && tc_compatible_htile) + if (sscreen->info.chip_class == GFX8 && tc_compatible_htile) return RADEON_SURF_MODE_2D; /* Handle common candidates for the linear mode. @@ -1525,7 +1525,7 @@ si_choose_tiling(struct si_screen *sscreen, if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Cursors are linear on SI. + /* Cursors are linear on AMD GCN. * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */ if (templ->bind & PIPE_BIND_CURSOR) return RADEON_SURF_MODE_LINEAR_ALIGNED; @@ -1582,7 +1582,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen, struct radeon_surf surface = {0}; bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH; bool tc_compatible_htile = - sscreen->info.chip_class >= VI && + sscreen->info.chip_class >= GFX8 && /* There are issues with TC-compatible HTILE on Tonga (and * Iceland is the same design), and documented bug workarounds * don't help. For example, this fails: @@ -2450,7 +2450,7 @@ void vi_separate_dcc_try_enable(struct si_context *sctx, sctx->screen->debug_flags & DBG(NO_DCC_FB)) return; - assert(sctx->chip_class >= VI); + assert(sctx->chip_class >= GFX8); if (tex->dcc_offset) return; /* already enabled */ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 972030eaaa8..5e45caa362b 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -384,7 +384,7 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs) static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs) { - return cs->ctx->ws->info.chip_class >= CIK && + return cs->ctx->ws->info.chip_class >= GFX7 && (cs->ring_type == RING_GFX || cs->ring_type == RING_COMPUTE); } @@ -1523,7 +1523,7 @@ static int amdgpu_cs_flush(struct radeon_cmdbuf *rcs, switch (cs->ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ - if (ws->info.chip_class <= SI) { + if (ws->info.chip_class <= GFX6) { while (rcs->current.cdw & 7) radeon_emit(rcs, 0xf0000000); /* NOP packet */ } else { diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index e847c5cff10..e8a6677f378 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -68,11 +68,11 @@ static void handle_env_var_force_family(struct amdgpu_winsys *ws) if (i >= CHIP_VEGA10) ws->info.chip_class = GFX9; else if (i >= CHIP_TONGA) - ws->info.chip_class = VI; + ws->info.chip_class = GFX8; else if (i >= CHIP_BONAIRE) - ws->info.chip_class = CIK; + ws->info.chip_class = GFX7; else - ws->info.chip_class = SI; + ws->info.chip_class = GFX6; /* Don't submit any IBs. */ setenv("RADEON_NOOP", "1", 1); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 490c246d6e0..43185366803 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -552,7 +552,7 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, switch (cs->ring_type) { case RING_DMA: /* pad DMA ring to 8 DWs */ - if (cs->ws->info.chip_class <= SI) { + if (cs->ws->info.chip_class <= GFX6) { while (rcs->current.cdw & 7) radeon_emit(&cs->base, 0xf0000000); /* NOP packet */ } else { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c index 20cfc86ebe0..d33c4c7132d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_surface.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_surface.c @@ -50,14 +50,14 @@ static void set_micro_tile_mode(struct radeon_surf *surf, { uint32_t tile_mode; - if (info->chip_class < SI) { + if (info->chip_class < GFX6) { surf->micro_tile_mode = 0; return; } tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; - if (info->chip_class >= CIK) + if (info->chip_class >= GFX7) surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); else surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); @@ -231,7 +231,7 @@ static void si_compute_cmask(const struct radeon_info *info, if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) return; - assert(info->chip_class <= VI); + assert(info->chip_class <= GFX8); switch (num_pipes) { case 2: diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 293372cc26d..225cc01a33d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -269,14 +269,14 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) case CHIP_VERDE: case CHIP_OLAND: case CHIP_HAINAN: - ws->info.chip_class = SI; + ws->info.chip_class = GFX6; break; case CHIP_BONAIRE: case CHIP_KAVERI: case CHIP_KABINI: case CHIP_HAWAII: case CHIP_MULLINS: - ws->info.chip_class = CIK; + ws->info.chip_class = GFX7; break; } @@ -542,18 +542,18 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) return false; } - if (ws->info.chip_class == CIK) { + if (ws->info.chip_class == GFX7) { if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL, ws->info.cik_macrotile_mode_array)) { - fprintf(stderr, "radeon: Kernel 3.13 is required for CIK support.\n"); + fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n"); return false; } } - if (ws->info.chip_class >= SI) { + if (ws->info.chip_class >= GFX6) { if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, ws->info.si_tile_mode_array)) { - fprintf(stderr, "radeon: Kernel 3.10 is required for SI support.\n"); + fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n"); return false; } } @@ -561,14 +561,14 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) /* Hawaii with old firmware needs type2 nop packet. * accel_working2 with value 3 indicates the new firmware. */ - ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= SI || + ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 || (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 3); ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ ws->info.ib_start_alignment = 4096; ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40; - /* HTILE is broken with 1D tiling on old kernels and CIK. */ - ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != CIK || + /* HTILE is broken with 1D tiling on old kernels and GFX7. */ + ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 || ws->info.drm_minor >= 38; ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48; ws->info.has_bo_metadata = false; @@ -579,15 +579,15 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) ws->info.kernel_flushes_tc_l2_after_ib = true; /* Old kernels disallowed register writes via COPY_DATA * that are used for indirect compute dispatches. */ - ws->info.has_indirect_compute_dispatch = ws->info.chip_class == CIK || - (ws->info.chip_class == SI && + ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 || + (ws->info.chip_class == GFX6 && ws->info.drm_minor >= 45); - /* SI doesn't support unaligned loads. */ - ws->info.has_unaligned_shader_loads = ws->info.chip_class == CIK && + /* GFX6 doesn't support unaligned loads. */ + ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 && ws->info.drm_minor >= 50; ws->info.has_sparse_vm_mappings = false; - /* 2D tiling on CIK is supported since DRM 2.35.0 */ - ws->info.has_2d_tiling = ws->info.chip_class <= SI || ws->info.drm_minor >= 35; + /* 2D tiling on GFX7 is supported since DRM 2.35.0 */ + ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35; ws->info.has_read_registers_query = ws->info.drm_minor >= 42; ws->info.max_alignment = 1024*1024; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 266695f0c03..a5e93186bf5 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -89,7 +89,7 @@ setup_primitive_restart(struct gl_context *ctx, struct pipe_draw_info *info) _mesa_primitive_restart_index(ctx, index_size); /* Enable primitive restart only when the restart index can have an - * effect. This is required for correctness in radeonsi VI support. + * effect. This is required for correctness in radeonsi GFX8 support. * Other hardware may also benefit from taking a faster, non-restart path * when possible. */