Follow-up of !4319 using the same clang-format config.
Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
+++ /dev/null
-[*.{c,h}]
-indent_style = tab
-indent_size = tab
* SOFTWARE.
*/
-#include "ac_gpu_info.h"
#include "ac_binary.h"
+#include "ac_gpu_info.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include <gelf.h>
#include <libelf.h>
-#include <stdio.h>
-
#include <sid.h>
+#include <stdio.h>
-#define SPILLED_SGPRS 0x4
-#define SPILLED_VGPRS 0x8
+#define SPILLED_SGPRS 0x4
+#define SPILLED_VGPRS 0x8
/* Parse configuration data in .AMDGPU.config section format. */
-void ac_parse_shader_binary_config(const char *data, size_t nbytes,
- unsigned wave_size,
- bool really_needs_scratch,
- const struct radeon_info *info,
- struct ac_shader_config *conf)
+void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
+ bool really_needs_scratch, const struct radeon_info *info,
+ struct ac_shader_config *conf)
{
- uint32_t scratch_size = 0;
+ uint32_t scratch_size = 0;
- for (size_t i = 0; i < nbytes; i += 8) {
- unsigned reg = util_le32_to_cpu(*(uint32_t*)(data + i));
- unsigned value = util_le32_to_cpu(*(uint32_t*)(data + i + 4));
- switch (reg) {
- case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
- case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
- case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
- case R_00B848_COMPUTE_PGM_RSRC1:
- case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
- if (wave_size == 32)
- conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
- else
- conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
+ for (size_t i = 0; i < nbytes; i += 8) {
+ unsigned reg = util_le32_to_cpu(*(uint32_t *)(data + i));
+ unsigned value = util_le32_to_cpu(*(uint32_t *)(data + i + 4));
+ switch (reg) {
+ case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
+ case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
+ case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
+ case R_00B848_COMPUTE_PGM_RSRC1:
+ case R_00B428_SPI_SHADER_PGM_RSRC1_HS:
+ if (wave_size == 32)
+ conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 8);
+ else
+ conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
- conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
- /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
- conf->float_mode = G_00B028_FLOAT_MODE(value);
- conf->rsrc1 = value;
- break;
- case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
- /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
- conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
- conf->rsrc2 = value;
- break;
- case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
- conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
- conf->rsrc2 = value;
- break;
- case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
- conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
- conf->rsrc2 = value;
- break;
- case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
- conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
- conf->rsrc2 = value;
- break;
- case R_00B84C_COMPUTE_PGM_RSRC2:
- conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
- conf->rsrc2 = value;
- break;
- case R_00B8A0_COMPUTE_PGM_RSRC3:
- conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
- conf->rsrc3 = value;
- break;
- case R_0286CC_SPI_PS_INPUT_ENA:
- conf->spi_ps_input_ena = value;
- break;
- case R_0286D0_SPI_PS_INPUT_ADDR:
- conf->spi_ps_input_addr = value;
- break;
- case R_0286E8_SPI_TMPRING_SIZE:
- case R_00B860_COMPUTE_TMPRING_SIZE:
- /* WAVESIZE is in units of 256 dwords. */
- scratch_size = value;
- break;
- case SPILLED_SGPRS:
- conf->spilled_sgprs = value;
- break;
- case SPILLED_VGPRS:
- conf->spilled_vgprs = value;
- break;
- default:
- {
- static bool printed;
+ conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
+ conf->float_mode = G_00B028_FLOAT_MODE(value);
+ conf->rsrc1 = value;
+ break;
+ case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
+ conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+ /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
+ conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
+ conf->rsrc2 = value;
+ break;
+ case R_00B12C_SPI_SHADER_PGM_RSRC2_VS:
+ conf->num_shared_vgprs = G_00B12C_SHARED_VGPR_CNT(value);
+ conf->rsrc2 = value;
+ break;
+ case R_00B22C_SPI_SHADER_PGM_RSRC2_GS:
+ conf->num_shared_vgprs = G_00B22C_SHARED_VGPR_CNT(value);
+ conf->rsrc2 = value;
+ break;
+ case R_00B42C_SPI_SHADER_PGM_RSRC2_HS:
+ conf->num_shared_vgprs = G_00B42C_SHARED_VGPR_CNT(value);
+ conf->rsrc2 = value;
+ break;
+ case R_00B84C_COMPUTE_PGM_RSRC2:
+ conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
+ conf->rsrc2 = value;
+ break;
+ case R_00B8A0_COMPUTE_PGM_RSRC3:
+ conf->num_shared_vgprs = G_00B8A0_SHARED_VGPR_CNT(value);
+ conf->rsrc3 = value;
+ break;
+ case R_0286CC_SPI_PS_INPUT_ENA:
+ conf->spi_ps_input_ena = value;
+ break;
+ case R_0286D0_SPI_PS_INPUT_ADDR:
+ conf->spi_ps_input_addr = value;
+ break;
+ case R_0286E8_SPI_TMPRING_SIZE:
+ case R_00B860_COMPUTE_TMPRING_SIZE:
+ /* WAVESIZE is in units of 256 dwords. */
+ scratch_size = value;
+ break;
+ case SPILLED_SGPRS:
+ conf->spilled_sgprs = value;
+ break;
+ case SPILLED_VGPRS:
+ conf->spilled_vgprs = value;
+ break;
+ default: {
+ static bool printed;
- if (!printed) {
- fprintf(stderr, "Warning: LLVM emitted unknown "
- "config register: 0x%x\n", reg);
- printed = true;
- }
- }
- break;
- }
- }
+ if (!printed) {
+ fprintf(stderr,
+ "Warning: LLVM emitted unknown "
+ "config register: 0x%x\n",
+ reg);
+ printed = true;
+ }
+ } break;
+ }
+ }
- if (!conf->spi_ps_input_addr)
- conf->spi_ps_input_addr = conf->spi_ps_input_ena;
+ if (!conf->spi_ps_input_addr)
+ conf->spi_ps_input_addr = conf->spi_ps_input_ena;
- if (really_needs_scratch) {
- /* sgprs spills aren't spilling */
- conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
- }
+ if (really_needs_scratch) {
+ /* sgprs spills aren't spilling */
+ conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
+ }
- /* GFX 10.3 internally:
- * - aligns VGPRS to 16 for Wave32 and 8 for Wave64
- * - aligns LDS to 1024
- *
- * For shader-db stats, set num_vgprs that the hw actually uses.
- */
- if (info->chip_class >= GFX10_3) {
- conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
- }
+ /* GFX 10.3 internally:
+ * - aligns VGPRS to 16 for Wave32 and 8 for Wave64
+ * - aligns LDS to 1024
+ *
+ * For shader-db stats, set num_vgprs that the hw actually uses.
+ */
+ if (info->chip_class >= GFX10_3) {
+ conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
+ }
- /* Enable 64-bit and 16-bit denormals, because there is no performance
- * cost.
- *
- * Don't enable denormals for 32-bit floats, because:
- * - denormals disable output modifiers
- * - denormals break v_mad_f32
- * - GFX6 & GFX7 would be very slow
- */
- conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
- conf->float_mode |= V_00B028_FP_64_DENORMS;
+ /* Enable 64-bit and 16-bit denormals, because there is no performance
+ * cost.
+ *
+ * Don't enable denormals for 32-bit floats, because:
+ * - denormals disable output modifiers
+ * - denormals break v_mad_f32
+ * - GFX6 & GFX7 would be very slow
+ */
+ conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
+ conf->float_mode |= V_00B028_FP_64_DENORMS;
}
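
For reference, the loop above implies the .AMDGPU.config layout: a flat array of little-endian (register, value) dword pairs, 8 bytes per entry. A minimal caller sketch under that assumption (little-endian host assumed for brevity; the RSRC1 and INPUT_ENA payloads are invented):

   /* Sketch: build a two-entry .AMDGPU.config blob and parse it. */
   #include "ac_binary.h"
   #include <sid.h>
   #include <string.h>

   static void example_parse(const struct radeon_info *info)
   {
      uint32_t blob[4] = {
         R_00B028_SPI_SHADER_PGM_RSRC1_PS, 0x002c0081, /* hypothetical value */
         R_0286CC_SPI_PS_INPUT_ENA,        0x00000002, /* hypothetical value */
      };
      struct ac_shader_config conf;
      memset(&conf, 0, sizeof(conf));

      ac_parse_shader_binary_config((const char *)blob, sizeof(blob),
                                    64 /* wave_size */, false, info, &conf);
      /* conf.num_vgprs, conf.num_sgprs and conf.spi_ps_input_ena are now
       * filled in from the two pairs. */
   }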
#ifndef AC_BINARY_H
#define AC_BINARY_H
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
struct radeon_info;
struct ac_shader_config {
- unsigned num_sgprs;
- unsigned num_vgprs;
- unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
- unsigned spilled_sgprs;
- unsigned spilled_vgprs;
- unsigned lds_size; /* in HW allocation units; i.e. 256 bytes on SI, 512 bytes on CI+ */
- unsigned spi_ps_input_ena;
- unsigned spi_ps_input_addr;
- unsigned float_mode;
- unsigned scratch_bytes_per_wave;
- unsigned rsrc1;
- unsigned rsrc2;
- unsigned rsrc3;
+ unsigned num_sgprs;
+ unsigned num_vgprs;
+ unsigned num_shared_vgprs; /* GFX10: number of VGPRs shared between half-waves */
+ unsigned spilled_sgprs;
+ unsigned spilled_vgprs;
+ unsigned lds_size; /* in HW allocation units; i.e. 256 bytes on SI, 512 bytes on CI+ */
+ unsigned spi_ps_input_ena;
+ unsigned spi_ps_input_addr;
+ unsigned float_mode;
+ unsigned scratch_bytes_per_wave;
+ unsigned rsrc1;
+ unsigned rsrc2;
+ unsigned rsrc3;
};
-void ac_parse_shader_binary_config(const char *data, size_t nbytes,
- unsigned wave_size,
- bool really_needs_scratch,
- const struct radeon_info *info,
- struct ac_shader_config *conf);
+void ac_parse_shader_binary_config(const char *data, size_t nbytes, unsigned wave_size,
+ bool really_needs_scratch, const struct radeon_info *info,
+ struct ac_shader_config *conf);
#ifdef __cplusplus
}
#include "ac_debug.h"
#ifdef HAVE_VALGRIND
-#include <valgrind.h>
#include <memcheck.h>
+#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif
-#include <inttypes.h>
-
#include "sid.h"
#include "sid_tables.h"
#include "util/u_math.h"
#include "util/u_string.h"
#include <assert.h>
+#include <inttypes.h>
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
* read them, or use "aha -b -f file" to convert them to html.
*/
-#define COLOR_RESET "\033[0m"
-#define COLOR_RED "\033[31m"
-#define COLOR_GREEN "\033[1;32m"
-#define COLOR_YELLOW "\033[1;33m"
-#define COLOR_CYAN "\033[1;36m"
+#define COLOR_RESET "\033[0m"
+#define COLOR_RED "\033[31m"
+#define COLOR_GREEN "\033[1;32m"
+#define COLOR_YELLOW "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
#define INDENT_PKT 8
struct ac_ib_parser {
- FILE *f;
- uint32_t *ib;
- unsigned num_dw;
- const int *trace_ids;
- unsigned trace_id_count;
- enum chip_class chip_class;
- ac_debug_addr_callback addr_callback;
- void *addr_callback_data;
-
- unsigned cur_dw;
+ FILE *f;
+ uint32_t *ib;
+ unsigned num_dw;
+ const int *trace_ids;
+ unsigned trace_id_count;
+ enum chip_class chip_class;
+ ac_debug_addr_callback addr_callback;
+ void *addr_callback_data;
+
+ unsigned cur_dw;
};
static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib);
static void print_spaces(FILE *f, unsigned num)
{
- fprintf(f, "%*s", num, "");
+ fprintf(f, "%*s", num, "");
}
static void print_value(FILE *file, uint32_t value, int bits)
{
- /* Guess if it's int or float */
- if (value <= (1 << 15)) {
- if (value <= 9)
- fprintf(file, "%u\n", value);
- else
- fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value);
- } else {
- float f = uif(value);
-
- if (fabs(f) < 100000 && f*10 == floor(f*10))
- fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value);
- else
- /* Don't print more leading zeros than there are bits. */
- fprintf(file, "0x%0*x\n", bits / 4, value);
- }
+ /* Guess if it's int or float */
+ if (value <= (1 << 15)) {
+ if (value <= 9)
+ fprintf(file, "%u\n", value);
+ else
+ fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value);
+ } else {
+ float f = uif(value);
+
+ if (fabs(f) < 100000 && f * 10 == floor(f * 10))
+ fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value);
+ else
+ /* Don't print more leading zeros than there are bits. */
+ fprintf(file, "0x%0*x\n", bits / 4, value);
+ }
}
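
print_value() guesses the type from the bit pattern via uif() from util/u_math.h. A condensed, self-contained version of the same heuristic, with uif() assumed to be a plain uint32-to-float bit-cast:

   #include <math.h>
   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>

   static float bitcast_uif(uint32_t u)
   {
      float f;
      memcpy(&f, &u, sizeof(f)); /* what uif() is assumed to do */
      return f;
   }

   int main(void)
   {
      uint32_t samples[] = {5, 0x3f800000 /* 1.0f */, 0xdeadbeef};
      for (unsigned i = 0; i < 3; i++) {
         uint32_t v = samples[i];
         if (v <= (1 << 15)) {
            printf("%u\n", v); /* small values print as integers */
         } else {
            float f = bitcast_uif(v);
            if (fabs(f) < 100000 && f * 10 == floor(f * 10))
               printf("%.1ff (0x%08x)\n", f, v); /* plausible float: "1.0f" */
            else
               printf("0x%08x\n", v); /* fall back to hex */
         }
      }
      return 0;
   }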
-static void print_named_value(FILE *file, const char *name, uint32_t value,
- int bits)
+static void print_named_value(FILE *file, const char *name, uint32_t value, int bits)
{
- print_spaces(file, INDENT_PKT);
- fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name);
- print_value(file, value, bits);
+ print_spaces(file, INDENT_PKT);
+ fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name);
+ print_value(file, value, bits);
}
static const struct si_reg *find_register(enum chip_class chip_class, unsigned offset)
{
- const struct si_reg *table;
- unsigned table_size;
-
- switch (chip_class) {
- case GFX10_3:
- case GFX10:
- table = gfx10_reg_table;
- table_size = ARRAY_SIZE(gfx10_reg_table);
- break;
- case GFX9:
- table = gfx9_reg_table;
- table_size = ARRAY_SIZE(gfx9_reg_table);
- break;
- case GFX8:
- table = gfx8_reg_table;
- table_size = ARRAY_SIZE(gfx8_reg_table);
- break;
- case GFX7:
- table = gfx7_reg_table;
- table_size = ARRAY_SIZE(gfx7_reg_table);
- break;
- case GFX6:
- table = gfx6_reg_table;
- table_size = ARRAY_SIZE(gfx6_reg_table);
- break;
- default:
- return NULL;
- }
-
- for (unsigned i = 0; i < table_size; i++) {
- const struct si_reg *reg = &table[i];
-
- if (reg->offset == offset)
- return reg;
- }
-
- return NULL;
+ const struct si_reg *table;
+ unsigned table_size;
+
+ switch (chip_class) {
+ case GFX10_3:
+ case GFX10:
+ table = gfx10_reg_table;
+ table_size = ARRAY_SIZE(gfx10_reg_table);
+ break;
+ case GFX9:
+ table = gfx9_reg_table;
+ table_size = ARRAY_SIZE(gfx9_reg_table);
+ break;
+ case GFX8:
+ table = gfx8_reg_table;
+ table_size = ARRAY_SIZE(gfx8_reg_table);
+ break;
+ case GFX7:
+ table = gfx7_reg_table;
+ table_size = ARRAY_SIZE(gfx7_reg_table);
+ break;
+ case GFX6:
+ table = gfx6_reg_table;
+ table_size = ARRAY_SIZE(gfx6_reg_table);
+ break;
+ default:
+ return NULL;
+ }
+
+ for (unsigned i = 0; i < table_size; i++) {
+ const struct si_reg *reg = &table[i];
+
+ if (reg->offset == offset)
+ return reg;
+ }
+
+ return NULL;
}
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset)
{
- const struct si_reg *reg = find_register(chip_class, offset);
+ const struct si_reg *reg = find_register(chip_class, offset);
- return reg ? sid_strings + reg->name_offset : "(no name)";
+ return reg ? sid_strings + reg->name_offset : "(no name)";
}
-void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
- uint32_t value, uint32_t field_mask)
+void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
+ uint32_t field_mask)
{
- const struct si_reg *reg = find_register(chip_class, offset);
-
- if (reg) {
- const char *reg_name = sid_strings + reg->name_offset;
- bool first_field = true;
-
- print_spaces(file, INDENT_PKT);
- fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
- reg_name);
-
- if (!reg->num_fields) {
- print_value(file, value, 32);
- return;
- }
-
- for (unsigned f = 0; f < reg->num_fields; f++) {
- const struct si_field *field = sid_fields_table + reg->fields_offset + f;
- const int *values_offsets = sid_strings_offsets + field->values_offset;
- uint32_t val = (value & field->mask) >>
- (ffs(field->mask) - 1);
-
- if (!(field->mask & field_mask))
- continue;
-
- /* Indent the field. */
- if (!first_field)
- print_spaces(file,
- INDENT_PKT + strlen(reg_name) + 4);
-
- /* Print the field. */
- fprintf(file, "%s = ", sid_strings + field->name_offset);
-
- if (val < field->num_values && values_offsets[val] >= 0)
- fprintf(file, "%s\n", sid_strings + values_offsets[val]);
- else
- print_value(file, val,
- util_bitcount(field->mask));
-
- first_field = false;
- }
- return;
- }
-
- print_spaces(file, INDENT_PKT);
- fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
+ const struct si_reg *reg = find_register(chip_class, offset);
+
+ if (reg) {
+ const char *reg_name = sid_strings + reg->name_offset;
+ bool first_field = true;
+
+ print_spaces(file, INDENT_PKT);
+ fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", reg_name);
+
+ if (!reg->num_fields) {
+ print_value(file, value, 32);
+ return;
+ }
+
+ for (unsigned f = 0; f < reg->num_fields; f++) {
+ const struct si_field *field = sid_fields_table + reg->fields_offset + f;
+ const int *values_offsets = sid_strings_offsets + field->values_offset;
+ uint32_t val = (value & field->mask) >> (ffs(field->mask) - 1);
+
+ if (!(field->mask & field_mask))
+ continue;
+
+ /* Indent the field. */
+ if (!first_field)
+ print_spaces(file, INDENT_PKT + strlen(reg_name) + 4);
+
+ /* Print the field. */
+ fprintf(file, "%s = ", sid_strings + field->name_offset);
+
+ if (val < field->num_values && values_offsets[val] >= 0)
+ fprintf(file, "%s\n", sid_strings + values_offsets[val]);
+ else
+ print_value(file, val, util_bitcount(field->mask));
+
+ first_field = false;
+ }
+ return;
+ }
+
+ print_spaces(file, INDENT_PKT);
+ fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
}
static uint32_t ac_ib_get(struct ac_ib_parser *ib)
{
- uint32_t v = 0;
+ uint32_t v = 0;
- if (ib->cur_dw < ib->num_dw) {
- v = ib->ib[ib->cur_dw];
+ if (ib->cur_dw < ib->num_dw) {
+ v = ib->ib[ib->cur_dw];
#ifdef HAVE_VALGRIND
- /* Help figure out where garbage data is written to IBs.
- *
- * Arguably we should do this already when the IBs are written,
- * see RADEON_VALGRIND. The problem is that client-requests to
- * Valgrind have an overhead even when Valgrind isn't running,
- * and radeon_emit is performance sensitive...
- */
- if (VALGRIND_CHECK_VALUE_IS_DEFINED(v))
- fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage"
- COLOR_RESET "\n");
+ /* Help figure out where garbage data is written to IBs.
+ *
+ * Arguably we should do this already when the IBs are written,
+ * see RADEON_VALGRIND. The problem is that client-requests to
+ * Valgrind have an overhead even when Valgrind isn't running,
+ * and radeon_emit is performance sensitive...
+ */
+ if (VALGRIND_CHECK_VALUE_IS_DEFINED(v))
+ fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage" COLOR_RESET "\n");
#endif
- fprintf(ib->f, "\n\035#%08x ", v);
- } else {
- fprintf(ib->f, "\n\035#???????? ");
- }
+ fprintf(ib->f, "\n\035#%08x ", v);
+ } else {
+ fprintf(ib->f, "\n\035#???????? ");
+ }
- ib->cur_dw++;
- return v;
+ ib->cur_dw++;
+ return v;
}
static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset,
- struct ac_ib_parser *ib)
+ struct ac_ib_parser *ib)
{
- unsigned reg_dw = ac_ib_get(ib);
- unsigned reg = ((reg_dw & 0xFFFF) << 2) + reg_offset;
- unsigned index = reg_dw >> 28;
- int i;
-
- if (index != 0) {
- print_spaces(f, INDENT_PKT);
- fprintf(f, "INDEX = %u\n", index);
- }
-
- for (i = 0; i < count; i++)
- ac_dump_reg(f, ib->chip_class, reg + i*4, ac_ib_get(ib), ~0);
+ unsigned reg_dw = ac_ib_get(ib);
+ unsigned reg = ((reg_dw & 0xFFFF) << 2) + reg_offset;
+ unsigned index = reg_dw >> 28;
+ int i;
+
+ if (index != 0) {
+ print_spaces(f, INDENT_PKT);
+ fprintf(f, "INDEX = %u\n", index);
+ }
+
+ for (i = 0; i < count; i++)
+ ac_dump_reg(f, ib->chip_class, reg + i * 4, ac_ib_get(ib), ~0);
}
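
The decode above treats the low 16 bits of the first dword as a dword offset from the packet family's base, so the byte address is ((reg_dw & 0xFFFF) << 2) + reg_offset, and the top nibble carries the optional INDEX. A worked example, assuming SI_CONTEXT_REG_OFFSET is 0x28000 as in sid.h (the 0x00A0 payload is invented):

   #include <assert.h>

   int main(void)
   {
      unsigned reg_dw = 0x000000A0;
      unsigned reg = ((reg_dw & 0xFFFF) << 2) + 0x28000;
      assert(reg == 0x28280);      /* 0xA0 dwords past the context-reg base */
      assert((reg_dw >> 28) == 0); /* top nibble is the optional INDEX */
      return 0;
   }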
static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
int *current_trace_id)
{
- unsigned first_dw = ib->cur_dw;
- int count = PKT_COUNT_G(header);
- unsigned op = PKT3_IT_OPCODE_G(header);
- const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
- int i;
-
- /* Print the name first. */
- for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
- if (packet3_table[i].op == op)
- break;
-
- if (i < ARRAY_SIZE(packet3_table)) {
- const char *name = sid_strings + packet3_table[i].name_offset;
-
- if (op == PKT3_SET_CONTEXT_REG ||
- op == PKT3_SET_CONFIG_REG ||
- op == PKT3_SET_UCONFIG_REG ||
- op == PKT3_SET_UCONFIG_REG_INDEX ||
- op == PKT3_SET_SH_REG)
- fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n",
- name, predicate);
- else
- fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n",
- name, predicate);
- } else
- fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n",
- op, predicate);
-
- /* Print the contents. */
- switch (op) {
- case PKT3_SET_CONTEXT_REG:
- ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
- break;
- case PKT3_SET_CONFIG_REG:
- ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib);
- break;
- case PKT3_SET_UCONFIG_REG:
- case PKT3_SET_UCONFIG_REG_INDEX:
- ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib);
- break;
- case PKT3_SET_SH_REG:
- ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
- break;
- case PKT3_ACQUIRE_MEM:
- ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
- print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
- if (ib->chip_class >= GFX10)
- ac_dump_reg(f, ib->chip_class, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
- break;
- case PKT3_SURFACE_SYNC:
- if (ib->chip_class >= GFX7) {
- ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
- } else {
- ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
- }
- print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
- break;
- case PKT3_EVENT_WRITE: {
- uint32_t event_dw = ac_ib_get(ib);
- ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
- S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
- print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
- if (count > 0) {
- print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
- print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 16);
- }
- break;
- }
- case PKT3_EVENT_WRITE_EOP: {
- uint32_t event_dw = ac_ib_get(ib);
- ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
- S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
- print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
- print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
- print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
- print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
- print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
- print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
- uint32_t addr_hi_dw = ac_ib_get(ib);
- print_named_value(f, "ADDRESS_HI", addr_hi_dw, 16);
- print_named_value(f, "DST_SEL", (addr_hi_dw >> 16) & 0x3, 2);
- print_named_value(f, "INT_SEL", (addr_hi_dw >> 24) & 0x7, 3);
- print_named_value(f, "DATA_SEL", addr_hi_dw >> 29, 3);
- print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
- print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
- break;
- }
- case PKT3_RELEASE_MEM: {
- uint32_t event_dw = ac_ib_get(ib);
- if (ib->chip_class >= GFX10) {
- ac_dump_reg(f, ib->chip_class, R_490_RELEASE_MEM_OP, event_dw, ~0u);
- } else {
- ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
- S_028A90_EVENT_TYPE(~0));
- print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
- print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
- print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
- print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
- print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
- print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
- print_named_value(f, "TC_NC_ACTION_ENA", (event_dw >> 19) & 0x1, 1);
- print_named_value(f, "TC_WC_ACTION_ENA", (event_dw >> 20) & 0x1, 1);
- print_named_value(f, "TC_MD_ACTION_ENA", (event_dw >> 21) & 0x1, 1);
- }
- uint32_t sel_dw = ac_ib_get(ib);
- print_named_value(f, "DST_SEL", (sel_dw >> 16) & 0x3, 2);
- print_named_value(f, "INT_SEL", (sel_dw >> 24) & 0x7, 3);
- print_named_value(f, "DATA_SEL", sel_dw >> 29, 3);
- print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
- print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
- print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
- print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
- print_named_value(f, "CTXID", ac_ib_get(ib), 32);
- break;
- }
- case PKT3_WAIT_REG_MEM:
- print_named_value(f, "OP", ac_ib_get(ib), 32);
- print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
- print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
- print_named_value(f, "REF", ac_ib_get(ib), 32);
- print_named_value(f, "MASK", ac_ib_get(ib), 32);
- print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
- break;
- case PKT3_DRAW_INDEX_AUTO:
- ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
- break;
- case PKT3_DRAW_INDEX_2:
- ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
- break;
- case PKT3_INDEX_TYPE:
- ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
- break;
- case PKT3_NUM_INSTANCES:
- ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
- break;
- case PKT3_WRITE_DATA:
- ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
- /* The payload is written automatically */
- break;
- case PKT3_CP_DMA:
- ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
- break;
- case PKT3_DMA_DATA:
- ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
- ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
- break;
- case PKT3_INDIRECT_BUFFER_SI:
- case PKT3_INDIRECT_BUFFER_CONST:
- case PKT3_INDIRECT_BUFFER_CIK: {
- uint32_t base_lo_dw = ac_ib_get(ib);
- ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
- uint32_t base_hi_dw = ac_ib_get(ib);
- ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
- uint32_t control_dw = ac_ib_get(ib);
- ac_dump_reg(f, ib->chip_class, R_3F2_IB_CONTROL, control_dw, ~0);
-
- if (!ib->addr_callback)
- break;
-
- uint64_t addr = ((uint64_t)base_hi_dw << 32) | base_lo_dw;
- void *data = ib->addr_callback(ib->addr_callback_data, addr);
- if (!data)
- break;
-
- if (G_3F2_CHAIN(control_dw)) {
- ib->ib = data;
- ib->num_dw = G_3F2_IB_SIZE(control_dw);
- ib->cur_dw = 0;
- return;
- }
-
- struct ac_ib_parser ib_recurse;
- memcpy(&ib_recurse, ib, sizeof(ib_recurse));
- ib_recurse.ib = data;
- ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw);
- ib_recurse.cur_dw = 0;
- if(ib_recurse.trace_id_count) {
- if (*current_trace_id == *ib->trace_ids) {
- ++ib_recurse.trace_ids;
- --ib_recurse.trace_id_count;
- } else {
- ib_recurse.trace_id_count = 0;
- }
- }
-
- fprintf(f, "\n\035>------------------ nested begin ------------------\n");
- ac_do_parse_ib(f, &ib_recurse);
- fprintf(f, "\n\035<------------------- nested end -------------------\n");
- break;
- }
- case PKT3_CLEAR_STATE:
- case PKT3_INCREMENT_DE_COUNTER:
- case PKT3_PFP_SYNC_ME:
- break;
- case PKT3_NOP:
- if (header == PKT3_NOP_PAD) {
- count = -1; /* One dword NOP. */
- } else if (count == 0 && ib->cur_dw < ib->num_dw &&
- AC_IS_TRACE_POINT(ib->ib[ib->cur_dw])) {
- unsigned packet_id = AC_GET_TRACE_POINT_ID(ib->ib[ib->cur_dw]);
-
- print_spaces(f, INDENT_PKT);
- fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
-
- if (!ib->trace_id_count)
- break; /* tracing was disabled */
-
- *current_trace_id = packet_id;
-
- print_spaces(f, INDENT_PKT);
- if (packet_id < *ib->trace_ids)
- fprintf(f, COLOR_RED
- "This trace point was reached by the CP."
- COLOR_RESET "\n");
- else if (packet_id == *ib->trace_ids)
- fprintf(f, COLOR_RED
- "!!!!! This is the last trace point that "
- "was reached by the CP !!!!!"
- COLOR_RESET "\n");
- else if (packet_id+1 == *ib->trace_ids)
- fprintf(f, COLOR_RED
- "!!!!! This is the first trace point that "
- "was NOT been reached by the CP !!!!!"
- COLOR_RESET "\n");
- else
- fprintf(f, COLOR_RED
- "!!!!! This trace point was NOT reached "
- "by the CP !!!!!"
- COLOR_RESET "\n");
- break;
- }
- break;
- }
-
- /* print additional dwords */
- while (ib->cur_dw <= first_dw + count)
- ac_ib_get(ib);
-
- if (ib->cur_dw > first_dw + count + 1)
- fprintf(f, COLOR_RED "\n!!!!! count in header too low !!!!!"
- COLOR_RESET "\n");
+ unsigned first_dw = ib->cur_dw;
+ int count = PKT_COUNT_G(header);
+ unsigned op = PKT3_IT_OPCODE_G(header);
+ const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
+ int i;
+
+ /* Print the name first. */
+ for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
+ if (packet3_table[i].op == op)
+ break;
+
+ if (i < ARRAY_SIZE(packet3_table)) {
+ const char *name = sid_strings + packet3_table[i].name_offset;
+
+ if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG ||
+ op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG)
+ fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", name, predicate);
+ else
+ fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n", name, predicate);
+ } else
+ fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n", op, predicate);
+
+ /* Print the contents. */
+ switch (op) {
+ case PKT3_SET_CONTEXT_REG:
+ ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
+ break;
+ case PKT3_SET_CONFIG_REG:
+ ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib);
+ break;
+ case PKT3_SET_UCONFIG_REG:
+ case PKT3_SET_UCONFIG_REG_INDEX:
+ ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib);
+ break;
+ case PKT3_SET_SH_REG:
+ ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
+ break;
+ case PKT3_ACQUIRE_MEM:
+ ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+ if (ib->chip_class >= GFX10)
+ ac_dump_reg(f, ib->chip_class, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_SURFACE_SYNC:
+ if (ib->chip_class >= GFX7) {
+ ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+ } else {
+ ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+ }
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+ break;
+ case PKT3_EVENT_WRITE: {
+ uint32_t event_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
+ S_028A90_EVENT_TYPE(~0));
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
+ if (count > 0) {
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 16);
+ }
+ break;
+ }
+ case PKT3_EVENT_WRITE_EOP: {
+ uint32_t event_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
+ S_028A90_EVENT_TYPE(~0));
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
+ print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
+ print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
+ print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
+ print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ uint32_t addr_hi_dw = ac_ib_get(ib);
+ print_named_value(f, "ADDRESS_HI", addr_hi_dw, 16);
+ print_named_value(f, "DST_SEL", (addr_hi_dw >> 16) & 0x3, 2);
+ print_named_value(f, "INT_SEL", (addr_hi_dw >> 24) & 0x7, 3);
+ print_named_value(f, "DATA_SEL", addr_hi_dw >> 29, 3);
+ print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
+ break;
+ }
+ case PKT3_RELEASE_MEM: {
+ uint32_t event_dw = ac_ib_get(ib);
+ if (ib->chip_class >= GFX10) {
+ ac_dump_reg(f, ib->chip_class, R_490_RELEASE_MEM_OP, event_dw, ~0u);
+ } else {
+ ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
+ S_028A90_EVENT_TYPE(~0));
+ print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
+ print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
+ print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
+ print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
+ print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
+ print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
+ print_named_value(f, "TC_NC_ACTION_ENA", (event_dw >> 19) & 0x1, 1);
+ print_named_value(f, "TC_WC_ACTION_ENA", (event_dw >> 20) & 0x1, 1);
+ print_named_value(f, "TC_MD_ACTION_ENA", (event_dw >> 21) & 0x1, 1);
+ }
+ uint32_t sel_dw = ac_ib_get(ib);
+ print_named_value(f, "DST_SEL", (sel_dw >> 16) & 0x3, 2);
+ print_named_value(f, "INT_SEL", (sel_dw >> 24) & 0x7, 3);
+ print_named_value(f, "DATA_SEL", sel_dw >> 29, 3);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "CTXID", ac_ib_get(ib), 32);
+ break;
+ }
+ case PKT3_WAIT_REG_MEM:
+ print_named_value(f, "OP", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
+ print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
+ print_named_value(f, "REF", ac_ib_get(ib), 32);
+ print_named_value(f, "MASK", ac_ib_get(ib), 32);
+ print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+ break;
+ case PKT3_DRAW_INDEX_AUTO:
+ ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_DRAW_INDEX_2:
+ ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_INDEX_TYPE:
+ ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_NUM_INSTANCES:
+ ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_WRITE_DATA:
+ ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
+ /* The payload is written automatically */
+ break;
+ case PKT3_CP_DMA:
+ ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_DMA_DATA:
+ ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
+ ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
+ break;
+ case PKT3_INDIRECT_BUFFER_SI:
+ case PKT3_INDIRECT_BUFFER_CONST:
+ case PKT3_INDIRECT_BUFFER_CIK: {
+ uint32_t base_lo_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
+ uint32_t base_hi_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
+ uint32_t control_dw = ac_ib_get(ib);
+ ac_dump_reg(f, ib->chip_class, R_3F2_IB_CONTROL, control_dw, ~0);
+
+ if (!ib->addr_callback)
+ break;
+
+ uint64_t addr = ((uint64_t)base_hi_dw << 32) | base_lo_dw;
+ void *data = ib->addr_callback(ib->addr_callback_data, addr);
+ if (!data)
+ break;
+
+ if (G_3F2_CHAIN(control_dw)) {
+ ib->ib = data;
+ ib->num_dw = G_3F2_IB_SIZE(control_dw);
+ ib->cur_dw = 0;
+ return;
+ }
+
+ struct ac_ib_parser ib_recurse;
+ memcpy(&ib_recurse, ib, sizeof(ib_recurse));
+ ib_recurse.ib = data;
+ ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw);
+ ib_recurse.cur_dw = 0;
+ if (ib_recurse.trace_id_count) {
+ if (*current_trace_id == *ib->trace_ids) {
+ ++ib_recurse.trace_ids;
+ --ib_recurse.trace_id_count;
+ } else {
+ ib_recurse.trace_id_count = 0;
+ }
+ }
+
+ fprintf(f, "\n\035>------------------ nested begin ------------------\n");
+ ac_do_parse_ib(f, &ib_recurse);
+ fprintf(f, "\n\035<------------------- nested end -------------------\n");
+ break;
+ }
+ case PKT3_CLEAR_STATE:
+ case PKT3_INCREMENT_DE_COUNTER:
+ case PKT3_PFP_SYNC_ME:
+ break;
+ case PKT3_NOP:
+ if (header == PKT3_NOP_PAD) {
+ count = -1; /* One dword NOP. */
+ } else if (count == 0 && ib->cur_dw < ib->num_dw && AC_IS_TRACE_POINT(ib->ib[ib->cur_dw])) {
+ unsigned packet_id = AC_GET_TRACE_POINT_ID(ib->ib[ib->cur_dw]);
+
+ print_spaces(f, INDENT_PKT);
+ fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
+
+ if (!ib->trace_id_count)
+ break; /* tracing was disabled */
+
+ *current_trace_id = packet_id;
+
+ print_spaces(f, INDENT_PKT);
+ if (packet_id < *ib->trace_ids)
+ fprintf(f, COLOR_RED "This trace point was reached by the CP." COLOR_RESET "\n");
+ else if (packet_id == *ib->trace_ids)
+ fprintf(f, COLOR_RED "!!!!! This is the last trace point that "
+ "was reached by the CP !!!!!" COLOR_RESET "\n");
+ else if (packet_id + 1 == *ib->trace_ids)
+ fprintf(f, COLOR_RED "!!!!! This is the first trace point that "
+ "was NOT been reached by the CP !!!!!" COLOR_RESET "\n");
+ else
+ fprintf(f, COLOR_RED "!!!!! This trace point was NOT reached "
+ "by the CP !!!!!" COLOR_RESET "\n");
+ break;
+ }
+ break;
+ }
+
+ /* print additional dwords */
+ while (ib->cur_dw <= first_dw + count)
+ ac_ib_get(ib);
+
+ if (ib->cur_dw > first_dw + count + 1)
+ fprintf(f, COLOR_RED "\n!!!!! count in header too low !!!!!" COLOR_RESET "\n");
}
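
The PKT_COUNT_G, PKT3_IT_OPCODE_G and PKT3_PREDICATE accessors used at the top of this function unpack the standard PKT3 header. A hand-decoded sketch, assuming the sid.h layout (type in bits 31:30, count in bits 29:16, IT opcode in bits 15:8, predicate in bit 0) and the NOP opcode 0x10:

   #include <assert.h>
   #include <stdint.h>

   int main(void)
   {
      uint32_t header = (3u << 30) | (0u << 16) | (0x10u << 8);
      assert((header >> 30) == 3);            /* PKT_TYPE_G: type 3 */
      assert(((header >> 16) & 0x3fff) == 0); /* PKT_COUNT_G */
      assert(((header >> 8) & 0xff) == 0x10); /* PKT3_IT_OPCODE_G: NOP */
      assert((header & 1) == 0);              /* PKT3_PREDICATE */
      return 0;
   }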
/**
*/
static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib)
{
- int current_trace_id = -1;
-
- while (ib->cur_dw < ib->num_dw) {
- uint32_t header = ac_ib_get(ib);
- unsigned type = PKT_TYPE_G(header);
-
- switch (type) {
- case 3:
- ac_parse_packet3(f, header, ib, &current_trace_id);
- break;
- case 2:
- /* type-2 nop */
- if (header == 0x80000000) {
- fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
- break;
- }
- /* fall through */
- default:
- fprintf(f, "Unknown packet type %i\n", type);
- break;
- }
- }
+ int current_trace_id = -1;
+
+ while (ib->cur_dw < ib->num_dw) {
+ uint32_t header = ac_ib_get(ib);
+ unsigned type = PKT_TYPE_G(header);
+
+ switch (type) {
+ case 3:
+ ac_parse_packet3(f, header, ib, &current_trace_id);
+ break;
+ case 2:
+ /* type-2 nop */
+ if (header == 0x80000000) {
+ fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
+ break;
+ }
+ /* fall through */
+ default:
+ fprintf(f, "Unknown packet type %i\n", type);
+ break;
+ }
+ }
}
static void format_ib_output(FILE *f, char *out)
{
- unsigned depth = 0;
+ unsigned depth = 0;
- for (;;) {
- char op = 0;
+ for (;;) {
+ char op = 0;
- if (out[0] == '\n' && out[1] == '\035')
- out++;
- if (out[0] == '\035') {
- op = out[1];
- out += 2;
- }
+ if (out[0] == '\n' && out[1] == '\035')
+ out++;
+ if (out[0] == '\035') {
+ op = out[1];
+ out += 2;
+ }
- if (op == '<')
- depth--;
+ if (op == '<')
+ depth--;
- unsigned indent = 4 * depth;
- if (op != '#')
- indent += 9;
+ unsigned indent = 4 * depth;
+ if (op != '#')
+ indent += 9;
- if (indent)
- print_spaces(f, indent);
+ if (indent)
+ print_spaces(f, indent);
- char *end = strchrnul(out, '\n');
- fwrite(out, end - out, 1, f);
- fputc('\n', f); /* always end with a new line */
- if (!*end)
- break;
+ char *end = strchrnul(out, '\n');
+ fwrite(out, end - out, 1, f);
+ fputc('\n', f); /* always end with a new line */
+ if (!*end)
+ break;
- out = end + 1;
+ out = end + 1;
- if (op == '>')
- depth++;
- }
+ if (op == '>')
+ depth++;
+ }
}
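
ac_ib_get() and the nested begin/end fprintfs emit \035 (ASCII group separator) markers that this function consumes: '#' tags a raw-dword line, '>' opens a nesting level, '<' closes one, and anything else gets the 9-column packet indent. Since format_ib_output() is static, a quick exercise from inside this file could look like this (packet text invented):

   char buf[] = "\035#c0001000 PKT3_EXAMPLE:\n"
                "\035>------ nested begin ------\n"
                "\035#80000000 NOP\n"
                "\035<------- nested end -------\n";
   format_ib_output(stdout, buf); /* the NOP line comes out indented one level */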
/**
* \param addr_callback_data user data for addr_callback
*/
void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids,
- unsigned trace_id_count, enum chip_class chip_class,
+ unsigned trace_id_count, enum chip_class chip_class,
ac_debug_addr_callback addr_callback, void *addr_callback_data)
{
- struct ac_ib_parser ib = {};
- ib.ib = ib_ptr;
- ib.num_dw = num_dw;
- ib.trace_ids = trace_ids;
- ib.trace_id_count = trace_id_count;
- ib.chip_class = chip_class;
- ib.addr_callback = addr_callback;
- ib.addr_callback_data = addr_callback_data;
-
- char *out;
- size_t outsize;
- FILE *memf = open_memstream(&out, &outsize);
- ib.f = memf;
- ac_do_parse_ib(memf, &ib);
- fclose(memf);
-
- if (out) {
- format_ib_output(f, out);
- free(out);
- }
-
- if (ib.cur_dw > ib.num_dw) {
- printf("\nPacket ends after the end of IB.\n");
- exit(1);
- }
+ struct ac_ib_parser ib = {};
+ ib.ib = ib_ptr;
+ ib.num_dw = num_dw;
+ ib.trace_ids = trace_ids;
+ ib.trace_id_count = trace_id_count;
+ ib.chip_class = chip_class;
+ ib.addr_callback = addr_callback;
+ ib.addr_callback_data = addr_callback_data;
+
+ char *out;
+ size_t outsize;
+ FILE *memf = open_memstream(&out, &outsize);
+ ib.f = memf;
+ ac_do_parse_ib(memf, &ib);
+ fclose(memf);
+
+ if (out) {
+ format_ib_output(f, out);
+ free(out);
+ }
+
+ if (ib.cur_dw > ib.num_dw) {
+ printf("\nPacket ends after the end of IB.\n");
+ exit(1);
+ }
}
/**
* be NULL.
* \param addr_callback_data user data for addr_callback
*/
-void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
- unsigned trace_id_count, const char *name,
- enum chip_class chip_class, ac_debug_addr_callback addr_callback,
- void *addr_callback_data)
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
+ const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
+ void *addr_callback_data)
{
- fprintf(f, "------------------ %s begin ------------------\n", name);
+ fprintf(f, "------------------ %s begin ------------------\n", name);
- ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count,
- chip_class, addr_callback, addr_callback_data);
+ ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, chip_class, addr_callback,
+ addr_callback_data);
- fprintf(f, "------------------- %s end -------------------\n\n", name);
+ fprintf(f, "------------------- %s end -------------------\n\n", name);
}
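
A minimal usage sketch: a one-dword IB holding a type-2 NOP (0x80000000, which ac_do_parse_ib above prints as "NOP (type 2)"), with no trace IDs and no address callback:

   uint32_t ib[] = {0x80000000};
   ac_parse_ib(stderr, ib, 1, NULL, 0, "example", GFX9, NULL, NULL);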
/**
* \param old_dmesg_timestamp previous dmesg timestamp parsed at init time
* \param out_addr detected VM fault addr
*/
-bool ac_vm_fault_occured(enum chip_class chip_class,
- uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
+bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
+ uint64_t *out_addr)
{
- char line[2000];
- unsigned sec, usec;
- int progress = 0;
- uint64_t dmesg_timestamp = 0;
- bool fault = false;
-
- FILE *p = popen("dmesg", "r");
- if (!p)
- return false;
-
- while (fgets(line, sizeof(line), p)) {
- char *msg; int len;
-
- if (!line[0] || line[0] == '\n')
- continue;
-
- /* Get the timestamp. */
- if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
- static bool hit = false;
- if (!hit) {
- fprintf(stderr, "%s: failed to parse line '%s'\n",
- __func__, line);
- hit = true;
- }
- continue;
- }
- dmesg_timestamp = sec * 1000000ull + usec;
-
- /* If just updating the timestamp. */
- if (!out_addr)
- continue;
-
- /* Process messages only if the timestamp is newer. */
- if (dmesg_timestamp <= *old_dmesg_timestamp)
- continue;
-
- /* Only process the first VM fault. */
- if (fault)
- continue;
-
- /* Remove trailing \n */
- len = strlen(line);
- if (len && line[len-1] == '\n')
- line[len-1] = 0;
-
- /* Get the message part. */
- msg = strchr(line, ']');
- if (!msg)
- continue;
- msg++;
-
- const char *header_line, *addr_line_prefix, *addr_line_format;
-
- if (chip_class >= GFX9) {
- /* Match this:
- * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
- * ..: at page 0x0000000219f8f000 from 27
- * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
- */
- header_line = "VMC page fault";
- addr_line_prefix = " at page";
- addr_line_format = "%"PRIx64;
- } else {
- header_line = "GPU fault detected:";
- addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
- addr_line_format = "%"PRIX64;
- }
-
- switch (progress) {
- case 0:
- if (strstr(msg, header_line))
- progress = 1;
- break;
- case 1:
- msg = strstr(msg, addr_line_prefix);
- if (msg) {
- msg = strstr(msg, "0x");
- if (msg) {
- msg += 2;
- if (sscanf(msg, addr_line_format, out_addr) == 1)
- fault = true;
- }
- }
- progress = 0;
- break;
- default:
- progress = 0;
- }
- }
- pclose(p);
-
- if (dmesg_timestamp > *old_dmesg_timestamp)
- *old_dmesg_timestamp = dmesg_timestamp;
-
- return fault;
+ char line[2000];
+ unsigned sec, usec;
+ int progress = 0;
+ uint64_t dmesg_timestamp = 0;
+ bool fault = false;
+
+ FILE *p = popen("dmesg", "r");
+ if (!p)
+ return false;
+
+ while (fgets(line, sizeof(line), p)) {
+ char *msg; int len;
+
+ if (!line[0] || line[0] == '\n')
+ continue;
+
+ /* Get the timestamp. */
+ if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
+ static bool hit = false;
+ if (!hit) {
+ fprintf(stderr, "%s: failed to parse line '%s'\n", __func__, line);
+ hit = true;
+ }
+ continue;
+ }
+ dmesg_timestamp = sec * 1000000ull + usec;
+
+ /* If just updating the timestamp. */
+ if (!out_addr)
+ continue;
+
+ /* Process messages only if the timestamp is newer. */
+ if (dmesg_timestamp <= *old_dmesg_timestamp)
+ continue;
+
+ /* Only process the first VM fault. */
+ if (fault)
+ continue;
+
+ /* Remove trailing \n */
+ len = strlen(line);
+ if (len && line[len - 1] == '\n')
+ line[len - 1] = 0;
+
+ /* Get the message part. */
+ msg = strchr(line, ']');
+ if (!msg)
+ continue;
+ msg++;
+
+ const char *header_line, *addr_line_prefix, *addr_line_format;
+
+ if (chip_class >= GFX9) {
+ /* Match this:
+ * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
+ * ..: at page 0x0000000219f8f000 from 27
+ * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
+ */
+ header_line = "VMC page fault";
+ addr_line_prefix = " at page";
+ addr_line_format = "%" PRIx64;
+ } else {
+ header_line = "GPU fault detected:";
+ addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
+ addr_line_format = "%" PRIX64;
+ }
+
+ switch (progress) {
+ case 0:
+ if (strstr(msg, header_line))
+ progress = 1;
+ break;
+ case 1:
+ msg = strstr(msg, addr_line_prefix);
+ if (msg) {
+ msg = strstr(msg, "0x");
+ if (msg) {
+ msg += 2;
+ if (sscanf(msg, addr_line_format, out_addr) == 1)
+ fault = true;
+ }
+ }
+ progress = 0;
+ break;
+ default:
+ progress = 0;
+ }
+ }
+ pclose(p);
+
+ if (dmesg_timestamp > *old_dmesg_timestamp)
+ *old_dmesg_timestamp = dmesg_timestamp;
+
+ return fault;
}
static int compare_wave(const void *p1, const void *p2)
{
- struct ac_wave_info *w1 = (struct ac_wave_info *)p1;
- struct ac_wave_info *w2 = (struct ac_wave_info *)p2;
-
- /* Sort waves according to PC and then SE, SH, CU, etc. */
- if (w1->pc < w2->pc)
- return -1;
- if (w1->pc > w2->pc)
- return 1;
- if (w1->se < w2->se)
- return -1;
- if (w1->se > w2->se)
- return 1;
- if (w1->sh < w2->sh)
- return -1;
- if (w1->sh > w2->sh)
- return 1;
- if (w1->cu < w2->cu)
- return -1;
- if (w1->cu > w2->cu)
- return 1;
- if (w1->simd < w2->simd)
- return -1;
- if (w1->simd > w2->simd)
- return 1;
- if (w1->wave < w2->wave)
- return -1;
- if (w1->wave > w2->wave)
- return 1;
-
- return 0;
+ struct ac_wave_info *w1 = (struct ac_wave_info *)p1;
+ struct ac_wave_info *w2 = (struct ac_wave_info *)p2;
+
+ /* Sort waves according to PC and then SE, SH, CU, etc. */
+ if (w1->pc < w2->pc)
+ return -1;
+ if (w1->pc > w2->pc)
+ return 1;
+ if (w1->se < w2->se)
+ return -1;
+ if (w1->se > w2->se)
+ return 1;
+ if (w1->sh < w2->sh)
+ return -1;
+ if (w1->sh > w2->sh)
+ return 1;
+ if (w1->cu < w2->cu)
+ return -1;
+ if (w1->cu > w2->cu)
+ return 1;
+ if (w1->simd < w2->simd)
+ return -1;
+ if (w1->simd > w2->simd)
+ return 1;
+ if (w1->wave < w2->wave)
+ return -1;
+ if (w1->wave > w2->wave)
+ return 1;
+
+ return 0;
}
/* Return wave information. "waves" should be a large enough array. */
unsigned ac_get_wave_info(enum chip_class chip_class,
- struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
{
- char line[2000], cmd[128];
- unsigned num_waves = 0;
-
- sprintf(cmd, "umr -O halt_waves -wa %s", chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
-
- FILE *p = popen(cmd, "r");
- if (!p)
- return 0;
-
- if (!fgets(line, sizeof(line), p) ||
- strncmp(line, "SE", 2) != 0) {
- pclose(p);
- return 0;
- }
-
- while (fgets(line, sizeof(line), p)) {
- struct ac_wave_info *w;
- uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
-
- assert(num_waves < AC_MAX_WAVES_PER_CHIP);
- w = &waves[num_waves];
-
- if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
- &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
- &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
- &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
- w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
- w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
- w->matched = false;
- num_waves++;
- }
- }
-
- qsort(waves, num_waves, sizeof(struct ac_wave_info), compare_wave);
-
- pclose(p);
- return num_waves;
+ char line[2000], cmd[128];
+ unsigned num_waves = 0;
+
+ sprintf(cmd, "umr -O halt_waves -wa %s", chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
+
+ FILE *p = popen(cmd, "r");
+ if (!p)
+ return 0;
+
+ if (!fgets(line, sizeof(line), p) || strncmp(line, "SE", 2) != 0) {
+ pclose(p);
+ return 0;
+ }
+
+ while (fgets(line, sizeof(line), p)) {
+ struct ac_wave_info *w;
+ uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
+
+ assert(num_waves < AC_MAX_WAVES_PER_CHIP);
+ w = &waves[num_waves];
+
+ if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x", &w->se, &w->sh, &w->cu, &w->simd,
+ &w->wave, &w->status, &pc_hi, &pc_lo, &w->inst_dw0, &w->inst_dw1, &exec_hi,
+ &exec_lo) == 12) {
+ w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
+ w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
+ w->matched = false;
+ num_waves++;
+ }
+ }
+
+ qsort(waves, num_waves, sizeof(struct ac_wave_info), compare_wave);
+
+ pclose(p);
+ return num_waves;
}
#ifndef AC_DEBUG_H
#define AC_DEBUG_H
+#include "amd_family.h"
+
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
-#include <stdbool.h>
-
-#include "amd_family.h"
-#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff))
-#define AC_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
-#define AC_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
+#define AC_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id)&0xffff))
+#define AC_IS_TRACE_POINT(x) (((x)&0xcafe0000) == 0xcafe0000)
+#define AC_GET_TRACE_POINT_ID(x) ((x)&0xffff)
#define AC_MAX_WAVES_PER_CHIP (64 * 40)
#endif
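
The trace-point macros round-trip by construction; a quick check that follows directly from the definitions above:

   #include "ac_debug.h"
   #include <assert.h>

   int main(void)
   {
      unsigned x = AC_ENCODE_TRACE_POINT(5);
      assert(x == 0xcafe0005);
      assert(AC_IS_TRACE_POINT(x));
      assert(AC_GET_TRACE_POINT_ID(x) == 5);
      return 0;
   }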
struct ac_wave_info {
- unsigned se; /* shader engine */
- unsigned sh; /* shader array */
- unsigned cu; /* compute unit */
- unsigned simd;
- unsigned wave;
- uint32_t status;
- uint64_t pc; /* program counter */
- uint32_t inst_dw0;
- uint32_t inst_dw1;
- uint64_t exec;
- bool matched; /* whether the wave is used by a currently-bound shader */
+ unsigned se; /* shader engine */
+ unsigned sh; /* shader array */
+ unsigned cu; /* compute unit */
+ unsigned simd;
+ unsigned wave;
+ uint32_t status;
+ uint64_t pc; /* program counter */
+ uint32_t inst_dw0;
+ uint32_t inst_dw1;
+ uint64_t exec;
+ bool matched; /* whether the wave is used by a currently-bound shader */
};
typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
const char *ac_get_register_name(enum chip_class chip_class, unsigned offset);
-void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
- uint32_t value, uint32_t field_mask);
+void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, uint32_t value,
+ uint32_t field_mask);
void ac_parse_ib_chunk(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
- unsigned trace_id_count, enum chip_class chip_class,
- ac_debug_addr_callback addr_callback, void *addr_callback_data);
-void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
- unsigned trace_id_count, const char *name, enum chip_class chip_class,
- ac_debug_addr_callback addr_callback, void *addr_callback_data);
+ unsigned trace_id_count, enum chip_class chip_class,
+ ac_debug_addr_callback addr_callback, void *addr_callback_data);
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, unsigned trace_id_count,
+ const char *name, enum chip_class chip_class, ac_debug_addr_callback addr_callback,
+ void *addr_callback_data);
-bool ac_vm_fault_occured(enum chip_class chip_class,
- uint64_t *old_dmesg_timestamp, uint64_t *out_addr);
+bool ac_vm_fault_occured(enum chip_class chip_class, uint64_t *old_dmesg_timestamp,
+ uint64_t *out_addr);
unsigned ac_get_wave_info(enum chip_class chip_class,
- struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]);
#ifdef __cplusplus
}
#ifndef AC_EXP_PARAM_H
#define AC_EXP_PARAM_H
-enum {
- /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
- AC_EXP_PARAM_OFFSET_0 = 0,
- AC_EXP_PARAM_OFFSET_31 = 31,
- /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
- AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
- AC_EXP_PARAM_DEFAULT_VAL_0001,
- AC_EXP_PARAM_DEFAULT_VAL_1110,
- AC_EXP_PARAM_DEFAULT_VAL_1111,
- AC_EXP_PARAM_UNDEFINED = 255,
+enum
+{
+ /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+ AC_EXP_PARAM_OFFSET_0 = 0,
+ AC_EXP_PARAM_OFFSET_31 = 31,
+ /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+ AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
+ AC_EXP_PARAM_DEFAULT_VAL_0001,
+ AC_EXP_PARAM_DEFAULT_VAL_1110,
+ AC_EXP_PARAM_DEFAULT_VAL_1111,
+ AC_EXP_PARAM_UNDEFINED = 255,
};
#endif
*/
#include "ac_gpu_info.h"
+
#include "addrlib/src/amdgpu_asic_addr.h"
+#include "drm-uapi/amdgpu_drm.h"
#include "sid.h"
-
#include "util/macros.h"
#include "util/u_math.h"
+#include <amdgpu.h>
#include <stdio.h>
-
#include <xf86drm.h>
-#include "drm-uapi/amdgpu_drm.h"
-#include <amdgpu.h>
-
-#define CIK_TILE_MODE_COLOR_2D 14
-
-#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f)
-#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0
-#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4
-#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5
-#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6
-#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13
-#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14
-#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16
-#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17
+#define CIK_TILE_MODE_COLOR_2D 14
+
+#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f)
+#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6
+#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13
+#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14
+#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16
+#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17
static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
{
unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
- return 2;
+ return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
- return 4;
+ return 4;
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
- return 8;
+ return 8;
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
- return 16;
+ return 16;
default:
- fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n");
- assert(!"this should never occur");
- return 2;
+ fprintf(stderr, "Invalid GFX7 pipe configuration, assuming P2\n");
+ assert(!"this should never occur");
+ return 2;
}
}
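As a quick sanity check of the decode above (register value hypothetical): PIPE_CONFIG occupies bits [10:6] of the 2D color tile mode, so a tile-mode word of 0x2c0 extracts to (0x2c0 >> 6) & 0x1f = 0xb, ADDR_SURF_P8_16x32_16x16, which the switch maps to 8 pipes:

   unsigned mode2d = 0x2c0; /* hypothetical GB_TILE_MODE[14] value */
   assert(CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d) ==
          CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16); /* 0xb -> 8 pipes */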
static bool has_syncobj(int fd)
{
- uint64_t value;
- if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &value))
- return false;
- return value ? true : false;
+ uint64_t value;
+ if (drmGetCap(fd, DRM_CAP_SYNCOBJ, &value))
+ return false;
+ return value ? true : false;
}
static bool has_timeline_syncobj(int fd)
{
- uint64_t value;
- if (drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &value))
- return false;
- return value ? true : false;
+ uint64_t value;
+ if (drmGetCap(fd, DRM_CAP_SYNCOBJ_TIMELINE, &value))
+ return false;
+ return value ? true : false;
}
static uint64_t fix_vram_size(uint64_t size)
{
- /* The VRAM size is underreported, so we need to fix it, because
- * it's used to compute the number of memory modules for harvesting.
- */
- return align64(size, 256*1024*1024);
+ /* The VRAM size is underreported, so we need to fix it, because
+ * it's used to compute the number of memory modules for harvesting.
+ */
+ return align64(size, 256 * 1024 * 1024);
}
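align64 here is util's usual power-of-two round-up, (v + a - 1) & ~(a - 1), so a slightly under-reported size snaps back up to a whole number of 256 MiB memory modules. A hypothetical worked case:

   uint64_t reported = 8176ull * 1024 * 1024;  /* kernel-reported VRAM */
   uint64_t fixed = (reported + (256ull << 20) - 1) & ~((256ull << 20) - 1);
   assert(fixed == 8192ull * 1024 * 1024);     /* rounded up to a full 8 GiB */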
-static uint32_t
-get_l2_cache_size(enum radeon_family family)
+static uint32_t get_l2_cache_size(enum radeon_family family)
{
- switch (family) {
- case CHIP_KABINI:
- case CHIP_STONEY:
- return 128 * 1024;
- case CHIP_OLAND:
- case CHIP_HAINAN:
- case CHIP_ICELAND:
- return 256 * 1024;
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_BONAIRE:
- case CHIP_KAVERI:
- case CHIP_POLARIS12:
- case CHIP_CARRIZO:
- return 512 * 1024;
- case CHIP_TAHITI:
- case CHIP_TONGA:
- return 768 * 1024;
- break;
- case CHIP_HAWAII:
- case CHIP_POLARIS11:
- return 1024 * 1024;
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- return 2048 * 1024;
- break;
- default:
- return 4096 * 1024;
- }
+ switch (family) {
+ case CHIP_KABINI:
+ case CHIP_STONEY:
+ return 128 * 1024;
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ case CHIP_ICELAND:
+ return 256 * 1024;
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_BONAIRE:
+ case CHIP_KAVERI:
+ case CHIP_POLARIS12:
+ case CHIP_CARRIZO:
+ return 512 * 1024;
+ case CHIP_TAHITI:
+ case CHIP_TONGA:
+ return 768 * 1024;
+ break;
+ case CHIP_HAWAII:
+ case CHIP_POLARIS11:
+ return 1024 * 1024;
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ return 2048 * 1024;
+ break;
+ default:
+ return 4096 * 1024;
+ }
}
-bool ac_query_gpu_info(int fd, void *dev_p,
- struct radeon_info *info,
- struct amdgpu_gpu_info *amdinfo)
+bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
+ struct amdgpu_gpu_info *amdinfo)
{
- struct drm_amdgpu_info_device device_info = {};
- struct amdgpu_buffer_size_alignments alignment_info = {};
- struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
- struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_jpeg = {};
- struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
- struct amdgpu_gds_resource_info gds = {};
- uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
- int r, i, j;
- amdgpu_device_handle dev = dev_p;
- drmDevicePtr devinfo;
-
- /* Get PCI info. */
- r = drmGetDevice2(fd, 0, &devinfo);
- if (r) {
- fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
- return false;
- }
- info->pci_domain = devinfo->businfo.pci->domain;
- info->pci_bus = devinfo->businfo.pci->bus;
- info->pci_dev = devinfo->businfo.pci->dev;
- info->pci_func = devinfo->businfo.pci->func;
- drmFreeDevice(&devinfo);
-
- assert(info->drm_major == 3);
- info->is_amdgpu = true;
-
- /* Query hardware and driver information. */
- r = amdgpu_query_gpu_info(dev, amdinfo);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
- return false;
- }
-
- r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info),
- &device_info);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_info(dev_info) failed.\n");
- return false;
- }
-
- r = amdgpu_query_buffer_size_alignment(dev, &alignment_info);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
- return false;
- }
-
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
- return false;
- }
-
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &gfx);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n");
- return false;
- }
-
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
- return false;
- }
-
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, &uvd);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
- return false;
- }
-
- if (info->drm_minor >= 17) {
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
- return false;
- }
- }
-
- if (info->drm_minor >= 17) {
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
- return false;
- }
- }
-
- if (info->drm_minor >= 17) {
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_ENC, 0, &vcn_enc);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_enc) failed.\n");
- return false;
- }
- }
-
- if (info->drm_minor >= 27) {
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_JPEG, 0, &vcn_jpeg);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_jpeg) failed.\n");
- return false;
- }
- }
-
- r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
- &info->me_fw_version,
- &info->me_fw_feature);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
- return false;
- }
-
- r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0,
- &info->pfp_fw_version,
- &info->pfp_fw_feature);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
- return false;
- }
-
- r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0,
- &info->ce_fw_version,
- &info->ce_fw_feature);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
- return false;
- }
-
- r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0,
- &uvd_version, &uvd_feature);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n");
- return false;
- }
-
- r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCE, 0, &vce);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
- return false;
- }
-
- r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0,
- &vce_version, &vce_feature);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
- return false;
- }
-
- r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n");
- return false;
- }
-
- r = amdgpu_query_gds_info(dev, &gds);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_gds_info failed.\n");
- return false;
- }
-
- if (info->drm_minor >= 9) {
- struct drm_amdgpu_memory_info meminfo = {};
-
- r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n");
- return false;
- }
-
- /* Note: usable_heap_size values can be random and can't be relied on. */
- info->gart_size = meminfo.gtt.total_heap_size;
- info->vram_size = fix_vram_size(meminfo.vram.total_heap_size);
- info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size;
- } else {
- /* This is a deprecated interface, which reports usable sizes
- * (total minus pinned), but the pinned size computation is
- * buggy, so the values returned from these functions can be
- * random.
- */
- struct amdgpu_heap_info vram, vram_vis, gtt;
-
- r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
- return false;
- }
-
- r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- &vram_vis);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
- return false;
- }
-
- r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
- if (r) {
- fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
- return false;
- }
-
- info->gart_size = gtt.heap_size;
- info->vram_size = fix_vram_size(vram.heap_size);
- info->vram_vis_size = vram_vis.heap_size;
- }
-
- /* Set chip identification. */
- info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
- info->pci_rev_id = amdinfo->pci_rev_id;
- info->vce_harvest_config = amdinfo->vce_harvest_config;
-
-#define identify_chip2(asic, chipname) \
- if (ASICREV_IS(amdinfo->chip_external_rev, asic)) { \
- info->family = CHIP_##chipname; \
- info->name = #chipname; \
- }
+ struct drm_amdgpu_info_device device_info = {};
+ struct amdgpu_buffer_size_alignments alignment_info = {};
+ struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
+ struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_jpeg = {};
+ struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
+ struct amdgpu_gds_resource_info gds = {};
+ uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
+ int r, i, j;
+ amdgpu_device_handle dev = dev_p;
+ drmDevicePtr devinfo;
+
+ /* Get PCI info. */
+ r = drmGetDevice2(fd, 0, &devinfo);
+ if (r) {
+ fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
+ return false;
+ }
+ info->pci_domain = devinfo->businfo.pci->domain;
+ info->pci_bus = devinfo->businfo.pci->bus;
+ info->pci_dev = devinfo->businfo.pci->dev;
+ info->pci_func = devinfo->businfo.pci->func;
+ drmFreeDevice(&devinfo);
+
+ assert(info->drm_major == 3);
+ info->is_amdgpu = true;
+
+ /* Query hardware and driver information. */
+ r = amdgpu_query_gpu_info(dev, amdinfo);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_info(dev, AMDGPU_INFO_DEV_INFO, sizeof(device_info), &device_info);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_info(dev_info) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_buffer_size_alignment(dev, &alignment_info);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &gfx);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, &uvd);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
+ return false;
+ }
+
+ if (info->drm_minor >= 17) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD_ENC, 0, &uvd_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd_enc) failed.\n");
+ return false;
+ }
+ }
+
+ if (info->drm_minor >= 17) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
+ return false;
+ }
+ }
+
+ if (info->drm_minor >= 17) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_ENC, 0, &vcn_enc);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_enc) failed.\n");
+ return false;
+ }
+ }
+
+ if (info->drm_minor >= 27) {
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_JPEG, 0, &vcn_jpeg);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_jpeg) failed.\n");
+ return false;
+ }
+ }
+
+ r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version,
+ &info->me_fw_feature);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0, &info->pfp_fw_version,
+ &info->pfp_fw_feature);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0, &info->ce_fw_version,
+ &info->ce_fw_feature);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0, &uvd_version, &uvd_feature);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCE, 0, &vce);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0, &vce_version, &vce_feature);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_sw_info(dev, amdgpu_sw_info_address32_hi, &info->address32_hi);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_sw_info(address32_hi) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_gds_info(dev, &gds);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_gds_info failed.\n");
+ return false;
+ }
+
+ if (info->drm_minor >= 9) {
+ struct drm_amdgpu_memory_info meminfo = {};
+
+ r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n");
+ return false;
+ }
+
+ /* Note: usable_heap_size values can be random and can't be relied on. */
+ info->gart_size = meminfo.gtt.total_heap_size;
+ info->vram_size = fix_vram_size(meminfo.vram.total_heap_size);
+ info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size;
+ } else {
+ /* This is a deprecated interface, which reports usable sizes
+ * (total minus pinned), but the pinned size computation is
+ * buggy, so the values returned from these functions can be
+ * random.
+ */
+ struct amdgpu_heap_info vram, vram_vis, gtt;
+
+ r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ &vram_vis);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
+ return false;
+ }
+
+ r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
+ if (r) {
+ fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
+ return false;
+ }
+
+ info->gart_size = gtt.heap_size;
+ info->vram_size = fix_vram_size(vram.heap_size);
+ info->vram_vis_size = vram_vis.heap_size;
+ }
+
+ /* Set chip identification. */
+ info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
+ info->pci_rev_id = amdinfo->pci_rev_id;
+ info->vce_harvest_config = amdinfo->vce_harvest_config;
+
+#define identify_chip2(asic, chipname) \
+ if (ASICREV_IS(amdinfo->chip_external_rev, asic)) { \
+ info->family = CHIP_##chipname; \
+ info->name = #chipname; \
+ }
#define identify_chip(chipname) identify_chip2(chipname, chipname)
- switch (amdinfo->family_id) {
- case FAMILY_SI:
- identify_chip(TAHITI);
- identify_chip(PITCAIRN);
- identify_chip2(CAPEVERDE, VERDE);
- identify_chip(OLAND);
- identify_chip(HAINAN);
- break;
- case FAMILY_CI:
- identify_chip(BONAIRE);
- identify_chip(HAWAII);
- break;
- case FAMILY_KV:
- identify_chip2(SPECTRE, KAVERI);
- identify_chip2(SPOOKY, KAVERI);
- identify_chip2(KALINDI, KABINI);
- identify_chip2(GODAVARI, KABINI);
- break;
- case FAMILY_VI:
- identify_chip(ICELAND);
- identify_chip(TONGA);
- identify_chip(FIJI);
- identify_chip(POLARIS10);
- identify_chip(POLARIS11);
- identify_chip(POLARIS12);
- identify_chip(VEGAM);
- break;
- case FAMILY_CZ:
- identify_chip(CARRIZO);
- identify_chip(STONEY);
- break;
- case FAMILY_AI:
- identify_chip(VEGA10);
- identify_chip(VEGA12);
- identify_chip(VEGA20);
- identify_chip(ARCTURUS);
- break;
- case FAMILY_RV:
- identify_chip(RAVEN);
- identify_chip(RAVEN2);
- identify_chip(RENOIR);
- break;
- case FAMILY_NV:
- identify_chip(NAVI10);
- identify_chip(NAVI12);
- identify_chip(NAVI14);
- identify_chip(SIENNA_CICHLID);
- identify_chip(NAVY_FLOUNDER);
- break;
- }
-
- if (!info->name) {
- fprintf(stderr, "amdgpu: unknown (family_id, chip_external_rev): (%u, %u)\n",
- amdinfo->family_id, amdinfo->chip_external_rev);
- return false;
- }
-
- if (info->family >= CHIP_SIENNA_CICHLID)
- info->chip_class = GFX10_3;
- else if (info->family >= CHIP_NAVI10)
- info->chip_class = GFX10;
- else if (info->family >= CHIP_VEGA10)
- info->chip_class = GFX9;
- else if (info->family >= CHIP_TONGA)
- info->chip_class = GFX8;
- else if (info->family >= CHIP_BONAIRE)
- info->chip_class = GFX7;
- else if (info->family >= CHIP_TAHITI)
- info->chip_class = GFX6;
- else {
- fprintf(stderr, "amdgpu: Unknown family.\n");
- return false;
- }
-
- info->family_id = amdinfo->family_id;
- info->chip_external_rev = amdinfo->chip_external_rev;
- info->marketing_name = amdgpu_get_marketing_name(dev);
- info->is_pro_graphics = info->marketing_name &&
- (!strcmp(info->marketing_name, "Pro") ||
- !strcmp(info->marketing_name, "PRO") ||
- !strcmp(info->marketing_name, "Frontier"));
-
- /* Set which chips have dedicated VRAM. */
- info->has_dedicated_vram =
- !(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
-
- /* The kernel can split large buffers in VRAM but not in GTT, so large
- * allocations can fail or cause buffer movement failures in the kernel.
- */
- if (info->has_dedicated_vram)
- info->max_alloc_size = info->vram_size * 0.8;
- else
- info->max_alloc_size = info->gart_size * 0.7;
-
- info->vram_type = amdinfo->vram_type;
- info->vram_bit_width = amdinfo->vram_bit_width;
- info->ce_ram_size = amdinfo->ce_ram_size;
-
- info->l2_cache_size = get_l2_cache_size(info->family);
- info->l1_cache_size = 16384;
-
- /* Set which chips have uncached device memory. */
- info->has_l2_uncached = info->chip_class >= GFX9;
-
- /* Set hardware information. */
- info->gds_size = gds.gds_total_size;
- info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
- /* convert the shader/memory clocks from KHz to MHz */
- info->max_shader_clock = amdinfo->max_engine_clk / 1000;
- info->max_memory_clock = amdinfo->max_memory_clk / 1000;
- info->num_tcc_blocks = device_info.num_tcc_blocks;
- info->max_se = amdinfo->num_shader_engines;
- info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
- info->has_hw_decode =
- (uvd.available_rings != 0) || (vcn_dec.available_rings != 0) ||
- (vcn_jpeg.available_rings != 0);
- info->uvd_fw_version =
- uvd.available_rings ? uvd_version : 0;
- info->vce_fw_version =
- vce.available_rings ? vce_version : 0;
- info->uvd_enc_supported =
- uvd_enc.available_rings ? true : false;
- info->has_userptr = true;
- info->has_syncobj = has_syncobj(fd);
- info->has_timeline_syncobj = has_timeline_syncobj(fd);
- info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
- info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
- info->has_ctx_priority = info->drm_minor >= 22;
- info->has_local_buffers = info->drm_minor >= 20;
- info->kernel_flushes_hdp_before_ib = true;
- info->htile_cmask_support_1d_tiling = true;
- info->si_TA_CS_BC_BASE_ADDR_allowed = true;
- info->has_bo_metadata = true;
- info->has_gpu_reset_status_query = true;
- info->has_eqaa_surface_allocator = true;
- info->has_format_bc1_through_bc7 = true;
- /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */
- info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 ||
- info->drm_minor >= 2;
- info->has_indirect_compute_dispatch = true;
- /* GFX6 doesn't support unaligned loads. */
- info->has_unaligned_shader_loads = info->chip_class != GFX6;
- /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once
- * these faults are mitigated in software.
- */
- info->has_sparse_vm_mappings = info->chip_class >= GFX7 && info->drm_minor >= 13;
- info->has_2d_tiling = true;
- info->has_read_registers_query = true;
- info->has_scheduled_fence_dependency = info->drm_minor >= 28;
- info->mid_command_buffer_preemption_enabled =
- amdinfo->ids_flags & AMDGPU_IDS_FLAGS_PREEMPTION;
-
- info->pa_sc_tile_steering_override = device_info.pa_sc_tile_steering_override;
- info->num_render_backends = amdinfo->rb_pipes;
- /* The value returned by the kernel driver was wrong. */
- if (info->family == CHIP_KAVERI)
- info->num_render_backends = 2;
-
- info->clock_crystal_freq = amdinfo->gpu_counter_freq;
- if (!info->clock_crystal_freq) {
- fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
- info->clock_crystal_freq = 1;
- }
- if (info->chip_class >= GFX10) {
- info->tcc_cache_line_size = 128;
-
- if (info->drm_minor >= 35) {
- info->tcc_harvested = device_info.tcc_disabled_mask != 0;
- } else {
- /* This is a hack, but it's all we can do without a kernel upgrade. */
- info->tcc_harvested =
- (info->vram_size / info->num_tcc_blocks) != 512*1024*1024;
- }
- } else {
- info->tcc_cache_line_size = 64;
- }
- info->gb_addr_config = amdinfo->gb_addr_cfg;
- if (info->chip_class >= GFX9) {
- info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
- info->pipe_interleave_bytes =
- 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg);
- } else {
- info->num_tile_pipes = cik_get_num_tile_pipes(amdinfo);
- info->pipe_interleave_bytes =
- 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo->gb_addr_cfg);
- }
- info->r600_has_virtual_memory = true;
-
- /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
- * 16KB makes some SIMDs unoccupied).
- *
- * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
- */
- info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
- info->lds_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
-
- assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
- assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
-
- info->has_graphics = gfx.available_rings > 0;
- info->num_rings[RING_GFX] = util_bitcount(gfx.available_rings);
- info->num_rings[RING_COMPUTE] = util_bitcount(compute.available_rings);
- info->num_rings[RING_DMA] = util_bitcount(dma.available_rings);
- info->num_rings[RING_UVD] = util_bitcount(uvd.available_rings);
- info->num_rings[RING_VCE] = util_bitcount(vce.available_rings);
- info->num_rings[RING_UVD_ENC] = util_bitcount(uvd_enc.available_rings);
- info->num_rings[RING_VCN_DEC] = util_bitcount(vcn_dec.available_rings);
- info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings);
- info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings);
-
- /* This is "align_mask" copied from the kernel, maximums of all IP versions. */
- info->ib_pad_dw_mask[RING_GFX] = 0xff;
- info->ib_pad_dw_mask[RING_COMPUTE] = 0xff;
- info->ib_pad_dw_mask[RING_DMA] = 0xf;
- info->ib_pad_dw_mask[RING_UVD] = 0xf;
- info->ib_pad_dw_mask[RING_VCE] = 0x3f;
- info->ib_pad_dw_mask[RING_UVD_ENC] = 0x3f;
- info->ib_pad_dw_mask[RING_VCN_DEC] = 0xf;
- info->ib_pad_dw_mask[RING_VCN_ENC] = 0x3f;
- info->ib_pad_dw_mask[RING_VCN_JPEG] = 0xf;
-
- /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
- * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
- * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
- */
- info->has_clear_state = info->chip_class >= GFX7;
-
- info->has_distributed_tess = info->chip_class >= GFX10 ||
- (info->chip_class >= GFX8 && info->max_se >= 2);
-
- info->has_dcc_constant_encode = info->family == CHIP_RAVEN2 ||
- info->family == CHIP_RENOIR ||
- info->chip_class >= GFX10;
-
- info->has_rbplus = info->family == CHIP_STONEY ||
- info->chip_class >= GFX9;
-
- /* Some chips have RB+ registers, but don't support RB+. Those must
- * always disable it.
- */
- info->rbplus_allowed = info->has_rbplus &&
- (info->family == CHIP_STONEY ||
- info->family == CHIP_VEGA12 ||
- info->family == CHIP_RAVEN ||
- info->family == CHIP_RAVEN2 ||
- info->family == CHIP_RENOIR ||
- info->chip_class >= GFX10_3);
-
- info->has_out_of_order_rast = info->chip_class >= GFX8 &&
- info->chip_class <= GFX9 &&
- info->max_se >= 2;
-
- /* Whether chips support double rate packed math instructions. */
- info->has_packed_math_16bit = info->chip_class >= GFX9;
-
- /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
- info->has_load_ctx_reg_pkt = info->chip_class >= GFX9 ||
- (info->chip_class >= GFX8 &&
- info->me_fw_feature >= 41);
-
- info->cpdma_prefetch_writes_memory = info->chip_class <= GFX8;
-
- info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 ||
- info->family == CHIP_RAVEN;
-
- info->has_tc_compat_zrange_bug = info->chip_class >= GFX8 &&
- info->chip_class <= GFX9;
-
- info->has_msaa_sample_loc_bug = (info->family >= CHIP_POLARIS10 &&
- info->family <= CHIP_POLARIS12) ||
- info->family == CHIP_VEGA10 ||
- info->family == CHIP_RAVEN;
-
- info->has_ls_vgpr_init_bug = info->family == CHIP_VEGA10 ||
- info->family == CHIP_RAVEN;
-
- /* Get the number of good compute units. */
- info->num_good_compute_units = 0;
- for (i = 0; i < info->max_se; i++) {
- for (j = 0; j < info->max_sh_per_se; j++) {
- /*
- * The cu bitmap in amd gpu info structure is
- * 4x4 size array, and it's usually suitable for Vega
- * ASICs which has 4*2 SE/SH layout.
- * But for Arcturus, SE/SH layout is changed to 8*1.
- * To mostly reduce the impact, we make it compatible
- * with current bitmap array as below:
- * SE4,SH0 --> cu_bitmap[0][1]
- * SE5,SH0 --> cu_bitmap[1][1]
- * SE6,SH0 --> cu_bitmap[2][1]
- * SE7,SH0 --> cu_bitmap[3][1]
- */
- info->cu_mask[i%4][j+i/4] = amdinfo->cu_bitmap[i%4][j+i/4];
- info->num_good_compute_units +=
- util_bitcount(info->cu_mask[i][j]);
- }
- }
-
- /* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled,
- * and max - min <= 2.
- */
- unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
- info->max_good_cu_per_sa = DIV_ROUND_UP(info->num_good_compute_units,
- (info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
- info->min_good_cu_per_sa = (info->num_good_compute_units /
- (info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
-
- memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
- sizeof(amdinfo->gb_tile_mode));
- info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
-
- memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
- sizeof(amdinfo->gb_macro_tile_mode));
-
- info->pte_fragment_size = alignment_info.size_local;
- info->gart_page_size = alignment_info.size_remote;
-
- if (info->chip_class == GFX6)
- info->gfx_ib_pad_with_type2 = true;
-
- unsigned ib_align = 0;
- ib_align = MAX2(ib_align, gfx.ib_start_alignment);
- ib_align = MAX2(ib_align, gfx.ib_size_alignment);
- ib_align = MAX2(ib_align, compute.ib_start_alignment);
- ib_align = MAX2(ib_align, compute.ib_size_alignment);
- ib_align = MAX2(ib_align, dma.ib_start_alignment);
- ib_align = MAX2(ib_align, dma.ib_size_alignment);
- ib_align = MAX2(ib_align, uvd.ib_start_alignment);
- ib_align = MAX2(ib_align, uvd.ib_size_alignment);
- ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment);
- ib_align = MAX2(ib_align, uvd_enc.ib_size_alignment);
- ib_align = MAX2(ib_align, vce.ib_start_alignment);
- ib_align = MAX2(ib_align, vce.ib_size_alignment);
- ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment);
- ib_align = MAX2(ib_align, vcn_dec.ib_size_alignment);
- ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment);
- ib_align = MAX2(ib_align, vcn_enc.ib_size_alignment);
- ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment);
- ib_align = MAX2(ib_align, vcn_jpeg.ib_size_alignment);
- /* GFX10 and maybe GFX9 need this alignment for cache coherency. */
- if (info->chip_class >= GFX9)
- ib_align = MAX2(ib_align, info->tcc_cache_line_size);
- /* The kernel pads gfx and compute IBs to 256 dwords since:
- * 66f3b2d527154bd258a57c8815004b5964aa1cf5
- * Do the same.
- */
- ib_align = MAX2(ib_align, 1024);
- info->ib_alignment = ib_align;
-
- if ((info->drm_minor >= 31 &&
- (info->family == CHIP_RAVEN ||
- info->family == CHIP_RAVEN2 ||
- info->family == CHIP_RENOIR)) ||
- (info->drm_minor >= 34 &&
- (info->family == CHIP_NAVI12 ||
- info->family == CHIP_NAVI14)) ||
- info->chip_class >= GFX10_3) {
- if (info->num_render_backends == 1)
- info->use_display_dcc_unaligned = true;
- else
- info->use_display_dcc_with_retile_blit = true;
- }
-
- info->has_gds_ordered_append = info->chip_class >= GFX7 &&
- info->drm_minor >= 29;
-
- if (info->chip_class >= GFX9) {
- unsigned pc_lines = 0;
-
- switch (info->family) {
- case CHIP_VEGA10:
- case CHIP_VEGA12:
- case CHIP_VEGA20:
- pc_lines = 2048;
- break;
- case CHIP_RAVEN:
- case CHIP_RAVEN2:
- case CHIP_RENOIR:
- case CHIP_NAVI10:
- case CHIP_NAVI12:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
- pc_lines = 1024;
- break;
- case CHIP_NAVI14:
- pc_lines = 512;
- break;
- case CHIP_ARCTURUS:
- break;
- default:
- assert(0);
- }
-
- info->pc_lines = pc_lines;
-
- if (info->chip_class >= GFX10) {
- info->pbb_max_alloc_count = pc_lines / 3;
- } else {
- info->pbb_max_alloc_count =
- MIN2(128, pc_lines / (4 * info->max_se));
- }
- }
-
- /* The number of SDPs is the same as the number of TCCs for now. */
- if (info->chip_class >= GFX10)
- info->num_sdp_interfaces = device_info.num_tcc_blocks;
-
- if (info->chip_class >= GFX10_3)
- info->max_wave64_per_simd = 16;
- else if (info->chip_class == GFX10)
- info->max_wave64_per_simd = 20;
- else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
- info->max_wave64_per_simd = 8;
- else
- info->max_wave64_per_simd = 10;
-
- if (info->chip_class >= GFX10) {
- info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd;
- info->min_sgpr_alloc = 128;
- info->sgpr_alloc_granularity = 128;
- /* Don't use late alloc on small chips. */
- info->use_late_alloc = info->num_render_backends > 4;
- } else if (info->chip_class >= GFX8) {
- info->num_physical_sgprs_per_simd = 800;
- info->min_sgpr_alloc = 16;
- info->sgpr_alloc_granularity = 16;
- info->use_late_alloc = true;
- } else {
- info->num_physical_sgprs_per_simd = 512;
- info->min_sgpr_alloc = 8;
- info->sgpr_alloc_granularity = 8;
- /* Potential hang on Kabini: */
- info->use_late_alloc = info->family != CHIP_KABINI;
- }
-
- info->max_sgpr_alloc = info->family == CHIP_TONGA ||
- info->family == CHIP_ICELAND ? 96 : 104;
-
- info->min_wave64_vgpr_alloc = 4;
- info->max_vgpr_alloc = 256;
- info->wave64_vgpr_alloc_granularity = 4;
-
- info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
- info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
-
- return true;
+ switch (amdinfo->family_id) {
+ case FAMILY_SI:
+ identify_chip(TAHITI);
+ identify_chip(PITCAIRN);
+ identify_chip2(CAPEVERDE, VERDE);
+ identify_chip(OLAND);
+ identify_chip(HAINAN);
+ break;
+ case FAMILY_CI:
+ identify_chip(BONAIRE);
+ identify_chip(HAWAII);
+ break;
+ case FAMILY_KV:
+ identify_chip2(SPECTRE, KAVERI);
+ identify_chip2(SPOOKY, KAVERI);
+ identify_chip2(KALINDI, KABINI);
+ identify_chip2(GODAVARI, KABINI);
+ break;
+ case FAMILY_VI:
+ identify_chip(ICELAND);
+ identify_chip(TONGA);
+ identify_chip(FIJI);
+ identify_chip(POLARIS10);
+ identify_chip(POLARIS11);
+ identify_chip(POLARIS12);
+ identify_chip(VEGAM);
+ break;
+ case FAMILY_CZ:
+ identify_chip(CARRIZO);
+ identify_chip(STONEY);
+ break;
+ case FAMILY_AI:
+ identify_chip(VEGA10);
+ identify_chip(VEGA12);
+ identify_chip(VEGA20);
+ identify_chip(ARCTURUS);
+ break;
+ case FAMILY_RV:
+ identify_chip(RAVEN);
+ identify_chip(RAVEN2);
+ identify_chip(RENOIR);
+ break;
+ case FAMILY_NV:
+ identify_chip(NAVI10);
+ identify_chip(NAVI12);
+ identify_chip(NAVI14);
+ identify_chip(SIENNA_CICHLID);
+ identify_chip(NAVY_FLOUNDER);
+ break;
+ }
+
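For reference, each identify_chip(X) line above is pure preprocessor expansion; identify_chip(TAHITI), for instance, expands to:

   if (ASICREV_IS(amdinfo->chip_external_rev, TAHITI)) {
      info->family = CHIP_TAHITI;
      info->name = "TAHITI";
   }

so the switch simply tries every candidate revision in the family, and info->name stays NULL when none matched, which the check below turns into a hard failure.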
+ if (!info->name) {
+ fprintf(stderr, "amdgpu: unknown (family_id, chip_external_rev): (%u, %u)\n",
+ amdinfo->family_id, amdinfo->chip_external_rev);
+ return false;
+ }
+
+ if (info->family >= CHIP_SIENNA_CICHLID)
+ info->chip_class = GFX10_3;
+ else if (info->family >= CHIP_NAVI10)
+ info->chip_class = GFX10;
+ else if (info->family >= CHIP_VEGA10)
+ info->chip_class = GFX9;
+ else if (info->family >= CHIP_TONGA)
+ info->chip_class = GFX8;
+ else if (info->family >= CHIP_BONAIRE)
+ info->chip_class = GFX7;
+ else if (info->family >= CHIP_TAHITI)
+ info->chip_class = GFX6;
+ else {
+ fprintf(stderr, "amdgpu: Unknown family.\n");
+ return false;
+ }
+
+ info->family_id = amdinfo->family_id;
+ info->chip_external_rev = amdinfo->chip_external_rev;
+ info->marketing_name = amdgpu_get_marketing_name(dev);
+ info->is_pro_graphics = info->marketing_name && (!strcmp(info->marketing_name, "Pro") ||
+ !strcmp(info->marketing_name, "PRO") ||
+ !strcmp(info->marketing_name, "Frontier"));
+
+ /* Set which chips have dedicated VRAM. */
+ info->has_dedicated_vram = !(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
+
+ /* The kernel can split large buffers in VRAM but not in GTT, so large
+ * allocations can fail or cause buffer movement failures in the kernel.
+ */
+ if (info->has_dedicated_vram)
+ info->max_alloc_size = info->vram_size * 0.8;
+ else
+ info->max_alloc_size = info->gart_size * 0.7;
+
+ info->vram_type = amdinfo->vram_type;
+ info->vram_bit_width = amdinfo->vram_bit_width;
+ info->ce_ram_size = amdinfo->ce_ram_size;
+
+ info->l2_cache_size = get_l2_cache_size(info->family);
+ info->l1_cache_size = 16384;
+
+ /* Set which chips have uncached device memory. */
+ info->has_l2_uncached = info->chip_class >= GFX9;
+
+ /* Set hardware information. */
+ info->gds_size = gds.gds_total_size;
+ info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
+ /* convert the shader/memory clocks from KHz to MHz */
+ info->max_shader_clock = amdinfo->max_engine_clk / 1000;
+ info->max_memory_clock = amdinfo->max_memory_clk / 1000;
+ info->num_tcc_blocks = device_info.num_tcc_blocks;
+ info->max_se = amdinfo->num_shader_engines;
+ info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
+ info->has_hw_decode = (uvd.available_rings != 0) || (vcn_dec.available_rings != 0) ||
+ (vcn_jpeg.available_rings != 0);
+ info->uvd_fw_version = uvd.available_rings ? uvd_version : 0;
+ info->vce_fw_version = vce.available_rings ? vce_version : 0;
+ info->uvd_enc_supported = uvd_enc.available_rings ? true : false;
+ info->has_userptr = true;
+ info->has_syncobj = has_syncobj(fd);
+ info->has_timeline_syncobj = has_timeline_syncobj(fd);
+ info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
+ info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
+ info->has_ctx_priority = info->drm_minor >= 22;
+ info->has_local_buffers = info->drm_minor >= 20;
+ info->kernel_flushes_hdp_before_ib = true;
+ info->htile_cmask_support_1d_tiling = true;
+ info->si_TA_CS_BC_BASE_ADDR_allowed = true;
+ info->has_bo_metadata = true;
+ info->has_gpu_reset_status_query = true;
+ info->has_eqaa_surface_allocator = true;
+ info->has_format_bc1_through_bc7 = true;
+ /* DRM 3.1.0 doesn't flush TC for GFX8 correctly. */
+ info->kernel_flushes_tc_l2_after_ib = info->chip_class != GFX8 || info->drm_minor >= 2;
+ info->has_indirect_compute_dispatch = true;
+ /* GFX6 doesn't support unaligned loads. */
+ info->has_unaligned_shader_loads = info->chip_class != GFX6;
+ /* Disable sparse mappings on GFX6 due to VM faults in CP DMA. Enable them once
+ * these faults are mitigated in software.
+ */
+ info->has_sparse_vm_mappings = info->chip_class >= GFX7 && info->drm_minor >= 13;
+ info->has_2d_tiling = true;
+ info->has_read_registers_query = true;
+ info->has_scheduled_fence_dependency = info->drm_minor >= 28;
+ info->mid_command_buffer_preemption_enabled = amdinfo->ids_flags & AMDGPU_IDS_FLAGS_PREEMPTION;
+
+ info->pa_sc_tile_steering_override = device_info.pa_sc_tile_steering_override;
+ info->num_render_backends = amdinfo->rb_pipes;
+ /* The value returned by the kernel driver was wrong. */
+ if (info->family == CHIP_KAVERI)
+ info->num_render_backends = 2;
+
+ info->clock_crystal_freq = amdinfo->gpu_counter_freq;
+ if (!info->clock_crystal_freq) {
+ fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
+ info->clock_crystal_freq = 1;
+ }
+ if (info->chip_class >= GFX10) {
+ info->tcc_cache_line_size = 128;
+
+ if (info->drm_minor >= 35) {
+ info->tcc_harvested = device_info.tcc_disabled_mask != 0;
+ } else {
+ /* This is a hack, but it's all we can do without a kernel upgrade. */
+ info->tcc_harvested = (info->vram_size / info->num_tcc_blocks) != 512 * 1024 * 1024;
+ }
+ } else {
+ info->tcc_cache_line_size = 64;
+ }
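To illustrate the pre-DRM-3.35 heuristic with hypothetical numbers: a fully enabled chip exposing 8 GiB of VRAM across 16 TCCs yields exactly 512 MiB per TCC and stays unflagged, while a harvested SKU reporting 6 GiB over the same 16 TCCs does not:

   /* 8 GiB / 16 TCCs = 512 MiB  ->  tcc_harvested = false */
   /* 6 GiB / 16 TCCs = 384 MiB  ->  tcc_harvested = true  */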
+ info->gb_addr_config = amdinfo->gb_addr_cfg;
+ if (info->chip_class >= GFX9) {
+ info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
+ info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg);
+ } else {
+ info->num_tile_pipes = cik_get_num_tile_pipes(amdinfo);
+ info->pipe_interleave_bytes = 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo->gb_addr_cfg);
+ }
+ info->r600_has_virtual_memory = true;
+
+ /* LDS is 64KB per CU (4 SIMDs), which is 16KB per SIMD (usage above
+ * 16KB makes some SIMDs unoccupied).
+ *
+ * LDS is 128KB in WGP mode and 64KB in CU mode. Assume the WGP mode is used.
+ */
+ info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
+ info->lds_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+
+ assert(util_is_power_of_two_or_zero(dma.available_rings + 1));
+ assert(util_is_power_of_two_or_zero(compute.available_rings + 1));
+
+ info->has_graphics = gfx.available_rings > 0;
+ info->num_rings[RING_GFX] = util_bitcount(gfx.available_rings);
+ info->num_rings[RING_COMPUTE] = util_bitcount(compute.available_rings);
+ info->num_rings[RING_DMA] = util_bitcount(dma.available_rings);
+ info->num_rings[RING_UVD] = util_bitcount(uvd.available_rings);
+ info->num_rings[RING_VCE] = util_bitcount(vce.available_rings);
+ info->num_rings[RING_UVD_ENC] = util_bitcount(uvd_enc.available_rings);
+ info->num_rings[RING_VCN_DEC] = util_bitcount(vcn_dec.available_rings);
+ info->num_rings[RING_VCN_ENC] = util_bitcount(vcn_enc.available_rings);
+ info->num_rings[RING_VCN_JPEG] = util_bitcount(vcn_jpeg.available_rings);
+
+ /* This is "align_mask" copied from the kernel, maximums of all IP versions. */
+ info->ib_pad_dw_mask[RING_GFX] = 0xff;
+ info->ib_pad_dw_mask[RING_COMPUTE] = 0xff;
+ info->ib_pad_dw_mask[RING_DMA] = 0xf;
+ info->ib_pad_dw_mask[RING_UVD] = 0xf;
+ info->ib_pad_dw_mask[RING_VCE] = 0x3f;
+ info->ib_pad_dw_mask[RING_UVD_ENC] = 0x3f;
+ info->ib_pad_dw_mask[RING_VCN_DEC] = 0xf;
+ info->ib_pad_dw_mask[RING_VCN_ENC] = 0x3f;
+ info->ib_pad_dw_mask[RING_VCN_JPEG] = 0xf;
+
+ /* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
+ * on GFX6. Some CLEAR_STATE cause asic hang on radeon kernel, etc.
+ * SPI_VS_OUT_CONFIG. So only enable GFX7 CLEAR_STATE on amdgpu kernel.
+ */
+ info->has_clear_state = info->chip_class >= GFX7;
+
+ info->has_distributed_tess =
+ info->chip_class >= GFX10 || (info->chip_class >= GFX8 && info->max_se >= 2);
+
+ info->has_dcc_constant_encode =
+ info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10;
+
+ info->has_rbplus = info->family == CHIP_STONEY || info->chip_class >= GFX9;
+
+ /* Some chips have RB+ registers, but don't support RB+. Those must
+ * always disable it.
+ */
+ info->rbplus_allowed =
+ info->has_rbplus &&
+ (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
+ info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->chip_class >= GFX10_3);
+
+ info->has_out_of_order_rast =
+ info->chip_class >= GFX8 && info->chip_class <= GFX9 && info->max_se >= 2;
+
+ /* Whether chips support double rate packed math instructions. */
+ info->has_packed_math_16bit = info->chip_class >= GFX9;
+
+ /* TODO: Figure out how to use LOAD_CONTEXT_REG on GFX6-GFX7. */
+ info->has_load_ctx_reg_pkt =
+ info->chip_class >= GFX9 || (info->chip_class >= GFX8 && info->me_fw_feature >= 41);
+
+ info->cpdma_prefetch_writes_memory = info->chip_class <= GFX8;
+
+ info->has_gfx9_scissor_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
+
+ info->has_tc_compat_zrange_bug = info->chip_class >= GFX8 && info->chip_class <= GFX9;
+
+ info->has_msaa_sample_loc_bug =
+ (info->family >= CHIP_POLARIS10 && info->family <= CHIP_POLARIS12) ||
+ info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
+
+ info->has_ls_vgpr_init_bug = info->family == CHIP_VEGA10 || info->family == CHIP_RAVEN;
+
+ /* Get the number of good compute units. */
+ info->num_good_compute_units = 0;
+ for (i = 0; i < info->max_se; i++) {
+ for (j = 0; j < info->max_sh_per_se; j++) {
+ /*
+ * The cu bitmap in amd gpu info structure is
+ * 4x4 size array, and it's usually suitable for Vega
+ * ASICs which has 4*2 SE/SH layout.
+ * But for Arcturus, SE/SH layout is changed to 8*1.
+ * To mostly reduce the impact, we make it compatible
+ * with current bitmap array as below:
+ * SE4,SH0 --> cu_bitmap[0][1]
+ * SE5,SH0 --> cu_bitmap[1][1]
+ * SE6,SH0 --> cu_bitmap[2][1]
+ * SE7,SH0 --> cu_bitmap[3][1]
+ */
+ info->cu_mask[i % 4][j + i / 4] = amdinfo->cu_bitmap[i % 4][j + i / 4];
+ info->num_good_compute_units += util_bitcount(info->cu_mask[i][j]);
+ }
+ }
+
+ /* On GFX10, only whole WGPs (in units of 2 CUs) can be disabled,
+ * and max - min <= 2.
+ */
+ unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
+ info->max_good_cu_per_sa =
+ DIV_ROUND_UP(info->num_good_compute_units, (info->max_se * info->max_sh_per_se * cu_group)) *
+ cu_group;
+ info->min_good_cu_per_sa =
+ (info->num_good_compute_units / (info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
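A worked example of the rounding above, with hypothetical numbers: a GFX10 part (cu_group = 2) with 2 SEs and 2 SAs per SE that has 38 good CUs averages 9.5 CUs per SA, so:

   /* max_good_cu_per_sa = DIV_ROUND_UP(38, 2 * 2 * 2) * 2 = 5 * 2 = 10 */
   /* min_good_cu_per_sa = (38 / (2 * 2 * 2)) * 2          = 4 * 2 = 8  */

consistent with the max - min <= 2 guarantee noted in the comment.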
+
+ memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode, sizeof(amdinfo->gb_tile_mode));
+ info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
+
+ memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
+ sizeof(amdinfo->gb_macro_tile_mode));
+
+ info->pte_fragment_size = alignment_info.size_local;
+ info->gart_page_size = alignment_info.size_remote;
+
+ if (info->chip_class == GFX6)
+ info->gfx_ib_pad_with_type2 = true;
+
+ unsigned ib_align = 0;
+ ib_align = MAX2(ib_align, gfx.ib_start_alignment);
+ ib_align = MAX2(ib_align, gfx.ib_size_alignment);
+ ib_align = MAX2(ib_align, compute.ib_start_alignment);
+ ib_align = MAX2(ib_align, compute.ib_size_alignment);
+ ib_align = MAX2(ib_align, dma.ib_start_alignment);
+ ib_align = MAX2(ib_align, dma.ib_size_alignment);
+ ib_align = MAX2(ib_align, uvd.ib_start_alignment);
+ ib_align = MAX2(ib_align, uvd.ib_size_alignment);
+ ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment);
+ ib_align = MAX2(ib_align, uvd_enc.ib_size_alignment);
+ ib_align = MAX2(ib_align, vce.ib_start_alignment);
+ ib_align = MAX2(ib_align, vce.ib_size_alignment);
+ ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment);
+ ib_align = MAX2(ib_align, vcn_dec.ib_size_alignment);
+ ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment);
+ ib_align = MAX2(ib_align, vcn_enc.ib_size_alignment);
+ ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment);
+ ib_align = MAX2(ib_align, vcn_jpeg.ib_size_alignment);
+ /* GFX10 and maybe GFX9 need this alignment for cache coherency. */
+ if (info->chip_class >= GFX9)
+ ib_align = MAX2(ib_align, info->tcc_cache_line_size);
+ /* The kernel pads gfx and compute IBs to 256 dwords (1024 bytes) since:
+ * 66f3b2d527154bd258a57c8815004b5964aa1cf5
+ * Do the same.
+ */
+ ib_align = MAX2(ib_align, 1024);
+ info->ib_alignment = ib_align;
+
+ if ((info->drm_minor >= 31 && (info->family == CHIP_RAVEN || info->family == CHIP_RAVEN2 ||
+ info->family == CHIP_RENOIR)) ||
+ (info->drm_minor >= 34 && (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14)) ||
+ info->chip_class >= GFX10_3) {
+ if (info->num_render_backends == 1)
+ info->use_display_dcc_unaligned = true;
+ else
+ info->use_display_dcc_with_retile_blit = true;
+ }
+
+ info->has_gds_ordered_append = info->chip_class >= GFX7 && info->drm_minor >= 29;
+
+ if (info->chip_class >= GFX9) {
+ unsigned pc_lines = 0;
+
+ switch (info->family) {
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_VEGA20:
+ pc_lines = 2048;
+ break;
+ case CHIP_RAVEN:
+ case CHIP_RAVEN2:
+ case CHIP_RENOIR:
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_SIENNA_CICHLID:
+ case CHIP_NAVY_FLOUNDER:
+ pc_lines = 1024;
+ break;
+ case CHIP_NAVI14:
+ pc_lines = 512;
+ break;
+ case CHIP_ARCTURUS:
+ break;
+ default:
+ assert(0);
+ }
+
+ info->pc_lines = pc_lines;
+
+ if (info->chip_class >= GFX10) {
+ info->pbb_max_alloc_count = pc_lines / 3;
+ } else {
+ info->pbb_max_alloc_count = MIN2(128, pc_lines / (4 * info->max_se));
+ }
+ }
+
+ /* The number of SDPs is the same as the number of TCCs for now. */
+ if (info->chip_class >= GFX10)
+ info->num_sdp_interfaces = device_info.num_tcc_blocks;
+
+ if (info->chip_class >= GFX10_3)
+ info->max_wave64_per_simd = 16;
+ else if (info->chip_class == GFX10)
+ info->max_wave64_per_simd = 20;
+ else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
+ info->max_wave64_per_simd = 8;
+ else
+ info->max_wave64_per_simd = 10;
+
+ if (info->chip_class >= GFX10) {
+ info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd;
+ info->min_sgpr_alloc = 128;
+ info->sgpr_alloc_granularity = 128;
+ /* Don't use late alloc on small chips. */
+ info->use_late_alloc = info->num_render_backends > 4;
+ } else if (info->chip_class >= GFX8) {
+ info->num_physical_sgprs_per_simd = 800;
+ info->min_sgpr_alloc = 16;
+ info->sgpr_alloc_granularity = 16;
+ info->use_late_alloc = true;
+ } else {
+ info->num_physical_sgprs_per_simd = 512;
+ info->min_sgpr_alloc = 8;
+ info->sgpr_alloc_granularity = 8;
+ /* Potential hang on Kabini: */
+ info->use_late_alloc = info->family != CHIP_KABINI;
+ }
+
+ info->max_sgpr_alloc = info->family == CHIP_TONGA || info->family == CHIP_ICELAND ? 96 : 104;
+
+ info->min_wave64_vgpr_alloc = 4;
+ info->max_vgpr_alloc = 256;
+ info->wave64_vgpr_alloc_granularity = 4;
+
+ info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
+ info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
+
+ return true;
}
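A minimal caller sketch (hypothetical, error handling elided): the winsys is expected to open the render node, initialize libdrm_amdgpu, and fill in the DRM version before calling in:

   uint32_t drm_major, drm_minor;
   amdgpu_device_handle dev;
   struct radeon_info info = {0};
   struct amdgpu_gpu_info amdinfo = {0};

   drmVersionPtr ver = drmGetVersion(fd);  /* fd: open /dev/dri/renderD* node */
   info.drm_major = ver->version_major;    /* ac_query_gpu_info asserts this is 3 */
   info.drm_minor = ver->version_minor;
   drmFreeVersion(ver);

   if (!amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev) &&
       ac_query_gpu_info(fd, dev, &info, &amdinfo))
      ac_print_gpu_info(&info);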
void ac_compute_driver_uuid(char *uuid, size_t size)
{
- char amd_uuid[] = "AMD-MESA-DRV";
+ char amd_uuid[] = "AMD-MESA-DRV";
- assert(size >= sizeof(amd_uuid));
+ assert(size >= sizeof(amd_uuid));
- memset(uuid, 0, size);
- strncpy(uuid, amd_uuid, size);
+ memset(uuid, 0, size);
+ strncpy(uuid, amd_uuid, size);
}
void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size)
{
- uint32_t *uint_uuid = (uint32_t*)uuid;
-
- assert(size >= sizeof(uint32_t)*4);
-
- /**
- * Use the device info directly instead of using a sha1. GL/VK UUIDs
- * are 16 byte vs 20 byte for sha1, and the truncation that would be
- * required would get rid of part of the little entropy we have.
- * */
- memset(uuid, 0, size);
- uint_uuid[0] = info->pci_domain;
- uint_uuid[1] = info->pci_bus;
- uint_uuid[2] = info->pci_dev;
- uint_uuid[3] = info->pci_func;
+ uint32_t *uint_uuid = (uint32_t *)uuid;
+
+ assert(size >= sizeof(uint32_t) * 4);
+
+ /**
+ * Use the device info directly instead of using a sha1. GL/VK UUIDs
+ * are 16 byte vs 20 byte for sha1, and the truncation that would be
+ * required would get rid of part of the little entropy we have.
+ * */
+ memset(uuid, 0, size);
+ uint_uuid[0] = info->pci_domain;
+ uint_uuid[1] = info->pci_bus;
+ uint_uuid[2] = info->pci_dev;
+ uint_uuid[3] = info->pci_func;
}
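So for a hypothetical device at PCI 0000:03:00.0 the 16-byte UUID is just the four dwords {0x0, 0x3, 0x0, 0x0}: low on entropy, but stable across runs and processes, which is what GL/VK UUID matching needs:

   char uuid[16];
   ac_compute_device_uuid(&info, uuid, sizeof(uuid)); /* info from ac_query_gpu_info */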
void ac_print_gpu_info(struct radeon_info *info)
{
- printf("Device info:\n");
- printf(" pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n",
- info->pci_domain, info->pci_bus,
- info->pci_dev, info->pci_func);
-
- printf(" name = %s\n", info->name);
- printf(" marketing_name = %s\n", info->marketing_name);
- printf(" is_pro_graphics = %u\n", info->is_pro_graphics);
- printf(" pci_id = 0x%x\n", info->pci_id);
- printf(" pci_rev_id = 0x%x\n", info->pci_rev_id);
- printf(" family = %i\n", info->family);
- printf(" chip_class = %i\n", info->chip_class);
- printf(" family_id = %i\n", info->family_id);
- printf(" chip_external_rev = %i\n", info->chip_external_rev);
- printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq);
-
- printf("Features:\n");
- printf(" has_graphics = %i\n", info->has_graphics);
- printf(" num_rings[RING_GFX] = %i\n", info->num_rings[RING_GFX]);
- printf(" num_rings[RING_DMA] = %i\n", info->num_rings[RING_DMA]);
- printf(" num_rings[RING_COMPUTE] = %u\n", info->num_rings[RING_COMPUTE]);
- printf(" num_rings[RING_UVD] = %i\n", info->num_rings[RING_UVD]);
- printf(" num_rings[RING_VCE] = %i\n", info->num_rings[RING_VCE]);
- printf(" num_rings[RING_UVD_ENC] = %i\n", info->num_rings[RING_UVD_ENC]);
- printf(" num_rings[RING_VCN_DEC] = %i\n", info->num_rings[RING_VCN_DEC]);
- printf(" num_rings[RING_VCN_ENC] = %i\n", info->num_rings[RING_VCN_ENC]);
- printf(" num_rings[RING_VCN_JPEG] = %i\n", info->num_rings[RING_VCN_JPEG]);
- printf(" has_clear_state = %u\n", info->has_clear_state);
- printf(" has_distributed_tess = %u\n", info->has_distributed_tess);
- printf(" has_dcc_constant_encode = %u\n", info->has_dcc_constant_encode);
- printf(" has_rbplus = %u\n", info->has_rbplus);
- printf(" rbplus_allowed = %u\n", info->rbplus_allowed);
- printf(" has_load_ctx_reg_pkt = %u\n", info->has_load_ctx_reg_pkt);
- printf(" has_out_of_order_rast = %u\n", info->has_out_of_order_rast);
- printf(" cpdma_prefetch_writes_memory = %u\n", info->cpdma_prefetch_writes_memory);
- printf(" has_gfx9_scissor_bug = %i\n", info->has_gfx9_scissor_bug);
- printf(" has_tc_compat_zrange_bug = %i\n", info->has_tc_compat_zrange_bug);
- printf(" has_msaa_sample_loc_bug = %i\n", info->has_msaa_sample_loc_bug);
- printf(" has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug);
-
- printf("Display features:\n");
- printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
- printf(" use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit);
-
- printf("Memory info:\n");
- printf(" pte_fragment_size = %u\n", info->pte_fragment_size);
- printf(" gart_page_size = %u\n", info->gart_page_size);
- printf(" gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024*1024));
- printf(" vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024*1024));
- printf(" vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024*1024));
- printf(" vram_type = %i\n", info->vram_type);
- printf(" vram_bit_width = %i\n", info->vram_bit_width);
- printf(" gds_size = %u kB\n", info->gds_size / 1024);
- printf(" gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024);
- printf(" max_alloc_size = %i MB\n",
- (int)DIV_ROUND_UP(info->max_alloc_size, 1024*1024));
- printf(" min_alloc_size = %u\n", info->min_alloc_size);
- printf(" address32_hi = %u\n", info->address32_hi);
- printf(" has_dedicated_vram = %u\n", info->has_dedicated_vram);
- printf(" num_sdp_interfaces = %u\n", info->num_sdp_interfaces);
- printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks);
- printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
- printf(" tcc_harvested = %u\n", info->tcc_harvested);
- printf(" pc_lines = %u\n", info->pc_lines);
- printf(" lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
- printf(" lds_granularity = %i\n", info->lds_granularity);
- printf(" max_memory_clock = %i\n", info->max_memory_clock);
- printf(" ce_ram_size = %i\n", info->ce_ram_size);
- printf(" l1_cache_size = %i\n", info->l1_cache_size);
- printf(" l2_cache_size = %i\n", info->l2_cache_size);
-
- printf("CP info:\n");
- printf(" gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
- printf(" ib_alignment = %u\n", info->ib_alignment);
- printf(" me_fw_version = %i\n", info->me_fw_version);
- printf(" me_fw_feature = %i\n", info->me_fw_feature);
- printf(" pfp_fw_version = %i\n", info->pfp_fw_version);
- printf(" pfp_fw_feature = %i\n", info->pfp_fw_feature);
- printf(" ce_fw_version = %i\n", info->ce_fw_version);
- printf(" ce_fw_feature = %i\n", info->ce_fw_feature);
-
- printf("Multimedia info:\n");
- printf(" has_hw_decode = %u\n", info->has_hw_decode);
- printf(" uvd_enc_supported = %u\n", info->uvd_enc_supported);
- printf(" uvd_fw_version = %u\n", info->uvd_fw_version);
- printf(" vce_fw_version = %u\n", info->vce_fw_version);
- printf(" vce_harvest_config = %i\n", info->vce_harvest_config);
-
- printf("Kernel & winsys capabilities:\n");
- printf(" drm = %i.%i.%i\n", info->drm_major,
- info->drm_minor, info->drm_patchlevel);
- printf(" has_userptr = %i\n", info->has_userptr);
- printf(" has_syncobj = %u\n", info->has_syncobj);
- printf(" has_syncobj_wait_for_submit = %u\n", info->has_syncobj_wait_for_submit);
- printf(" has_timeline_syncobj = %u\n", info->has_timeline_syncobj);
- printf(" has_fence_to_handle = %u\n", info->has_fence_to_handle);
- printf(" has_ctx_priority = %u\n", info->has_ctx_priority);
- printf(" has_local_buffers = %u\n", info->has_local_buffers);
- printf(" kernel_flushes_hdp_before_ib = %u\n", info->kernel_flushes_hdp_before_ib);
- printf(" htile_cmask_support_1d_tiling = %u\n", info->htile_cmask_support_1d_tiling);
- printf(" si_TA_CS_BC_BASE_ADDR_allowed = %u\n", info->si_TA_CS_BC_BASE_ADDR_allowed);
- printf(" has_bo_metadata = %u\n", info->has_bo_metadata);
- printf(" has_gpu_reset_status_query = %u\n", info->has_gpu_reset_status_query);
- printf(" has_eqaa_surface_allocator = %u\n", info->has_eqaa_surface_allocator);
- printf(" has_format_bc1_through_bc7 = %u\n", info->has_format_bc1_through_bc7);
- printf(" kernel_flushes_tc_l2_after_ib = %u\n", info->kernel_flushes_tc_l2_after_ib);
- printf(" has_indirect_compute_dispatch = %u\n", info->has_indirect_compute_dispatch);
- printf(" has_unaligned_shader_loads = %u\n", info->has_unaligned_shader_loads);
- printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings);
- printf(" has_2d_tiling = %u\n", info->has_2d_tiling);
- printf(" has_read_registers_query = %u\n", info->has_read_registers_query);
- printf(" has_gds_ordered_append = %u\n", info->has_gds_ordered_append);
- printf(" has_scheduled_fence_dependency = %u\n", info->has_scheduled_fence_dependency);
- printf(" mid_command_buffer_preemption_enabled = %u\n", info->mid_command_buffer_preemption_enabled);
-
- printf("Shader core info:\n");
- printf(" max_shader_clock = %i\n", info->max_shader_clock);
- printf(" num_good_compute_units = %i\n", info->num_good_compute_units);
- printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
- printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
- printf(" max_se = %i\n", info->max_se);
- printf(" max_sh_per_se = %i\n", info->max_sh_per_se);
- printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
- printf(" num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);
- printf(" num_physical_wave64_vgprs_per_simd = %i\n", info->num_physical_wave64_vgprs_per_simd);
- printf(" num_simd_per_compute_unit = %i\n", info->num_simd_per_compute_unit);
- printf(" min_sgpr_alloc = %i\n", info->min_sgpr_alloc);
- printf(" max_sgpr_alloc = %i\n", info->max_sgpr_alloc);
- printf(" sgpr_alloc_granularity = %i\n", info->sgpr_alloc_granularity);
- printf(" min_wave64_vgpr_alloc = %i\n", info->min_wave64_vgpr_alloc);
- printf(" max_vgpr_alloc = %i\n", info->max_vgpr_alloc);
- printf(" wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity);
-
- printf("Render backend info:\n");
- printf(" pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override);
- printf(" num_render_backends = %i\n", info->num_render_backends);
- printf(" num_tile_pipes = %i\n", info->num_tile_pipes);
- printf(" pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
- printf(" enabled_rb_mask = 0x%x\n", info->enabled_rb_mask);
- printf(" max_alignment = %u\n", (unsigned)info->max_alignment);
- printf(" pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);
-
- printf("GB_ADDR_CONFIG: 0x%08x\n", info->gb_addr_config);
- if (info->chip_class >= GFX10) {
- printf(" num_pipes = %u\n",
- 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
- printf(" pipe_interleave_size = %u\n",
- 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
- printf(" max_compressed_frags = %u\n",
- 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config));
- } else if (info->chip_class == GFX9) {
- printf(" num_pipes = %u\n",
- 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
- printf(" pipe_interleave_size = %u\n",
- 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
- printf(" max_compressed_frags = %u\n",
- 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config));
- printf(" bank_interleave_size = %u\n",
- 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config));
- printf(" num_banks = %u\n",
- 1 << G_0098F8_NUM_BANKS(info->gb_addr_config));
- printf(" shader_engine_tile_size = %u\n",
- 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config));
- printf(" num_shader_engines = %u\n",
- 1 << G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config));
- printf(" num_gpus = %u (raw)\n",
- G_0098F8_NUM_GPUS_GFX9(info->gb_addr_config));
- printf(" multi_gpu_tile_size = %u (raw)\n",
- G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config));
- printf(" num_rb_per_se = %u\n",
- 1 << G_0098F8_NUM_RB_PER_SE(info->gb_addr_config));
- printf(" row_size = %u\n",
- 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config));
- printf(" num_lower_pipes = %u (raw)\n",
- G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config));
- printf(" se_enable = %u (raw)\n",
- G_0098F8_SE_ENABLE(info->gb_addr_config));
- } else {
- printf(" num_pipes = %u\n",
- 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
- printf(" pipe_interleave_size = %u\n",
- 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config));
- printf(" bank_interleave_size = %u\n",
- 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config));
- printf(" num_shader_engines = %u\n",
- 1 << G_0098F8_NUM_SHADER_ENGINES_GFX6(info->gb_addr_config));
- printf(" shader_engine_tile_size = %u\n",
- 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config));
- printf(" num_gpus = %u (raw)\n",
- G_0098F8_NUM_GPUS_GFX6(info->gb_addr_config));
- printf(" multi_gpu_tile_size = %u (raw)\n",
- G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config));
- printf(" row_size = %u\n",
- 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config));
- printf(" num_lower_pipes = %u (raw)\n",
- G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config));
- }
+ printf("Device info:\n");
+ printf(" pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", info->pci_domain, info->pci_bus,
+ info->pci_dev, info->pci_func);
+
+ printf(" name = %s\n", info->name);
+ printf(" marketing_name = %s\n", info->marketing_name);
+ printf(" is_pro_graphics = %u\n", info->is_pro_graphics);
+ printf(" pci_id = 0x%x\n", info->pci_id);
+ printf(" pci_rev_id = 0x%x\n", info->pci_rev_id);
+ printf(" family = %i\n", info->family);
+ printf(" chip_class = %i\n", info->chip_class);
+ printf(" family_id = %i\n", info->family_id);
+ printf(" chip_external_rev = %i\n", info->chip_external_rev);
+ printf(" clock_crystal_freq = %i\n", info->clock_crystal_freq);
+
+ printf("Features:\n");
+ printf(" has_graphics = %i\n", info->has_graphics);
+ printf(" num_rings[RING_GFX] = %i\n", info->num_rings[RING_GFX]);
+ printf(" num_rings[RING_DMA] = %i\n", info->num_rings[RING_DMA]);
+ printf(" num_rings[RING_COMPUTE] = %u\n", info->num_rings[RING_COMPUTE]);
+ printf(" num_rings[RING_UVD] = %i\n", info->num_rings[RING_UVD]);
+ printf(" num_rings[RING_VCE] = %i\n", info->num_rings[RING_VCE]);
+ printf(" num_rings[RING_UVD_ENC] = %i\n", info->num_rings[RING_UVD_ENC]);
+ printf(" num_rings[RING_VCN_DEC] = %i\n", info->num_rings[RING_VCN_DEC]);
+ printf(" num_rings[RING_VCN_ENC] = %i\n", info->num_rings[RING_VCN_ENC]);
+ printf(" num_rings[RING_VCN_JPEG] = %i\n", info->num_rings[RING_VCN_JPEG]);
+ printf(" has_clear_state = %u\n", info->has_clear_state);
+ printf(" has_distributed_tess = %u\n", info->has_distributed_tess);
+ printf(" has_dcc_constant_encode = %u\n", info->has_dcc_constant_encode);
+ printf(" has_rbplus = %u\n", info->has_rbplus);
+ printf(" rbplus_allowed = %u\n", info->rbplus_allowed);
+ printf(" has_load_ctx_reg_pkt = %u\n", info->has_load_ctx_reg_pkt);
+ printf(" has_out_of_order_rast = %u\n", info->has_out_of_order_rast);
+ printf(" cpdma_prefetch_writes_memory = %u\n", info->cpdma_prefetch_writes_memory);
+ printf(" has_gfx9_scissor_bug = %i\n", info->has_gfx9_scissor_bug);
+ printf(" has_tc_compat_zrange_bug = %i\n", info->has_tc_compat_zrange_bug);
+ printf(" has_msaa_sample_loc_bug = %i\n", info->has_msaa_sample_loc_bug);
+ printf(" has_ls_vgpr_init_bug = %i\n", info->has_ls_vgpr_init_bug);
+
+ printf("Display features:\n");
+ printf(" use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
+ printf(" use_display_dcc_with_retile_blit = %u\n", info->use_display_dcc_with_retile_blit);
+
+ printf("Memory info:\n");
+ printf(" pte_fragment_size = %u\n", info->pte_fragment_size);
+ printf(" gart_page_size = %u\n", info->gart_page_size);
+ printf(" gart_size = %i MB\n", (int)DIV_ROUND_UP(info->gart_size, 1024 * 1024));
+ printf(" vram_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_size, 1024 * 1024));
+ printf(" vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(info->vram_vis_size, 1024 * 1024));
+ printf(" vram_type = %i\n", info->vram_type);
+ printf(" vram_bit_width = %i\n", info->vram_bit_width);
+ printf(" gds_size = %u kB\n", info->gds_size / 1024);
+ printf(" gds_gfx_partition_size = %u kB\n", info->gds_gfx_partition_size / 1024);
+ printf(" max_alloc_size = %i MB\n", (int)DIV_ROUND_UP(info->max_alloc_size, 1024 * 1024));
+ printf(" min_alloc_size = %u\n", info->min_alloc_size);
+ printf(" address32_hi = %u\n", info->address32_hi);
+ printf(" has_dedicated_vram = %u\n", info->has_dedicated_vram);
+ printf(" num_sdp_interfaces = %u\n", info->num_sdp_interfaces);
+ printf(" num_tcc_blocks = %i\n", info->num_tcc_blocks);
+ printf(" tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
+ printf(" tcc_harvested = %u\n", info->tcc_harvested);
+ printf(" pc_lines = %u\n", info->pc_lines);
+ printf(" lds_size_per_workgroup = %u\n", info->lds_size_per_workgroup);
+ printf(" lds_granularity = %i\n", info->lds_granularity);
+ printf(" max_memory_clock = %i\n", info->max_memory_clock);
+ printf(" ce_ram_size = %i\n", info->ce_ram_size);
+ printf(" l1_cache_size = %i\n", info->l1_cache_size);
+ printf(" l2_cache_size = %i\n", info->l2_cache_size);
+
+ printf("CP info:\n");
+ printf(" gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2);
+ printf(" ib_alignment = %u\n", info->ib_alignment);
+ printf(" me_fw_version = %i\n", info->me_fw_version);
+ printf(" me_fw_feature = %i\n", info->me_fw_feature);
+ printf(" pfp_fw_version = %i\n", info->pfp_fw_version);
+ printf(" pfp_fw_feature = %i\n", info->pfp_fw_feature);
+ printf(" ce_fw_version = %i\n", info->ce_fw_version);
+ printf(" ce_fw_feature = %i\n", info->ce_fw_feature);
+
+ printf("Multimedia info:\n");
+ printf(" has_hw_decode = %u\n", info->has_hw_decode);
+ printf(" uvd_enc_supported = %u\n", info->uvd_enc_supported);
+ printf(" uvd_fw_version = %u\n", info->uvd_fw_version);
+ printf(" vce_fw_version = %u\n", info->vce_fw_version);
+ printf(" vce_harvest_config = %i\n", info->vce_harvest_config);
+
+ printf("Kernel & winsys capabilities:\n");
+ printf(" drm = %i.%i.%i\n", info->drm_major, info->drm_minor, info->drm_patchlevel);
+ printf(" has_userptr = %i\n", info->has_userptr);
+ printf(" has_syncobj = %u\n", info->has_syncobj);
+ printf(" has_syncobj_wait_for_submit = %u\n", info->has_syncobj_wait_for_submit);
+ printf(" has_timeline_syncobj = %u\n", info->has_timeline_syncobj);
+ printf(" has_fence_to_handle = %u\n", info->has_fence_to_handle);
+ printf(" has_ctx_priority = %u\n", info->has_ctx_priority);
+ printf(" has_local_buffers = %u\n", info->has_local_buffers);
+ printf(" kernel_flushes_hdp_before_ib = %u\n", info->kernel_flushes_hdp_before_ib);
+ printf(" htile_cmask_support_1d_tiling = %u\n", info->htile_cmask_support_1d_tiling);
+ printf(" si_TA_CS_BC_BASE_ADDR_allowed = %u\n", info->si_TA_CS_BC_BASE_ADDR_allowed);
+ printf(" has_bo_metadata = %u\n", info->has_bo_metadata);
+ printf(" has_gpu_reset_status_query = %u\n", info->has_gpu_reset_status_query);
+ printf(" has_eqaa_surface_allocator = %u\n", info->has_eqaa_surface_allocator);
+ printf(" has_format_bc1_through_bc7 = %u\n", info->has_format_bc1_through_bc7);
+ printf(" kernel_flushes_tc_l2_after_ib = %u\n", info->kernel_flushes_tc_l2_after_ib);
+ printf(" has_indirect_compute_dispatch = %u\n", info->has_indirect_compute_dispatch);
+ printf(" has_unaligned_shader_loads = %u\n", info->has_unaligned_shader_loads);
+ printf(" has_sparse_vm_mappings = %u\n", info->has_sparse_vm_mappings);
+ printf(" has_2d_tiling = %u\n", info->has_2d_tiling);
+ printf(" has_read_registers_query = %u\n", info->has_read_registers_query);
+ printf(" has_gds_ordered_append = %u\n", info->has_gds_ordered_append);
+ printf(" has_scheduled_fence_dependency = %u\n", info->has_scheduled_fence_dependency);
+ printf(" mid_command_buffer_preemption_enabled = %u\n",
+ info->mid_command_buffer_preemption_enabled);
+
+ printf("Shader core info:\n");
+ printf(" max_shader_clock = %i\n", info->max_shader_clock);
+ printf(" num_good_compute_units = %i\n", info->num_good_compute_units);
+ printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
+ printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
+ printf(" max_se = %i\n", info->max_se);
+ printf(" max_sh_per_se = %i\n", info->max_sh_per_se);
+ printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
+ printf(" num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);
+ printf(" num_physical_wave64_vgprs_per_simd = %i\n",
+ info->num_physical_wave64_vgprs_per_simd);
+ printf(" num_simd_per_compute_unit = %i\n", info->num_simd_per_compute_unit);
+ printf(" min_sgpr_alloc = %i\n", info->min_sgpr_alloc);
+ printf(" max_sgpr_alloc = %i\n", info->max_sgpr_alloc);
+ printf(" sgpr_alloc_granularity = %i\n", info->sgpr_alloc_granularity);
+ printf(" min_wave64_vgpr_alloc = %i\n", info->min_wave64_vgpr_alloc);
+ printf(" max_vgpr_alloc = %i\n", info->max_vgpr_alloc);
+ printf(" wave64_vgpr_alloc_granularity = %i\n", info->wave64_vgpr_alloc_granularity);
+
+ printf("Render backend info:\n");
+ printf(" pa_sc_tile_steering_override = 0x%x\n", info->pa_sc_tile_steering_override);
+ printf(" num_render_backends = %i\n", info->num_render_backends);
+ printf(" num_tile_pipes = %i\n", info->num_tile_pipes);
+ printf(" pipe_interleave_bytes = %i\n", info->pipe_interleave_bytes);
+ printf(" enabled_rb_mask = 0x%x\n", info->enabled_rb_mask);
+ printf(" max_alignment = %u\n", (unsigned)info->max_alignment);
+ printf(" pbb_max_alloc_count = %u\n", info->pbb_max_alloc_count);
+
+ printf("GB_ADDR_CONFIG: 0x%08x\n", info->gb_addr_config);
+ if (info->chip_class >= GFX10) {
+ printf(" num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
+ printf(" pipe_interleave_size = %u\n",
+ 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
+ printf(" max_compressed_frags = %u\n",
+ 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config));
+ } else if (info->chip_class == GFX9) {
+ printf(" num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
+ printf(" pipe_interleave_size = %u\n",
+ 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config));
+ printf(" max_compressed_frags = %u\n",
+ 1 << G_0098F8_MAX_COMPRESSED_FRAGS(info->gb_addr_config));
+ printf(" bank_interleave_size = %u\n",
+ 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config));
+ printf(" num_banks = %u\n", 1 << G_0098F8_NUM_BANKS(info->gb_addr_config));
+ printf(" shader_engine_tile_size = %u\n",
+ 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config));
+ printf(" num_shader_engines = %u\n",
+ 1 << G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config));
+ printf(" num_gpus = %u (raw)\n", G_0098F8_NUM_GPUS_GFX9(info->gb_addr_config));
+ printf(" multi_gpu_tile_size = %u (raw)\n",
+ G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config));
+ printf(" num_rb_per_se = %u\n", 1 << G_0098F8_NUM_RB_PER_SE(info->gb_addr_config));
+ printf(" row_size = %u\n", 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config));
+ printf(" num_lower_pipes = %u (raw)\n", G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config));
+ printf(" se_enable = %u (raw)\n", G_0098F8_SE_ENABLE(info->gb_addr_config));
+ } else {
+ printf(" num_pipes = %u\n", 1 << G_0098F8_NUM_PIPES(info->gb_addr_config));
+ printf(" pipe_interleave_size = %u\n",
+ 256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(info->gb_addr_config));
+ printf(" bank_interleave_size = %u\n",
+ 1 << G_0098F8_BANK_INTERLEAVE_SIZE(info->gb_addr_config));
+ printf(" num_shader_engines = %u\n",
+ 1 << G_0098F8_NUM_SHADER_ENGINES_GFX6(info->gb_addr_config));
+ printf(" shader_engine_tile_size = %u\n",
+ 16 << G_0098F8_SHADER_ENGINE_TILE_SIZE(info->gb_addr_config));
+ printf(" num_gpus = %u (raw)\n", G_0098F8_NUM_GPUS_GFX6(info->gb_addr_config));
+ printf(" multi_gpu_tile_size = %u (raw)\n",
+ G_0098F8_MULTI_GPU_TILE_SIZE(info->gb_addr_config));
+ printf(" row_size = %u\n", 1024 << G_0098F8_ROW_SIZE(info->gb_addr_config));
+ printf(" num_lower_pipes = %u (raw)\n", G_0098F8_NUM_LOWER_PIPES(info->gb_addr_config));
+ }
}
-int
-ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family)
+int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family)
{
- if (chip_class >= GFX9)
- return -1;
-
- switch (family) {
- case CHIP_OLAND:
- case CHIP_HAINAN:
- case CHIP_KAVERI:
- case CHIP_KABINI:
- case CHIP_ICELAND:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- return 16;
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_POLARIS10:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- case CHIP_VEGAM:
- return 32;
- default:
- unreachable("Unknown GPU");
- }
+ if (chip_class >= GFX9)
+ return -1;
+
+ switch (family) {
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_ICELAND:
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ return 16;
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ case CHIP_VEGAM:
+ return 32;
+ default:
+ unreachable("Unknown GPU");
+ }
}
-void
-ac_get_raster_config(struct radeon_info *info,
- uint32_t *raster_config_p,
- uint32_t *raster_config_1_p,
- uint32_t *se_tile_repeat_p)
+void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
+ uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p)
{
- unsigned raster_config, raster_config_1, se_tile_repeat;
-
- switch (info->family) {
- /* 1 SE / 1 RB */
- case CHIP_HAINAN:
- case CHIP_KABINI:
- case CHIP_STONEY:
- raster_config = 0x00000000;
- raster_config_1 = 0x00000000;
- break;
- /* 1 SE / 4 RBs */
- case CHIP_VERDE:
- raster_config = 0x0000124a;
- raster_config_1 = 0x00000000;
- break;
- /* 1 SE / 2 RBs (Oland is special) */
- case CHIP_OLAND:
- raster_config = 0x00000082;
- raster_config_1 = 0x00000000;
- break;
- /* 1 SE / 2 RBs */
- case CHIP_KAVERI:
- case CHIP_ICELAND:
- case CHIP_CARRIZO:
- raster_config = 0x00000002;
- raster_config_1 = 0x00000000;
- break;
- /* 2 SEs / 4 RBs */
- case CHIP_BONAIRE:
- case CHIP_POLARIS11:
- case CHIP_POLARIS12:
- raster_config = 0x16000012;
- raster_config_1 = 0x00000000;
- break;
- /* 2 SEs / 8 RBs */
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- raster_config = 0x2a00126a;
- raster_config_1 = 0x00000000;
- break;
- /* 4 SEs / 8 RBs */
- case CHIP_TONGA:
- case CHIP_POLARIS10:
- raster_config = 0x16000012;
- raster_config_1 = 0x0000002a;
- break;
- /* 4 SEs / 16 RBs */
- case CHIP_HAWAII:
- case CHIP_FIJI:
- case CHIP_VEGAM:
- raster_config = 0x3a00161a;
- raster_config_1 = 0x0000002e;
- break;
- default:
- fprintf(stderr,
- "ac: Unknown GPU, using 0 for raster_config\n");
- raster_config = 0x00000000;
- raster_config_1 = 0x00000000;
- break;
- }
-
- /* drm/radeon on Kaveri is buggy, so disable 1 RB to work around it.
- * This decreases performance by up to 50% when the RB is the bottleneck.
- */
- if (info->family == CHIP_KAVERI && !info->is_amdgpu)
- raster_config = 0x00000000;
-
- /* Fiji: Old kernels have incorrect tiling config. This decreases
- * RB performance by 25%. (it disables 1 RB in the second packer)
- */
- if (info->family == CHIP_FIJI &&
- info->cik_macrotile_mode_array[0] == 0x000000e8) {
- raster_config = 0x16000012;
- raster_config_1 = 0x0000002a;
- }
-
- unsigned se_width = 8 << G_028350_SE_XSEL_GFX6(raster_config);
- unsigned se_height = 8 << G_028350_SE_YSEL_GFX6(raster_config);
-
- /* I don't know how to calculate this, though this is probably a good guess. */
- se_tile_repeat = MAX2(se_width, se_height) * info->max_se;
-
- *raster_config_p = raster_config;
- *raster_config_1_p = raster_config_1;
- if (se_tile_repeat_p)
- *se_tile_repeat_p = se_tile_repeat;
+ unsigned raster_config, raster_config_1, se_tile_repeat;
+
+ switch (info->family) {
+ /* 1 SE / 1 RB */
+ case CHIP_HAINAN:
+ case CHIP_KABINI:
+ case CHIP_STONEY:
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 1 SE / 4 RBs */
+ case CHIP_VERDE:
+ raster_config = 0x0000124a;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 1 SE / 2 RBs (Oland is special) */
+ case CHIP_OLAND:
+ raster_config = 0x00000082;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 1 SE / 2 RBs */
+ case CHIP_KAVERI:
+ case CHIP_ICELAND:
+ case CHIP_CARRIZO:
+ raster_config = 0x00000002;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 2 SEs / 4 RBs */
+ case CHIP_BONAIRE:
+ case CHIP_POLARIS11:
+ case CHIP_POLARIS12:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 2 SEs / 8 RBs */
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ raster_config = 0x2a00126a;
+ raster_config_1 = 0x00000000;
+ break;
+ /* 4 SEs / 8 RBs */
+ case CHIP_TONGA:
+ case CHIP_POLARIS10:
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ break;
+ /* 4 SEs / 16 RBs */
+ case CHIP_HAWAII:
+ case CHIP_FIJI:
+ case CHIP_VEGAM:
+ raster_config = 0x3a00161a;
+ raster_config_1 = 0x0000002e;
+ break;
+ default:
+ fprintf(stderr, "ac: Unknown GPU, using 0 for raster_config\n");
+ raster_config = 0x00000000;
+ raster_config_1 = 0x00000000;
+ break;
+ }
+
+ /* drm/radeon on Kaveri is buggy, so disable 1 RB to work around it.
+ * This decreases performance by up to 50% when the RB is the bottleneck.
+ */
+ if (info->family == CHIP_KAVERI && !info->is_amdgpu)
+ raster_config = 0x00000000;
+
+ /* Fiji: Old kernels have incorrect tiling config. This decreases
+ * RB performance by 25%. (it disables 1 RB in the second packer)
+ */
+ if (info->family == CHIP_FIJI && info->cik_macrotile_mode_array[0] == 0x000000e8) {
+ raster_config = 0x16000012;
+ raster_config_1 = 0x0000002a;
+ }
+
+ unsigned se_width = 8 << G_028350_SE_XSEL_GFX6(raster_config);
+ unsigned se_height = 8 << G_028350_SE_YSEL_GFX6(raster_config);
+
+ /* I don't know how to calculate this, though this is probably a good guess. */
+ se_tile_repeat = MAX2(se_width, se_height) * info->max_se;
+
+ *raster_config_p = raster_config;
+ *raster_config_1_p = raster_config_1;
+ if (se_tile_repeat_p)
+ *se_tile_repeat_p = se_tile_repeat;
}
-void
-ac_get_harvested_configs(struct radeon_info *info,
- unsigned raster_config,
- unsigned *cik_raster_config_1_p,
- unsigned *raster_config_se)
+void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
+ unsigned *cik_raster_config_1_p, unsigned *raster_config_se)
{
- unsigned sh_per_se = MAX2(info->max_sh_per_se, 1);
- unsigned num_se = MAX2(info->max_se, 1);
- unsigned rb_mask = info->enabled_rb_mask;
- unsigned num_rb = MIN2(info->num_render_backends, 16);
- unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
- unsigned rb_per_se = num_rb / num_se;
- unsigned se_mask[4];
- unsigned se;
-
- se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
- se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
- se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
- se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
-
- assert(num_se == 1 || num_se == 2 || num_se == 4);
- assert(sh_per_se == 1 || sh_per_se == 2);
- assert(rb_per_pkr == 1 || rb_per_pkr == 2);
-
-
- if (info->chip_class >= GFX7) {
- unsigned raster_config_1 = *cik_raster_config_1_p;
- if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
- (!se_mask[2] && !se_mask[3]))) {
- raster_config_1 &= C_028354_SE_PAIR_MAP;
-
- if (!se_mask[0] && !se_mask[1]) {
- raster_config_1 |=
- S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
- } else {
- raster_config_1 |=
- S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
- }
- *cik_raster_config_1_p = raster_config_1;
- }
- }
-
- for (se = 0; se < num_se; se++) {
- unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
- unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
- int idx = (se / 2) * 2;
-
- raster_config_se[se] = raster_config;
- if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
- raster_config_se[se] &= C_028350_SE_MAP;
-
- if (!se_mask[idx]) {
- raster_config_se[se] |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
- } else {
- raster_config_se[se] |=
- S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
- }
- }
-
- pkr0_mask &= rb_mask;
- pkr1_mask &= rb_mask;
- if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
- raster_config_se[se] &= C_028350_PKR_MAP;
-
- if (!pkr0_mask) {
- raster_config_se[se] |=
- S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
- } else {
- raster_config_se[se] |=
- S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
- }
- }
-
- if (rb_per_se >= 2) {
- unsigned rb0_mask = 1 << (se * rb_per_se);
- unsigned rb1_mask = rb0_mask << 1;
-
- rb0_mask &= rb_mask;
- rb1_mask &= rb_mask;
- if (!rb0_mask || !rb1_mask) {
- raster_config_se[se] &= C_028350_RB_MAP_PKR0;
-
- if (!rb0_mask) {
- raster_config_se[se] |=
- S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
- } else {
- raster_config_se[se] |=
- S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
- }
- }
-
- if (rb_per_se > 2) {
- rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
- rb1_mask = rb0_mask << 1;
- rb0_mask &= rb_mask;
- rb1_mask &= rb_mask;
- if (!rb0_mask || !rb1_mask) {
- raster_config_se[se] &= C_028350_RB_MAP_PKR1;
-
- if (!rb0_mask) {
- raster_config_se[se] |=
- S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
- } else {
- raster_config_se[se] |=
- S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
- }
- }
- }
- }
- }
+ unsigned sh_per_se = MAX2(info->max_sh_per_se, 1);
+ unsigned num_se = MAX2(info->max_se, 1);
+ unsigned rb_mask = info->enabled_rb_mask;
+ unsigned num_rb = MIN2(info->num_render_backends, 16);
+ unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
+ unsigned rb_per_se = num_rb / num_se;
+ unsigned se_mask[4];
+ unsigned se;
+
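+   /* Each shader engine owns rb_per_se consecutive bits of the global RB
+    * mask, so se_mask[i] is the set of enabled RBs on shader engine i. */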
+ se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
+ se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
+ se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
+ se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
+
+ assert(num_se == 1 || num_se == 2 || num_se == 4);
+ assert(sh_per_se == 1 || sh_per_se == 2);
+ assert(rb_per_pkr == 1 || rb_per_pkr == 2);
+
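+   /* If one SE pair has had all of its RBs harvested, steer SE_PAIR_MAP at
+    * the pair that still has enabled RBs. */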
+ if (info->chip_class >= GFX7) {
+ unsigned raster_config_1 = *cik_raster_config_1_p;
+ if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || (!se_mask[2] && !se_mask[3]))) {
+ raster_config_1 &= C_028354_SE_PAIR_MAP;
+
+ if (!se_mask[0] && !se_mask[1]) {
+ raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_3);
+ } else {
+ raster_config_1 |= S_028354_SE_PAIR_MAP(V_028354_RASTER_CONFIG_SE_PAIR_MAP_0);
+ }
+ *cik_raster_config_1_p = raster_config_1;
+ }
+ }
+
+ for (se = 0; se < num_se; se++) {
+ unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
+ unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
+ int idx = (se / 2) * 2;
+
+ raster_config_se[se] = raster_config;
+ if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
+ raster_config_se[se] &= C_028350_SE_MAP;
+
+ if (!se_mask[idx]) {
+ raster_config_se[se] |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_3);
+ } else {
+ raster_config_se[se] |= S_028350_SE_MAP(V_028350_RASTER_CONFIG_SE_MAP_0);
+ }
+ }
+
+ pkr0_mask &= rb_mask;
+ pkr1_mask &= rb_mask;
+ if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
+ raster_config_se[se] &= C_028350_PKR_MAP;
+
+ if (!pkr0_mask) {
+ raster_config_se[se] |= S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_3);
+ } else {
+ raster_config_se[se] |= S_028350_PKR_MAP(V_028350_RASTER_CONFIG_PKR_MAP_0);
+ }
+ }
+
+ if (rb_per_se >= 2) {
+ unsigned rb0_mask = 1 << (se * rb_per_se);
+ unsigned rb1_mask = rb0_mask << 1;
+
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se[se] &= C_028350_RB_MAP_PKR0;
+
+ if (!rb0_mask) {
+ raster_config_se[se] |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se[se] |= S_028350_RB_MAP_PKR0(V_028350_RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+
+ if (rb_per_se > 2) {
+ rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
+ rb1_mask = rb0_mask << 1;
+ rb0_mask &= rb_mask;
+ rb1_mask &= rb_mask;
+ if (!rb0_mask || !rb1_mask) {
+ raster_config_se[se] &= C_028350_RB_MAP_PKR1;
+
+ if (!rb0_mask) {
+ raster_config_se[se] |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_3);
+ } else {
+ raster_config_se[se] |= S_028350_RB_MAP_PKR1(V_028350_RASTER_CONFIG_RB_MAP_0);
+ }
+ }
+ }
+ }
+ }
}
-unsigned ac_get_compute_resource_limits(struct radeon_info *info,
- unsigned waves_per_threadgroup,
- unsigned max_waves_per_sh,
- unsigned threadgroups_per_cu)
+unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
+ unsigned max_waves_per_sh, unsigned threadgroups_per_cu)
{
- unsigned compute_resource_limits =
- S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
-
- if (info->chip_class >= GFX7) {
- unsigned num_cu_per_se = info->num_good_compute_units /
- info->max_se;
-
- /* Force even distribution on all SIMDs in CU if the workgroup
- * size is 64. This has shown some good improvements if # of CUs
- * per SE is not a multiple of 4.
- */
- if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
- compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
-
- assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
- compute_resource_limits |= S_00B854_WAVES_PER_SH(max_waves_per_sh) |
- S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
- } else {
- /* GFX6 */
- if (max_waves_per_sh) {
- unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
- compute_resource_limits |= S_00B854_WAVES_PER_SH_GFX6(limit_div16);
- }
- }
- return compute_resource_limits;
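+   /* When the threadgroup is a whole multiple of 4 waves, SIMD_DEST_CNTL
+    * lets the waves be distributed evenly across the SIMDs of a CU. */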
+ unsigned compute_resource_limits = S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
+
+ if (info->chip_class >= GFX7) {
+ unsigned num_cu_per_se = info->num_good_compute_units / info->max_se;
+
+ /* Force even distribution on all SIMDs in CU if the workgroup
+ * size is 64. This has shown some good improvements if # of CUs
+ * per SE is not a multiple of 4.
+ */
+ if (num_cu_per_se % 4 && waves_per_threadgroup == 1)
+ compute_resource_limits |= S_00B854_FORCE_SIMD_DIST(1);
+
+ assert(threadgroups_per_cu >= 1 && threadgroups_per_cu <= 8);
+ compute_resource_limits |=
+ S_00B854_WAVES_PER_SH(max_waves_per_sh) | S_00B854_CU_GROUP_COUNT(threadgroups_per_cu - 1);
+ } else {
+ /* GFX6 */
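+      /* The GFX6 WAVES_PER_SH field is specified in units of 16 waves. */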
+ if (max_waves_per_sh) {
+ unsigned limit_div16 = DIV_ROUND_UP(max_waves_per_sh, 16);
+ compute_resource_limits |= S_00B854_WAVES_PER_SH_GFX6(limit_div16);
+ }
+ }
+ return compute_resource_limits;
}
#ifndef AC_GPU_INFO_H
#define AC_GPU_INFO_H
+#include "amd_family.h"
+
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <stdbool.h>
-#include "amd_family.h"
#ifdef __cplusplus
extern "C" {
struct amdgpu_gpu_info;
struct radeon_info {
- /* PCI info: domain:bus:dev:func */
- uint32_t pci_domain;
- uint32_t pci_bus;
- uint32_t pci_dev;
- uint32_t pci_func;
-
- /* Device info. */
- const char *name;
- const char *marketing_name;
- bool is_pro_graphics;
- uint32_t pci_id;
- uint32_t pci_rev_id;
- enum radeon_family family;
- enum chip_class chip_class;
- uint32_t family_id;
- uint32_t chip_external_rev;
- uint32_t clock_crystal_freq;
-
- /* Features. */
- bool has_graphics; /* false if the chip is compute-only */
- uint32_t num_rings[NUM_RING_TYPES];
- uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
- bool has_clear_state;
- bool has_distributed_tess;
- bool has_dcc_constant_encode;
- bool has_rbplus; /* if RB+ registers exist */
- bool rbplus_allowed; /* if RB+ is allowed */
- bool has_load_ctx_reg_pkt;
- bool has_out_of_order_rast;
- bool has_packed_math_16bit;
- bool cpdma_prefetch_writes_memory;
- bool has_gfx9_scissor_bug;
- bool has_tc_compat_zrange_bug;
- bool has_msaa_sample_loc_bug;
- bool has_ls_vgpr_init_bug;
-
- /* Display features. */
- /* There are 2 display DCC codepaths, because display expects unaligned DCC. */
- /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
- bool use_display_dcc_unaligned;
- /* Allocate both aligned and unaligned DCC and use the retile blit. */
- bool use_display_dcc_with_retile_blit;
-
- /* Memory info. */
- uint32_t pte_fragment_size;
- uint32_t gart_page_size;
- uint64_t gart_size;
- uint64_t vram_size;
- uint64_t vram_vis_size;
- uint32_t vram_bit_width;
- uint32_t vram_type;
- unsigned gds_size;
- unsigned gds_gfx_partition_size;
- uint64_t max_alloc_size;
- uint32_t min_alloc_size;
- uint32_t address32_hi;
- bool has_dedicated_vram;
- bool has_l2_uncached;
- bool r600_has_virtual_memory;
- uint32_t num_sdp_interfaces;
- uint32_t num_tcc_blocks;
- uint32_t tcc_cache_line_size;
- bool tcc_harvested;
- unsigned pc_lines;
- uint32_t lds_size_per_workgroup;
- uint32_t lds_granularity;
- uint32_t max_memory_clock;
- uint32_t ce_ram_size;
- uint32_t l1_cache_size;
- uint32_t l2_cache_size;
-
- /* CP info. */
- bool gfx_ib_pad_with_type2;
- unsigned ib_alignment; /* both start and size alignment */
- uint32_t me_fw_version;
- uint32_t me_fw_feature;
- uint32_t pfp_fw_version;
- uint32_t pfp_fw_feature;
- uint32_t ce_fw_version;
- uint32_t ce_fw_feature;
-
- /* Multimedia info. */
- bool has_hw_decode;
- bool uvd_enc_supported;
- uint32_t uvd_fw_version;
- uint32_t vce_fw_version;
- uint32_t vce_harvest_config;
-
- /* Kernel & winsys capabilities. */
- uint32_t drm_major; /* version */
- uint32_t drm_minor;
- uint32_t drm_patchlevel;
- bool is_amdgpu;
- bool has_userptr;
- bool has_syncobj;
- bool has_syncobj_wait_for_submit;
- bool has_timeline_syncobj;
- bool has_fence_to_handle;
- bool has_ctx_priority;
- bool has_local_buffers;
- bool kernel_flushes_hdp_before_ib;
- bool htile_cmask_support_1d_tiling;
- bool si_TA_CS_BC_BASE_ADDR_allowed;
- bool has_bo_metadata;
- bool has_gpu_reset_status_query;
- bool has_eqaa_surface_allocator;
- bool has_format_bc1_through_bc7;
- bool kernel_flushes_tc_l2_after_ib;
- bool has_indirect_compute_dispatch;
- bool has_unaligned_shader_loads;
- bool has_sparse_vm_mappings;
- bool has_2d_tiling;
- bool has_read_registers_query;
- bool has_gds_ordered_append;
- bool has_scheduled_fence_dependency;
- /* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
- bool mid_command_buffer_preemption_enabled;
-
- /* Shader cores. */
- uint32_t cu_mask[4][2];
- uint32_t r600_max_quad_pipes; /* wave size / 16 */
- uint32_t max_shader_clock;
- uint32_t num_good_compute_units;
- uint32_t max_good_cu_per_sa;
- uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
- uint32_t max_se; /* shader engines */
- uint32_t max_sh_per_se; /* shader arrays per shader engine */
- uint32_t max_wave64_per_simd;
- uint32_t num_physical_sgprs_per_simd;
- uint32_t num_physical_wave64_vgprs_per_simd;
- uint32_t num_simd_per_compute_unit;
- uint32_t min_sgpr_alloc;
- uint32_t max_sgpr_alloc;
- uint32_t sgpr_alloc_granularity;
- uint32_t min_wave64_vgpr_alloc;
- uint32_t max_vgpr_alloc;
- uint32_t wave64_vgpr_alloc_granularity;
- bool use_late_alloc; /* VS and GS: late pos/param allocation */
-
- /* Render backends (color + depth blocks). */
- uint32_t r300_num_gb_pipes;
- uint32_t r300_num_z_pipes;
- uint32_t r600_gb_backend_map; /* R600 harvest config */
- bool r600_gb_backend_map_valid;
- uint32_t r600_num_banks;
- uint32_t gb_addr_config;
- uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
- uint32_t num_render_backends;
- uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
- uint32_t pipe_interleave_bytes;
- uint32_t enabled_rb_mask; /* GCN harvest config */
- uint64_t max_alignment; /* from addrlib */
- uint32_t pbb_max_alloc_count;
-
- /* Tile modes. */
- uint32_t si_tile_mode_array[32];
- uint32_t cik_macrotile_mode_array[16];
+ /* PCI info: domain:bus:dev:func */
+ uint32_t pci_domain;
+ uint32_t pci_bus;
+ uint32_t pci_dev;
+ uint32_t pci_func;
+
+ /* Device info. */
+ const char *name;
+ const char *marketing_name;
+ bool is_pro_graphics;
+ uint32_t pci_id;
+ uint32_t pci_rev_id;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ uint32_t family_id;
+ uint32_t chip_external_rev;
+ uint32_t clock_crystal_freq;
+
+ /* Features. */
+ bool has_graphics; /* false if the chip is compute-only */
+ uint32_t num_rings[NUM_RING_TYPES];
+ uint32_t ib_pad_dw_mask[NUM_RING_TYPES];
+ bool has_clear_state;
+ bool has_distributed_tess;
+ bool has_dcc_constant_encode;
+ bool has_rbplus; /* if RB+ registers exist */
+ bool rbplus_allowed; /* if RB+ is allowed */
+ bool has_load_ctx_reg_pkt;
+ bool has_out_of_order_rast;
+ bool has_packed_math_16bit;
+ bool cpdma_prefetch_writes_memory;
+ bool has_gfx9_scissor_bug;
+ bool has_tc_compat_zrange_bug;
+ bool has_msaa_sample_loc_bug;
+ bool has_ls_vgpr_init_bug;
+
+ /* Display features. */
+ /* There are 2 display DCC codepaths, because display expects unaligned DCC. */
+ /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
+ bool use_display_dcc_unaligned;
+ /* Allocate both aligned and unaligned DCC and use the retile blit. */
+ bool use_display_dcc_with_retile_blit;
+
+ /* Memory info. */
+ uint32_t pte_fragment_size;
+ uint32_t gart_page_size;
+ uint64_t gart_size;
+ uint64_t vram_size;
+ uint64_t vram_vis_size;
+ uint32_t vram_bit_width;
+ uint32_t vram_type;
+ unsigned gds_size;
+ unsigned gds_gfx_partition_size;
+ uint64_t max_alloc_size;
+ uint32_t min_alloc_size;
+ uint32_t address32_hi;
+ bool has_dedicated_vram;
+ bool has_l2_uncached;
+ bool r600_has_virtual_memory;
+ uint32_t num_sdp_interfaces;
+ uint32_t num_tcc_blocks;
+ uint32_t tcc_cache_line_size;
+ bool tcc_harvested;
+ unsigned pc_lines;
+ uint32_t lds_size_per_workgroup;
+ uint32_t lds_granularity;
+ uint32_t max_memory_clock;
+ uint32_t ce_ram_size;
+ uint32_t l1_cache_size;
+ uint32_t l2_cache_size;
+
+ /* CP info. */
+ bool gfx_ib_pad_with_type2;
+ unsigned ib_alignment; /* both start and size alignment */
+ uint32_t me_fw_version;
+ uint32_t me_fw_feature;
+ uint32_t pfp_fw_version;
+ uint32_t pfp_fw_feature;
+ uint32_t ce_fw_version;
+ uint32_t ce_fw_feature;
+
+ /* Multimedia info. */
+ bool has_hw_decode;
+ bool uvd_enc_supported;
+ uint32_t uvd_fw_version;
+ uint32_t vce_fw_version;
+ uint32_t vce_harvest_config;
+
+ /* Kernel & winsys capabilities. */
+ uint32_t drm_major; /* version */
+ uint32_t drm_minor;
+ uint32_t drm_patchlevel;
+ bool is_amdgpu;
+ bool has_userptr;
+ bool has_syncobj;
+ bool has_syncobj_wait_for_submit;
+ bool has_timeline_syncobj;
+ bool has_fence_to_handle;
+ bool has_ctx_priority;
+ bool has_local_buffers;
+ bool kernel_flushes_hdp_before_ib;
+ bool htile_cmask_support_1d_tiling;
+ bool si_TA_CS_BC_BASE_ADDR_allowed;
+ bool has_bo_metadata;
+ bool has_gpu_reset_status_query;
+ bool has_eqaa_surface_allocator;
+ bool has_format_bc1_through_bc7;
+ bool kernel_flushes_tc_l2_after_ib;
+ bool has_indirect_compute_dispatch;
+ bool has_unaligned_shader_loads;
+ bool has_sparse_vm_mappings;
+ bool has_2d_tiling;
+ bool has_read_registers_query;
+ bool has_gds_ordered_append;
+ bool has_scheduled_fence_dependency;
+ /* Whether SR-IOV is enabled or amdgpu.mcbp=1 was set on the kernel command line. */
+ bool mid_command_buffer_preemption_enabled;
+
+ /* Shader cores. */
+ uint32_t cu_mask[4][2];
+ uint32_t r600_max_quad_pipes; /* wave size / 16 */
+ uint32_t max_shader_clock;
+ uint32_t num_good_compute_units;
+ uint32_t max_good_cu_per_sa;
+ uint32_t min_good_cu_per_sa; /* min != max if SAs have different # of CUs */
+ uint32_t max_se; /* shader engines */
+ uint32_t max_sh_per_se; /* shader arrays per shader engine */
+ uint32_t max_wave64_per_simd;
+ uint32_t num_physical_sgprs_per_simd;
+ uint32_t num_physical_wave64_vgprs_per_simd;
+ uint32_t num_simd_per_compute_unit;
+ uint32_t min_sgpr_alloc;
+ uint32_t max_sgpr_alloc;
+ uint32_t sgpr_alloc_granularity;
+ uint32_t min_wave64_vgpr_alloc;
+ uint32_t max_vgpr_alloc;
+ uint32_t wave64_vgpr_alloc_granularity;
+ bool use_late_alloc; /* VS and GS: late pos/param allocation */
+
+ /* Render backends (color + depth blocks). */
+ uint32_t r300_num_gb_pipes;
+ uint32_t r300_num_z_pipes;
+ uint32_t r600_gb_backend_map; /* R600 harvest config */
+ bool r600_gb_backend_map_valid;
+ uint32_t r600_num_banks;
+ uint32_t gb_addr_config;
+ uint32_t pa_sc_tile_steering_override; /* CLEAR_STATE also sets this */
+ uint32_t num_render_backends;
+ uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
+ uint32_t pipe_interleave_bytes;
+ uint32_t enabled_rb_mask; /* GCN harvest config */
+ uint64_t max_alignment; /* from addrlib */
+ uint32_t pbb_max_alloc_count;
+
+ /* Tile modes. */
+ uint32_t si_tile_mode_array[32];
+ uint32_t cik_macrotile_mode_array[16];
};
-bool ac_query_gpu_info(int fd, void *dev_p,
- struct radeon_info *info,
- struct amdgpu_gpu_info *amdinfo);
+bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
+ struct amdgpu_gpu_info *amdinfo);
void ac_compute_driver_uuid(char *uuid, size_t size);
void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
void ac_print_gpu_info(struct radeon_info *info);
int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
-void ac_get_raster_config(struct radeon_info *info,
- uint32_t *raster_config_p,
- uint32_t *raster_config_1_p,
- uint32_t *se_tile_repeat_p);
-void ac_get_harvested_configs(struct radeon_info *info,
- unsigned raster_config,
- unsigned *cik_raster_config_1_p,
- unsigned *raster_config_se);
-unsigned ac_get_compute_resource_limits(struct radeon_info *info,
- unsigned waves_per_threadgroup,
- unsigned max_waves_per_sh,
- unsigned threadgroups_per_cu);
+void ac_get_raster_config(struct radeon_info *info, uint32_t *raster_config_p,
+ uint32_t *raster_config_1_p, uint32_t *se_tile_repeat_p);
+void ac_get_harvested_configs(struct radeon_info *info, unsigned raster_config,
+ unsigned *cik_raster_config_1_p, unsigned *raster_config_se);
+unsigned ac_get_compute_resource_limits(struct radeon_info *info, unsigned waves_per_threadgroup,
+ unsigned max_waves_per_sh, unsigned threadgroups_per_cu);
#ifdef __cplusplus
}
#include "ac_rtld.h"
+#include "ac_binary.h"
+#include "ac_gpu_info.h"
+#include "util/u_dynarray.h"
+#include "util/u_math.h"
+
#include <gelf.h>
#include <libelf.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
-#include "ac_binary.h"
-#include "ac_gpu_info.h"
-#include "util/u_dynarray.h"
-#include "util/u_math.h"
-
// Old distributions may not have this enum constant
#define MY_EM_AMDGPU 224
#endif
#ifndef R_AMDGPU_NONE
-#define R_AMDGPU_NONE 0
-#define R_AMDGPU_ABS32_LO 1
-#define R_AMDGPU_ABS32_HI 2
-#define R_AMDGPU_ABS64 3
-#define R_AMDGPU_REL32 4
-#define R_AMDGPU_REL64 5
-#define R_AMDGPU_ABS32 6
-#define R_AMDGPU_GOTPCREL 7
+#define R_AMDGPU_NONE 0
+#define R_AMDGPU_ABS32_LO 1
+#define R_AMDGPU_ABS32_HI 2
+#define R_AMDGPU_ABS64 3
+#define R_AMDGPU_REL32 4
+#define R_AMDGPU_REL64 5
+#define R_AMDGPU_ABS32 6
+#define R_AMDGPU_GOTPCREL 7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
-#define R_AMDGPU_REL32_LO 10
-#define R_AMDGPU_REL32_HI 11
-#define R_AMDGPU_RELATIVE64 13
+#define R_AMDGPU_REL32_LO 10
+#define R_AMDGPU_REL32_HI 11
+#define R_AMDGPU_RELATIVE64 13
#endif
/* For the UMR disassembler. */
-#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
-#define DEBUGGER_NUM_MARKERS 5
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
+#define DEBUGGER_NUM_MARKERS 5
struct ac_rtld_section {
- bool is_rx : 1;
- bool is_pasted_text : 1;
- uint64_t offset;
- const char *name;
+ bool is_rx : 1;
+ bool is_pasted_text : 1;
+ uint64_t offset;
+ const char *name;
};
struct ac_rtld_part {
- Elf *elf;
- struct ac_rtld_section *sections;
- unsigned num_sections;
+ Elf *elf;
+ struct ac_rtld_section *sections;
+ unsigned num_sections;
};
static void report_erroraf(const char *fmt, va_list va)
{
- char *msg;
- int ret = vasprintf(&msg, fmt, va);
- if (ret < 0)
- msg = "(vasprintf failed)";
+ char *msg;
+ int ret = vasprintf(&msg, fmt, va);
+ if (ret < 0)
+ msg = "(vasprintf failed)";
- fprintf(stderr, "ac_rtld error: %s\n", msg);
+ fprintf(stderr, "ac_rtld error: %s\n", msg);
- if (ret >= 0)
- free(msg);
+ if (ret >= 0)
+ free(msg);
}
static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
static void report_errorf(const char *fmt, ...)
{
- va_list va;
- va_start(va, fmt);
- report_erroraf(fmt, va);
- va_end(va);
+ va_list va;
+ va_start(va, fmt);
+ report_erroraf(fmt, va);
+ va_end(va);
}
static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
static void report_elf_errorf(const char *fmt, ...)
{
- va_list va;
- va_start(va, fmt);
- report_erroraf(fmt, va);
- va_end(va);
+ va_list va;
+ va_start(va, fmt);
+ report_erroraf(fmt, va);
+ va_end(va);
- fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
+ fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
}
/**
* \p part_idx.
*/
static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
- const char *name, unsigned part_idx)
+ const char *name, unsigned part_idx)
{
- util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
- if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) &&
- !strcmp(name, symbol->name))
- return symbol;
- }
- return 0;
+ util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
+ if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
+ return symbol;
+ }
+ return 0;
}
static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
{
- const struct ac_rtld_symbol *lhs = lhsp;
- const struct ac_rtld_symbol *rhs = rhsp;
- if (rhs->align > lhs->align)
- return 1;
- if (rhs->align < lhs->align)
- return -1;
- return 0;
+ const struct ac_rtld_symbol *lhs = lhsp;
+ const struct ac_rtld_symbol *rhs = rhsp;
+ if (rhs->align > lhs->align)
+ return 1;
+ if (rhs->align < lhs->align)
+ return -1;
+ return 0;
}
/**
* Sort the given symbol list by decreasing alignment and assign offsets.
*/
static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
- uint64_t *ptotal_size)
+ uint64_t *ptotal_size)
{
- qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
+ qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
- uint64_t total_size = *ptotal_size;
+ uint64_t total_size = *ptotal_size;
- for (unsigned i = 0; i < num_symbols; ++i) {
- struct ac_rtld_symbol *s = &symbols[i];
- assert(util_is_power_of_two_nonzero(s->align));
+ for (unsigned i = 0; i < num_symbols; ++i) {
+ struct ac_rtld_symbol *s = &symbols[i];
+ assert(util_is_power_of_two_nonzero(s->align));
- total_size = align64(total_size, s->align);
- s->offset = total_size;
+ total_size = align64(total_size, s->align);
+ s->offset = total_size;
- if (total_size + s->size < total_size) {
- report_errorf("%s: size overflow", __FUNCTION__);
- return false;
- }
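+      /* Guard against unsigned wrap-around of the running total. */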
+ if (total_size + s->size < total_size) {
+ report_errorf("%s: size overflow", __FUNCTION__);
+ return false;
+ }
- total_size += s->size;
- }
+ total_size += s->size;
+ }
- *ptotal_size = total_size;
- return true;
+ *ptotal_size = total_size;
+ return true;
}
/**
*
* Shared LDS symbols are filtered out.
*/
-static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
- unsigned part_idx,
- Elf_Scn *section,
- uint32_t *lds_end_align)
+static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
+ Elf_Scn *section, uint32_t *lds_end_align)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- struct ac_rtld_part *part = &binary->parts[part_idx];
- Elf64_Shdr *shdr = elf64_getshdr(section);
- uint32_t strtabidx = shdr->sh_link;
- Elf_Data *symbols_data = elf_getdata(section, NULL);
- report_elf_if(!symbols_data);
-
- const Elf64_Sym *symbol = symbols_data->d_buf;
- size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
-
- for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
- struct ac_rtld_symbol s = {};
-
- if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
- /* old-style LDS symbols from initial prototype -- remove eventually */
- s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
- } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
- s.align = MIN2(symbol->st_value, 1u << 16);
- report_if(!util_is_power_of_two_nonzero(s.align));
- } else
- continue;
-
- report_if(symbol->st_size > 1u << 29);
-
- s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
- s.size = symbol->st_size;
- s.part_idx = part_idx;
-
- if (!strcmp(s.name, "__lds_end")) {
- report_elf_if(s.size != 0);
- *lds_end_align = MAX2(*lds_end_align, s.align);
- continue;
- }
-
- const struct ac_rtld_symbol *shared =
- find_symbol(&binary->lds_symbols, s.name, part_idx);
- if (shared) {
- report_elf_if(s.align > shared->align);
- report_elf_if(s.size > shared->size);
- continue;
- }
-
- util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ uint32_t strtabidx = shdr->sh_link;
+ Elf_Data *symbols_data = elf_getdata(section, NULL);
+ report_elf_if(!symbols_data);
+
+ const Elf64_Sym *symbol = symbols_data->d_buf;
+ size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
+
+ for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
+ struct ac_rtld_symbol s = {};
+
+ if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
+ /* old-style LDS symbols from initial prototype -- remove eventually */
+ s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
+ } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
+ s.align = MIN2(symbol->st_value, 1u << 16);
+ report_if(!util_is_power_of_two_nonzero(s.align));
+ } else
+ continue;
+
+ report_if(symbol->st_size > 1u << 29);
+
+ s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
+ s.size = symbol->st_size;
+ s.part_idx = part_idx;
+
+ if (!strcmp(s.name, "__lds_end")) {
+ report_elf_if(s.size != 0);
+ *lds_end_align = MAX2(*lds_end_align, s.align);
+ continue;
+ }
+
+ const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
+ if (shared) {
+ report_elf_if(s.align > shared->align);
+ report_elf_if(s.size > shared->size);
+ continue;
+ }
+
+ util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
* \param binary the uninitialized struct
* \param i binary opening parameters
*/
-bool ac_rtld_open(struct ac_rtld_binary *binary,
- struct ac_rtld_open_info i)
+bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
{
- /* One of the libelf implementations
- * (http://www.mr511.de/software/english.htm) requires calling
- * elf_version() before elf_memory().
- */
- elf_version(EV_CURRENT);
-
- memset(binary, 0, sizeof(*binary));
- memcpy(&binary->options, &i.options, sizeof(binary->options));
- binary->wave_size = i.wave_size;
- binary->num_parts = i.num_parts;
- binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
- if (!binary->parts)
- return false;
-
- uint64_t pasted_text_size = 0;
- uint64_t rx_align = 1;
- uint64_t rx_size = 0;
- uint64_t exec_size = 0;
-
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- goto fail; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- goto fail; \
- } \
- } while (false)
-
-	/* Copy and lay out shared LDS symbols. */
- if (i.num_shared_lds_symbols) {
- if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
- i.num_shared_lds_symbols))
- goto fail;
-
- memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
- }
-
- util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol, symbol)
- symbol->part_idx = ~0u;
-
- unsigned max_lds_size = 64 * 1024;
-
- if (i.info->chip_class == GFX6 ||
- (i.shader_type != MESA_SHADER_COMPUTE &&
- i.shader_type != MESA_SHADER_FRAGMENT))
- max_lds_size = 32 * 1024;
-
- uint64_t shared_lds_size = 0;
- if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
- goto fail;
-
- if (shared_lds_size > max_lds_size) {
- fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
- (unsigned)shared_lds_size, max_lds_size);
- goto fail;
- }
- binary->lds_size = shared_lds_size;
-
- /* First pass over all parts: open ELFs, pre-determine the placement of
-	 * sections in the memory image, and collect and lay out private LDS symbols. */
- uint32_t lds_end_align = 0;
-
- if (binary->options.halt_at_entry)
- pasted_text_size += 4;
-
- for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
- struct ac_rtld_part *part = &binary->parts[part_idx];
- unsigned part_lds_symbols_begin =
- util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
-
- part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
- report_elf_if(!part->elf);
-
- const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
- report_elf_if(!ehdr);
- report_if(ehdr->e_machine != MY_EM_AMDGPU);
-
- size_t section_str_index;
- size_t num_shdrs;
-		report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
- report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
-
- part->num_sections = num_shdrs;
- part->sections = calloc(sizeof(*part->sections), num_shdrs);
- report_if(!part->sections);
-
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
- s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
- report_elf_if(!s->name);
-
- /* Cannot actually handle linked objects yet */
- report_elf_if(shdr->sh_addr != 0);
-
- /* Alignment must be 0 or a power of two */
- report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
- uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
-
- if (shdr->sh_flags & SHF_ALLOC &&
- shdr->sh_type != SHT_NOTE) {
- report_if(shdr->sh_flags & SHF_WRITE);
-
- s->is_rx = true;
-
- if (shdr->sh_flags & SHF_EXECINSTR) {
- report_elf_if(shdr->sh_size & 3);
-
- if (!strcmp(s->name, ".text"))
- s->is_pasted_text = true;
-
- exec_size += shdr->sh_size;
- }
-
- if (s->is_pasted_text) {
- s->offset = pasted_text_size;
- pasted_text_size += shdr->sh_size;
- } else {
- rx_align = align(rx_align, sh_align);
- rx_size = align(rx_size, sh_align);
- s->offset = rx_size;
- rx_size += shdr->sh_size;
- }
- } else if (shdr->sh_type == SHT_SYMTAB) {
- if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
- goto fail;
- }
- }
-
- uint64_t part_lds_size = shared_lds_size;
- if (!layout_symbols(
- util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, part_lds_symbols_begin),
- util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - part_lds_symbols_begin,
- &part_lds_size))
- goto fail;
- binary->lds_size = MAX2(binary->lds_size, part_lds_size);
- }
-
- binary->rx_end_markers = pasted_text_size;
- pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
-
- /* __lds_end is a special symbol that points at the end of the memory
- * occupied by other LDS symbols. Its alignment is taken as the
- * maximum of its alignment over all shader parts where it occurs.
- */
- if (lds_end_align) {
- binary->lds_size = align(binary->lds_size, lds_end_align);
-
- struct ac_rtld_symbol *lds_end =
- util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
- lds_end->name = "__lds_end";
- lds_end->size = 0;
- lds_end->align = lds_end_align;
- lds_end->offset = binary->lds_size;
- lds_end->part_idx = ~0u;
- }
-
- if (binary->lds_size > max_lds_size) {
- fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
- (unsigned)binary->lds_size, max_lds_size);
- goto fail;
- }
-
- /* Second pass: Adjust offsets of non-pasted text sections. */
- binary->rx_size = pasted_text_size;
- binary->rx_size = align(binary->rx_size, rx_align);
-
- for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
- struct ac_rtld_part *part = &binary->parts[part_idx];
- size_t num_shdrs;
- elf_getshdrnum(part->elf, &num_shdrs);
-
- for (unsigned j = 0; j < num_shdrs; ++j) {
- struct ac_rtld_section *s = &part->sections[j];
- if (s->is_rx && !s->is_pasted_text)
- s->offset += binary->rx_size;
- }
- }
-
- binary->rx_size += rx_size;
- binary->exec_size = exec_size;
-
- if (i.info->chip_class >= GFX10) {
- /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
- * ahead of the PC, configurable by SH_MEM_CONFIG and
- * S_INST_PREFETCH. This can cause two issues:
- *
- * (1) Crossing a page boundary to an unmapped page. The logic
- * does not distinguish between a required fetch and a "mere"
- * prefetch and will fault.
- *
- * (2) Prefetching instructions that will be changed for a
- * different shader.
- *
- * (2) is not currently an issue because we flush the I$ at IB
- * boundaries, but (1) needs to be addressed. Due to buffer
- * suballocation, we just play it safe.
- */
- binary->rx_size = align(binary->rx_size + 3 * 64, 64);
- }
-
- return true;
+ /* One of the libelf implementations
+ * (http://www.mr511.de/software/english.htm) requires calling
+ * elf_version() before elf_memory().
+ */
+ elf_version(EV_CURRENT);
+
+ memset(binary, 0, sizeof(*binary));
+ memcpy(&binary->options, &i.options, sizeof(binary->options));
+ binary->wave_size = i.wave_size;
+ binary->num_parts = i.num_parts;
+ binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
+ if (!binary->parts)
+ return false;
+
+ uint64_t pasted_text_size = 0;
+ uint64_t rx_align = 1;
+ uint64_t rx_size = 0;
+ uint64_t exec_size = 0;
+
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ goto fail; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ goto fail; \
+ } \
+ } while (false)
+
+   /* Copy and lay out shared LDS symbols. */
+ if (i.num_shared_lds_symbols) {
+ if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
+ i.num_shared_lds_symbols))
+ goto fail;
+
+ memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
+ }
+
+ util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
+ symbol->part_idx = ~0u;
+
+ unsigned max_lds_size = 64 * 1024;
+
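+   /* GFX6, and any stage other than compute and fragment, can only use
+    * 32 KiB of LDS. */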
+ if (i.info->chip_class == GFX6 ||
+ (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
+ max_lds_size = 32 * 1024;
+
+ uint64_t shared_lds_size = 0;
+ if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
+ goto fail;
+
+ if (shared_lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
+ (unsigned)shared_lds_size, max_lds_size);
+ goto fail;
+ }
+ binary->lds_size = shared_lds_size;
+
+ /* First pass over all parts: open ELFs, pre-determine the placement of
+    * sections in the memory image, and collect and lay out private LDS symbols. */
+ uint32_t lds_end_align = 0;
+
+ if (binary->options.halt_at_entry)
+ pasted_text_size += 4;
+
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ unsigned part_lds_symbols_begin =
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
+
+ part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
+ report_elf_if(!part->elf);
+
+ const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
+ report_elf_if(!ehdr);
+ report_if(ehdr->e_machine != MY_EM_AMDGPU);
+
+ size_t section_str_index;
+ size_t num_shdrs;
+      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
+ report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
+
+ part->num_sections = num_shdrs;
+ part->sections = calloc(sizeof(*part->sections), num_shdrs);
+ report_if(!part->sections);
+
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
+ s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
+ report_elf_if(!s->name);
+
+ /* Cannot actually handle linked objects yet */
+ report_elf_if(shdr->sh_addr != 0);
+
+ /* Alignment must be 0 or a power of two */
+ report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
+ uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
+
+ if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
+ report_if(shdr->sh_flags & SHF_WRITE);
+
+ s->is_rx = true;
+
+ if (shdr->sh_flags & SHF_EXECINSTR) {
+ report_elf_if(shdr->sh_size & 3);
+
+ if (!strcmp(s->name, ".text"))
+ s->is_pasted_text = true;
+
+ exec_size += shdr->sh_size;
+ }
+
+ if (s->is_pasted_text) {
+ s->offset = pasted_text_size;
+ pasted_text_size += shdr->sh_size;
+ } else {
+ rx_align = align(rx_align, sh_align);
+ rx_size = align(rx_size, sh_align);
+ s->offset = rx_size;
+ rx_size += shdr->sh_size;
+ }
+ } else if (shdr->sh_type == SHT_SYMTAB) {
+ if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
+ goto fail;
+ }
+ }
+
+ uint64_t part_lds_size = shared_lds_size;
+ if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
+ part_lds_symbols_begin),
+ util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
+ part_lds_symbols_begin,
+ &part_lds_size))
+ goto fail;
+ binary->lds_size = MAX2(binary->lds_size, part_lds_size);
+ }
+
+ binary->rx_end_markers = pasted_text_size;
+ pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
+
+ /* __lds_end is a special symbol that points at the end of the memory
+ * occupied by other LDS symbols. Its alignment is taken as the
+ * maximum of its alignment over all shader parts where it occurs.
+ */
+ if (lds_end_align) {
+ binary->lds_size = align(binary->lds_size, lds_end_align);
+
+ struct ac_rtld_symbol *lds_end =
+ util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
+ lds_end->name = "__lds_end";
+ lds_end->size = 0;
+ lds_end->align = lds_end_align;
+ lds_end->offset = binary->lds_size;
+ lds_end->part_idx = ~0u;
+ }
+
+ if (binary->lds_size > max_lds_size) {
+ fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
+ (unsigned)binary->lds_size, max_lds_size);
+ goto fail;
+ }
+
+ /* Second pass: Adjust offsets of non-pasted text sections. */
+ binary->rx_size = pasted_text_size;
+ binary->rx_size = align(binary->rx_size, rx_align);
+
+ for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
+ struct ac_rtld_part *part = &binary->parts[part_idx];
+ size_t num_shdrs;
+ elf_getshdrnum(part->elf, &num_shdrs);
+
+ for (unsigned j = 0; j < num_shdrs; ++j) {
+ struct ac_rtld_section *s = &part->sections[j];
+ if (s->is_rx && !s->is_pasted_text)
+ s->offset += binary->rx_size;
+ }
+ }
+
+ binary->rx_size += rx_size;
+ binary->exec_size = exec_size;
+
+ if (i.info->chip_class >= GFX10) {
+ /* In gfx10, the SQ fetches up to 3 cache lines of 16 dwords
+ * ahead of the PC, configurable by SH_MEM_CONFIG and
+ * S_INST_PREFETCH. This can cause two issues:
+ *
+ * (1) Crossing a page boundary to an unmapped page. The logic
+ * does not distinguish between a required fetch and a "mere"
+ * prefetch and will fault.
+ *
+ * (2) Prefetching instructions that will be changed for a
+ * different shader.
+ *
+ * (2) is not currently an issue because we flush the I$ at IB
+ * boundaries, but (1) needs to be addressed. Due to buffer
+ * suballocation, we just play it safe.
+ */
+ binary->rx_size = align(binary->rx_size + 3 * 64, 64);
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
fail:
- ac_rtld_close(binary);
- return false;
+ ac_rtld_close(binary);
+ return false;
}
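A worked check of the gfx10 padding above (a sketch; the sizes are illustrative and align() is the usual power-of-two round-up from util): three cache lines of 16 dwords are 3 * 16 * 4 = 192 = 3 * 64 bytes, so for example:

uint64_t rx_size = 0x1234;             /* hypothetical code size */
rx_size = align(rx_size + 3 * 64, 64); /* align(x, a) == (x + a - 1) & ~(a - 1) */
/* 0x1234 + 0xc0 = 0x12f4, rounded up to 0x1300 */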
void ac_rtld_close(struct ac_rtld_binary *binary)
{
- for (unsigned i = 0; i < binary->num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
- free(part->sections);
- elf_end(part->elf);
- }
-
- util_dynarray_fini(&binary->lds_symbols);
- free(binary->parts);
- binary->parts = NULL;
- binary->num_parts = 0;
+ for (unsigned i = 0; i < binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &binary->parts[i];
+ free(part->sections);
+ elf_end(part->elf);
+ }
+
+ util_dynarray_fini(&binary->lds_symbols);
+ free(binary->parts);
+ binary->parts = NULL;
+ binary->num_parts = 0;
}
-static bool get_section_by_name(struct ac_rtld_part *part, const char *name,
- const char **data, size_t *nbytes)
+static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
+ size_t *nbytes)
{
- for (unsigned i = 0; i < part->num_sections; ++i) {
- struct ac_rtld_section *s = &part->sections[i];
- if (s->name && !strcmp(name, s->name)) {
- Elf_Scn *target_scn = elf_getscn(part->elf, i);
- Elf_Data *target_data = elf_getdata(target_scn, NULL);
- if (!target_data) {
- report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
- return false;
- }
-
- *data = target_data->d_buf;
- *nbytes = target_data->d_size;
- return true;
- }
- }
- return false;
+ for (unsigned i = 0; i < part->num_sections; ++i) {
+ struct ac_rtld_section *s = &part->sections[i];
+ if (s->name && !strcmp(name, s->name)) {
+ Elf_Scn *target_scn = elf_getscn(part->elf, i);
+ Elf_Data *target_data = elf_getdata(target_scn, NULL);
+ if (!target_data) {
+ report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
+ return false;
+ }
+
+ *data = target_data->d_buf;
+ *nbytes = target_data->d_size;
+ return true;
+ }
+ }
+ return false;
}
-bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
- const char **data, size_t *nbytes)
+bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
+ size_t *nbytes)
{
- assert(binary->num_parts == 1);
- return get_section_by_name(&binary->parts[0], name, data, nbytes);
+ assert(binary->num_parts == 1);
+ return get_section_by_name(&binary->parts[0], name, data, nbytes);
}
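A minimal caller-side fragment showing how the helper above might be used, assuming an already-opened single-part binary; ".AMDGPU.disasm" is only an example section name:

const char *data;
size_t nbytes;
if (ac_rtld_get_section_by_name(&binary, ".AMDGPU.disasm", &data, &nbytes))
   fwrite(data, 1, nbytes, stderr); /* e.g. dump the raw section contents */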
-bool ac_rtld_read_config(const struct radeon_info *info,
- struct ac_rtld_binary *binary,
- struct ac_shader_config *config)
+bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
+ struct ac_shader_config *config)
{
- for (unsigned i = 0; i < binary->num_parts; ++i) {
- struct ac_rtld_part *part = &binary->parts[i];
- const char *config_data;
- size_t config_nbytes;
-
- if (!get_section_by_name(part, ".AMDGPU.config",
- &config_data, &config_nbytes))
- return false;
-
- /* TODO: be precise about scratch use? */
- struct ac_shader_config c = {};
- ac_parse_shader_binary_config(config_data, config_nbytes,
- binary->wave_size, true, info, &c);
-
- config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
- config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
- config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
- config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
- config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,
- c.scratch_bytes_per_wave);
-
- assert(i == 0 || config->float_mode == c.float_mode);
- config->float_mode = c.float_mode;
-
- /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
- * the main shader part is used. */
- assert(config->spi_ps_input_ena == 0 &&
- config->spi_ps_input_addr == 0);
- config->spi_ps_input_ena = c.spi_ps_input_ena;
- config->spi_ps_input_addr = c.spi_ps_input_addr;
-
- /* TODO: consistently use LDS symbols for this */
- config->lds_size = MAX2(config->lds_size, c.lds_size);
-
- /* TODO: Should we combine these somehow? It's currently only
- * used for radeonsi's compute, where multiple parts aren't used. */
- assert(config->rsrc1 == 0 && config->rsrc2 == 0);
- config->rsrc1 = c.rsrc1;
- config->rsrc2 = c.rsrc2;
- }
-
- return true;
+ for (unsigned i = 0; i < binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &binary->parts[i];
+ const char *config_data;
+ size_t config_nbytes;
+
+ if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
+ return false;
+
+ /* TODO: be precise about scratch use? */
+ struct ac_shader_config c = {};
+ ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, true, info, &c);
+
+ config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
+ config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
+ config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
+ config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
+ config->scratch_bytes_per_wave =
+ MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
+
+ assert(i == 0 || config->float_mode == c.float_mode);
+ config->float_mode = c.float_mode;
+
+ /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
+ * the main shader part is used. */
+ assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
+ config->spi_ps_input_ena = c.spi_ps_input_ena;
+ config->spi_ps_input_addr = c.spi_ps_input_addr;
+
+ /* TODO: consistently use LDS symbols for this */
+ config->lds_size = MAX2(config->lds_size, c.lds_size);
+
+ /* TODO: Should we combine these somehow? It's currently only
+ * used for radeonsi's compute, where multiple parts aren't used. */
+ assert(config->rsrc1 == 0 && config->rsrc2 == 0);
+ config->rsrc1 = c.rsrc1;
+ config->rsrc2 = c.rsrc2;
+ }
+
+ return true;
}
-static bool resolve_symbol(const struct ac_rtld_upload_info *u,
- unsigned part_idx, const Elf64_Sym *sym,
- const char *name, uint64_t *value)
+static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
+ const Elf64_Sym *sym, const char *name, uint64_t *value)
{
- /* TODO: properly disentangle the undef and the LDS cases once
- * STT_AMDGPU_LDS is retired. */
- if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
- const struct ac_rtld_symbol *lds_sym =
- find_symbol(&u->binary->lds_symbols, name, part_idx);
-
- if (lds_sym) {
- *value = lds_sym->offset;
- return true;
- }
-
- /* TODO: resolve from other parts */
-
- if (u->get_external_symbol(u->cb_data, name, value))
- return true;
-
- report_errorf("symbol %s: unknown", name);
- return false;
- }
-
- struct ac_rtld_part *part = &u->binary->parts[part_idx];
- if (sym->st_shndx >= part->num_sections) {
- report_errorf("symbol %s: section out of bounds", name);
- return false;
- }
-
- struct ac_rtld_section *s = &part->sections[sym->st_shndx];
- if (!s->is_rx) {
- report_errorf("symbol %s: bad section", name);
- return false;
- }
-
- uint64_t section_base = u->rx_va + s->offset;
-
- *value = section_base + sym->st_value;
- return true;
+ /* TODO: properly disentangle the undef and the LDS cases once
+ * STT_AMDGPU_LDS is retired. */
+ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
+ const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
+
+ if (lds_sym) {
+ *value = lds_sym->offset;
+ return true;
+ }
+
+ /* TODO: resolve from other parts */
+
+ if (u->get_external_symbol(u->cb_data, name, value))
+ return true;
+
+ report_errorf("symbol %s: unknown", name);
+ return false;
+ }
+
+ struct ac_rtld_part *part = &u->binary->parts[part_idx];
+ if (sym->st_shndx >= part->num_sections) {
+ report_errorf("symbol %s: section out of bounds", name);
+ return false;
+ }
+
+ struct ac_rtld_section *s = &part->sections[sym->st_shndx];
+ if (!s->is_rx) {
+ report_errorf("symbol %s: bad section", name);
+ return false;
+ }
+
+ uint64_t section_base = u->rx_va + s->offset;
+
+ *value = section_base + sym->st_value;
+ return true;
}
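Concretely, in the defined-symbol branch at the end (illustrative numbers): with rx_va = 0x100000, the symbol's section placed at offset 0x140, and st_value = 0x20, the resolved value is 0x100000 + 0x140 + 0x20 = 0x100160.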
-static bool apply_relocs(const struct ac_rtld_upload_info *u,
- unsigned part_idx, const Elf64_Shdr *reloc_shdr,
- const Elf_Data *reloc_data)
+static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
+ const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_elf_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- struct ac_rtld_part *part = &u->binary->parts[part_idx];
- Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
- report_elf_if(!target_scn);
-
- Elf_Data *target_data = elf_getdata(target_scn, NULL);
- report_elf_if(!target_data);
-
- Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
- report_elf_if(!symbols_scn);
-
- Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
- report_elf_if(!symbols_shdr);
- uint32_t strtabidx = symbols_shdr->sh_link;
-
- Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
- report_elf_if(!symbols_data);
-
- const Elf64_Sym *symbols = symbols_data->d_buf;
- size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
-
- struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
- report_if(!s->is_rx);
-
- const char *orig_base = target_data->d_buf;
- char *dst_base = u->rx_ptr + s->offset;
- uint64_t va_base = u->rx_va + s->offset;
-
- Elf64_Rel *rel = reloc_data->d_buf;
- size_t num_relocs = reloc_data->d_size / sizeof(*rel);
- for (size_t i = 0; i < num_relocs; ++i, ++rel) {
- size_t r_sym = ELF64_R_SYM(rel->r_info);
- unsigned r_type = ELF64_R_TYPE(rel->r_info);
-
- const char *orig_ptr = orig_base + rel->r_offset;
- char *dst_ptr = dst_base + rel->r_offset;
- uint64_t va = va_base + rel->r_offset;
-
- uint64_t symbol;
- uint64_t addend;
-
- if (r_sym == STN_UNDEF) {
- symbol = 0;
- } else {
- report_elf_if(r_sym >= num_symbols);
-
- const Elf64_Sym *sym = &symbols[r_sym];
- const char *symbol_name =
- elf_strptr(part->elf, strtabidx, sym->st_name);
- report_elf_if(!symbol_name);
-
- if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
- return false;
- }
-
- /* TODO: Should we also support .rela sections, where the
- * addend is part of the relocation record? */
-
- /* Load the addend from the ELF instead of the destination,
- * because the destination may be in VRAM. */
- switch (r_type) {
- case R_AMDGPU_ABS32:
- case R_AMDGPU_ABS32_LO:
- case R_AMDGPU_ABS32_HI:
- case R_AMDGPU_REL32:
- case R_AMDGPU_REL32_LO:
- case R_AMDGPU_REL32_HI:
- addend = *(const uint32_t *)orig_ptr;
- break;
- case R_AMDGPU_ABS64:
- case R_AMDGPU_REL64:
- addend = *(const uint64_t *)orig_ptr;
- break;
- default:
- report_errorf("unsupported r_type == %u", r_type);
- return false;
- }
-
- uint64_t abs = symbol + addend;
-
- switch (r_type) {
- case R_AMDGPU_ABS32:
- assert((uint32_t)abs == abs);
- case R_AMDGPU_ABS32_LO:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
- break;
- case R_AMDGPU_ABS32_HI:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
- break;
- case R_AMDGPU_ABS64:
- *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
- break;
- case R_AMDGPU_REL32:
- assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
- case R_AMDGPU_REL32_LO:
- *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
- break;
- case R_AMDGPU_REL32_HI:
- *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
- break;
- case R_AMDGPU_REL64:
- *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
- break;
- default:
- unreachable("bad r_type");
- }
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_elf_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ struct ac_rtld_part *part = &u->binary->parts[part_idx];
+ Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
+ report_elf_if(!target_scn);
+
+ Elf_Data *target_data = elf_getdata(target_scn, NULL);
+ report_elf_if(!target_data);
+
+ Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
+ report_elf_if(!symbols_scn);
+
+ Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
+ report_elf_if(!symbols_shdr);
+ uint32_t strtabidx = symbols_shdr->sh_link;
+
+ Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
+ report_elf_if(!symbols_data);
+
+ const Elf64_Sym *symbols = symbols_data->d_buf;
+ size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
+
+ struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
+ report_if(!s->is_rx);
+
+ const char *orig_base = target_data->d_buf;
+ char *dst_base = u->rx_ptr + s->offset;
+ uint64_t va_base = u->rx_va + s->offset;
+
+ Elf64_Rel *rel = reloc_data->d_buf;
+ size_t num_relocs = reloc_data->d_size / sizeof(*rel);
+ for (size_t i = 0; i < num_relocs; ++i, ++rel) {
+ size_t r_sym = ELF64_R_SYM(rel->r_info);
+ unsigned r_type = ELF64_R_TYPE(rel->r_info);
+
+ const char *orig_ptr = orig_base + rel->r_offset;
+ char *dst_ptr = dst_base + rel->r_offset;
+ uint64_t va = va_base + rel->r_offset;
+
+ uint64_t symbol;
+ uint64_t addend;
+
+ if (r_sym == STN_UNDEF) {
+ symbol = 0;
+ } else {
+ report_elf_if(r_sym >= num_symbols);
+
+ const Elf64_Sym *sym = &symbols[r_sym];
+ const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
+ report_elf_if(!symbol_name);
+
+ if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
+ return false;
+ }
+
+ /* TODO: Should we also support .rela sections, where the
+ * addend is part of the relocation record? */
+
+ /* Load the addend from the ELF instead of the destination,
+ * because the destination may be in VRAM. */
+ switch (r_type) {
+ case R_AMDGPU_ABS32:
+ case R_AMDGPU_ABS32_LO:
+ case R_AMDGPU_ABS32_HI:
+ case R_AMDGPU_REL32:
+ case R_AMDGPU_REL32_LO:
+ case R_AMDGPU_REL32_HI:
+ addend = *(const uint32_t *)orig_ptr;
+ break;
+ case R_AMDGPU_ABS64:
+ case R_AMDGPU_REL64:
+ addend = *(const uint64_t *)orig_ptr;
+ break;
+ default:
+ report_errorf("unsupported r_type == %u", r_type);
+ return false;
+ }
+
+ uint64_t abs = symbol + addend;
+
+ switch (r_type) {
+ case R_AMDGPU_ABS32:
+ assert((uint32_t)abs == abs);
+ case R_AMDGPU_ABS32_LO:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
+ break;
+ case R_AMDGPU_ABS32_HI:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
+ break;
+ case R_AMDGPU_ABS64:
+ *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
+ break;
+ case R_AMDGPU_REL32:
+ assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
+ case R_AMDGPU_REL32_LO:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
+ break;
+ case R_AMDGPU_REL32_HI:
+ *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
+ break;
+ case R_AMDGPU_REL64:
+ *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
+ break;
+ default:
+ unreachable("bad r_type");
+ }
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
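A worked example of the REL32 case above (illustrative numbers): the patched dword is symbol + addend - va, so a symbol resolved to 0x100200 with an addend of 4 loaded from the ELF and a relocation site at va = 0x100100 yields 0x100200 + 4 - 0x100100 = 0x104. The _LO/_HI variants write the low and high 32 bits of the same 64-bit result.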
*/
bool ac_rtld_upload(struct ac_rtld_upload_info *u)
{
-#define report_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-#define report_elf_if(cond) \
- do { \
- if ((cond)) { \
- report_errorf(#cond); \
- return false; \
- } \
- } while (false)
-
- if (u->binary->options.halt_at_entry) {
- /* s_sethalt 1 */
- *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
- }
-
- /* First pass: upload raw section data and lay out private LDS symbols. */
- for (unsigned i = 0; i < u->binary->num_parts; ++i) {
- struct ac_rtld_part *part = &u->binary->parts[i];
-
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
-
- if (!s->is_rx)
- continue;
-
- report_if(shdr->sh_type != SHT_PROGBITS);
-
- Elf_Data *data = elf_getdata(section, NULL);
- report_elf_if(!data || data->d_size != shdr->sh_size);
- memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
- }
- }
-
- if (u->binary->rx_end_markers) {
- uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
- for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
- *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
- }
-
- /* Second pass: handle relocations, overwriting uploaded data where
- * appropriate. */
- for (unsigned i = 0; i < u->binary->num_parts; ++i) {
- struct ac_rtld_part *part = &u->binary->parts[i];
- Elf_Scn *section = NULL;
- while ((section = elf_nextscn(part->elf, section))) {
- Elf64_Shdr *shdr = elf64_getshdr(section);
- if (shdr->sh_type == SHT_REL) {
- Elf_Data *relocs = elf_getdata(section, NULL);
- report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
- if (!apply_relocs(u, i, shdr, relocs))
- return false;
- } else if (shdr->sh_type == SHT_RELA) {
- report_errorf("SHT_RELA not supported");
- return false;
- }
- }
- }
-
- return true;
+#define report_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+#define report_elf_if(cond) \
+ do { \
+ if ((cond)) { \
+ report_errorf(#cond); \
+ return false; \
+ } \
+ } while (false)
+
+ if (u->binary->options.halt_at_entry) {
+ /* s_sethalt 1 */
+ *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
+ }
+
+ /* First pass: upload raw section data and lay out private LDS symbols. */
+ for (unsigned i = 0; i < u->binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &u->binary->parts[i];
+
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
+
+ if (!s->is_rx)
+ continue;
+
+ report_if(shdr->sh_type != SHT_PROGBITS);
+
+ Elf_Data *data = elf_getdata(section, NULL);
+ report_elf_if(!data || data->d_size != shdr->sh_size);
+ memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
+ }
+ }
+
+ if (u->binary->rx_end_markers) {
+ uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
+ for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
+ *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
+ }
+
+ /* Second pass: handle relocations, overwriting uploaded data where
+ * appropriate. */
+ for (unsigned i = 0; i < u->binary->num_parts; ++i) {
+ struct ac_rtld_part *part = &u->binary->parts[i];
+ Elf_Scn *section = NULL;
+ while ((section = elf_nextscn(part->elf, section))) {
+ Elf64_Shdr *shdr = elf64_getshdr(section);
+ if (shdr->sh_type == SHT_REL) {
+ Elf_Data *relocs = elf_getdata(section, NULL);
+ report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
+ if (!apply_relocs(u, i, shdr, relocs))
+ return false;
+ } else if (shdr->sh_type == SHT_RELA) {
+ report_errorf("SHT_RELA not supported");
+ return false;
+ }
+ }
+ }
+
+ return true;
#undef report_if
#undef report_elf_if
#ifndef AC_RTLD_H
#define AC_RTLD_H
+#include "compiler/shader_enums.h"
+#include "util/u_dynarray.h"
+
#include <stdbool.h>
-#include <stdint.h>
#include <stddef.h>
-
-#include "util/u_dynarray.h"
-#include "compiler/shader_enums.h"
+#include <stdint.h>
#ifdef __cplusplus
extern "C" {
struct radeon_info;
struct ac_rtld_symbol {
- const char *name;
- uint32_t size;
- uint32_t align;
- uint64_t offset; /* filled in by ac_rtld_open */
- unsigned part_idx; /* shader part in which this symbol appears */
+ const char *name;
+ uint32_t size;
+ uint32_t align;
+ uint64_t offset; /* filled in by ac_rtld_open */
+ unsigned part_idx; /* shader part in which this symbol appears */
};
struct ac_rtld_options {
- /* Loader will insert an s_sethalt 1 instruction as the
- * first instruction. */
- bool halt_at_entry:1;
+ /* Loader will insert an s_sethalt 1 instruction as the
+ * first instruction. */
+ bool halt_at_entry : 1;
};
/* Lightweight wrapper around underlying ELF objects. */
struct ac_rtld_binary {
- struct ac_rtld_options options;
- unsigned wave_size;
+ struct ac_rtld_options options;
+ unsigned wave_size;
- /* Required buffer sizes, currently read/executable only. */
- uint64_t rx_size;
+ /* Required buffer sizes, currently read/executable only. */
+ uint64_t rx_size;
- /* Size of executable code, for reporting purposes. */
- uint64_t exec_size;
+ /* Size of executable code, for reporting purposes. */
+ uint64_t exec_size;
- uint64_t rx_end_markers;
+ uint64_t rx_end_markers;
- unsigned num_parts;
- struct ac_rtld_part *parts;
+ unsigned num_parts;
+ struct ac_rtld_part *parts;
- struct util_dynarray lds_symbols;
- uint32_t lds_size;
+ struct util_dynarray lds_symbols;
+ uint32_t lds_size;
};
/**
* \param value to be filled in by the callback
* \return whether the symbol was found successfully
*/
-typedef bool (*ac_rtld_get_external_symbol_cb)(
- void *cb_data, const char *symbol, uint64_t *value);
+typedef bool (*ac_rtld_get_external_symbol_cb)(void *cb_data, const char *symbol, uint64_t *value);
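A sketch of a driver-side callback implementing this signature; the symbol name and resolved value are invented for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool example_get_external_symbol(void *cb_data, const char *symbol, uint64_t *value)
{
   if (!strcmp(symbol, "example_ring")) { /* hypothetical symbol name */
      *value = 0x10000;                   /* hypothetical resolved value */
      return true;
   }
   return false; /* unresolved symbols make the upload fail with an error */
}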
/**
* Lifetimes of \ref info, in-memory ELF objects, and the names of
* shared LDS symbols must extend until \ref ac_rtld_close is called on
* the opened binary.
*/
struct ac_rtld_open_info {
- const struct radeon_info *info;
- struct ac_rtld_options options;
- gl_shader_stage shader_type;
- unsigned wave_size;
-
- unsigned num_parts;
- const char * const *elf_ptrs; /* in-memory ELF objects of each part */
- const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
-
- /* Shared LDS symbols are layouted such that they are accessible from
- * all shader parts. Non-shared (private) LDS symbols of one part may
- * overlap private LDS symbols of another shader part.
- */
- unsigned num_shared_lds_symbols;
- const struct ac_rtld_symbol *shared_lds_symbols;
+ const struct radeon_info *info;
+ struct ac_rtld_options options;
+ gl_shader_stage shader_type;
+ unsigned wave_size;
+
+ unsigned num_parts;
+ const char *const *elf_ptrs; /* in-memory ELF objects of each part */
+ const size_t *elf_sizes; /* sizes of corresponding in-memory ELF objects in bytes */
+
+   /* Shared LDS symbols are laid out such that they are accessible from
+ * all shader parts. Non-shared (private) LDS symbols of one part may
+ * overlap private LDS symbols of another shader part.
+ */
+ unsigned num_shared_lds_symbols;
+ const struct ac_rtld_symbol *shared_lds_symbols;
};
-bool ac_rtld_open(struct ac_rtld_binary *binary,
- struct ac_rtld_open_info i);
+bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i);
void ac_rtld_close(struct ac_rtld_binary *binary);
-bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
- const char **data, size_t *nbytes);
+bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
+ size_t *nbytes);
-bool ac_rtld_read_config(const struct radeon_info *info,
- struct ac_rtld_binary *binary,
- struct ac_shader_config *config);
+bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
+ struct ac_shader_config *config);
struct ac_rtld_upload_info {
- struct ac_rtld_binary *binary;
+ struct ac_rtld_binary *binary;
- /** GPU mapping of the read/executable buffer. */
- uint64_t rx_va;
+ /** GPU mapping of the read/executable buffer. */
+ uint64_t rx_va;
- /** CPU mapping of the read/executable buffer */
- char *rx_ptr;
+ /** CPU mapping of the read/executable buffer */
+ char *rx_ptr;
- /** Optional callback function that will be queried for symbols not
- * defined in any of the binary's parts. */
- ac_rtld_get_external_symbol_cb get_external_symbol;
+ /** Optional callback function that will be queried for symbols not
+ * defined in any of the binary's parts. */
+ ac_rtld_get_external_symbol_cb get_external_symbol;
- /** Caller-defined data that will be passed to callback functions. */
- void *cb_data;
+ /** Caller-defined data that will be passed to callback functions. */
+ void *cb_data;
};
bool ac_rtld_upload(struct ac_rtld_upload_info *u);
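Tying the declarations above together, a hedged end-to-end sketch; rad_info, elf_ptr, elf_size, gpu_va, cpu_ptr and example_get_external_symbol stand in for driver-provided values:

struct ac_rtld_binary binary;
if (ac_rtld_open(&binary, (struct ac_rtld_open_info){
                             .info = &rad_info,
                             .shader_type = MESA_SHADER_COMPUTE,
                             .wave_size = 64,
                             .num_parts = 1,
                             .elf_ptrs = &elf_ptr,
                             .elf_sizes = &elf_size,
                          })) {
   /* allocate binary.rx_size bytes of executable GPU memory, then: */
   struct ac_rtld_upload_info u = {
      .binary = &binary,
      .rx_va = gpu_va,   /* GPU address of that buffer */
      .rx_ptr = cpu_ptr, /* CPU mapping of the same buffer */
      .get_external_symbol = example_get_external_symbol,
   };
   ac_rtld_upload(&u);
   ac_rtld_close(&binary);
}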
*/
#include "ac_shader_args.h"
+
#include "nir/nir_builder.h"
-void
-ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
- unsigned size, enum ac_arg_type type, struct ac_arg *arg)
+void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned size,
+ enum ac_arg_type type, struct ac_arg *arg)
{
- assert(info->arg_count < AC_MAX_ARGS);
+ assert(info->arg_count < AC_MAX_ARGS);
- unsigned offset;
- if (regfile == AC_ARG_SGPR) {
- offset = info->num_sgprs_used;
- info->num_sgprs_used += size;
- } else {
- assert(regfile == AC_ARG_VGPR);
- offset = info->num_vgprs_used;
- info->num_vgprs_used += size;
- }
+ unsigned offset;
+ if (regfile == AC_ARG_SGPR) {
+ offset = info->num_sgprs_used;
+ info->num_sgprs_used += size;
+ } else {
+ assert(regfile == AC_ARG_VGPR);
+ offset = info->num_vgprs_used;
+ info->num_vgprs_used += size;
+ }
- info->args[info->arg_count].file = regfile;
- info->args[info->arg_count].offset = offset;
- info->args[info->arg_count].size = size;
- info->args[info->arg_count].type = type;
+ info->args[info->arg_count].file = regfile;
+ info->args[info->arg_count].offset = offset;
+ info->args[info->arg_count].size = size;
+ info->args[info->arg_count].type = type;
- if (arg) {
- arg->arg_index = info->arg_count;
- arg->used = true;
- }
+ if (arg) {
+ arg->arg_index = info->arg_count;
+ arg->used = true;
+ }
- info->arg_count++;
+ info->arg_count++;
}
-
#ifndef AC_SHADER_ARGS_H
#define AC_SHADER_ARGS_H
-#include <stdint.h>
#include <stdbool.h>
+#include <stdint.h>
#define AC_MAX_INLINE_PUSH_CONSTS 8
-enum ac_arg_regfile {
- AC_ARG_SGPR,
- AC_ARG_VGPR,
+enum ac_arg_regfile
+{
+ AC_ARG_SGPR,
+ AC_ARG_VGPR,
};
-enum ac_arg_type {
- AC_ARG_FLOAT,
- AC_ARG_INT,
- AC_ARG_CONST_PTR, /* Pointer to i8 array */
- AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
- AC_ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */
- AC_ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */
- AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
+enum ac_arg_type
+{
+ AC_ARG_FLOAT,
+ AC_ARG_INT,
+ AC_ARG_CONST_PTR, /* Pointer to i8 array */
+ AC_ARG_CONST_FLOAT_PTR, /* Pointer to f32 array */
+ AC_ARG_CONST_PTR_PTR, /* Pointer to pointer to i8 array */
+ AC_ARG_CONST_DESC_PTR, /* Pointer to v4i32 array */
+ AC_ARG_CONST_IMAGE_PTR, /* Pointer to v8i32 array */
};
struct ac_arg {
- uint8_t arg_index;
- bool used;
+ uint8_t arg_index;
+ bool used;
};
-
#define AC_MAX_ARGS 128
struct ac_shader_args {
- /* Info on how to declare arguments */
- struct {
- enum ac_arg_type type;
- enum ac_arg_regfile file;
- uint8_t offset;
- uint8_t size;
- bool skip;
- } args[AC_MAX_ARGS];
-
- uint8_t arg_count;
- uint8_t sgpr_count;
- uint8_t num_sgprs_used;
- uint8_t num_vgprs_used;
-
- struct ac_arg base_vertex;
- struct ac_arg start_instance;
- struct ac_arg draw_id;
- struct ac_arg vertex_id;
- struct ac_arg instance_id;
- struct ac_arg tcs_patch_id;
- struct ac_arg tcs_rel_ids;
- struct ac_arg tes_patch_id;
- struct ac_arg gs_prim_id;
- struct ac_arg gs_invocation_id;
-
- /* PS */
- struct ac_arg frag_pos[4];
- struct ac_arg front_face;
- struct ac_arg ancillary;
- struct ac_arg sample_coverage;
- struct ac_arg prim_mask;
- struct ac_arg persp_sample;
- struct ac_arg persp_center;
- struct ac_arg persp_centroid;
- struct ac_arg pull_model;
- struct ac_arg linear_sample;
- struct ac_arg linear_center;
- struct ac_arg linear_centroid;
-
- /* CS */
- struct ac_arg local_invocation_ids;
- struct ac_arg num_work_groups;
- struct ac_arg workgroup_ids[3];
- struct ac_arg tg_size;
-
- /* Vulkan only */
- struct ac_arg push_constants;
- struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
- unsigned num_inline_push_consts;
- unsigned base_inline_push_consts;
- struct ac_arg view_index;
+ /* Info on how to declare arguments */
+ struct {
+ enum ac_arg_type type;
+ enum ac_arg_regfile file;
+ uint8_t offset;
+ uint8_t size;
+ bool skip;
+ } args[AC_MAX_ARGS];
+
+ uint8_t arg_count;
+ uint8_t sgpr_count;
+ uint8_t num_sgprs_used;
+ uint8_t num_vgprs_used;
+
+ struct ac_arg base_vertex;
+ struct ac_arg start_instance;
+ struct ac_arg draw_id;
+ struct ac_arg vertex_id;
+ struct ac_arg instance_id;
+ struct ac_arg tcs_patch_id;
+ struct ac_arg tcs_rel_ids;
+ struct ac_arg tes_patch_id;
+ struct ac_arg gs_prim_id;
+ struct ac_arg gs_invocation_id;
+
+ /* PS */
+ struct ac_arg frag_pos[4];
+ struct ac_arg front_face;
+ struct ac_arg ancillary;
+ struct ac_arg sample_coverage;
+ struct ac_arg prim_mask;
+ struct ac_arg persp_sample;
+ struct ac_arg persp_center;
+ struct ac_arg persp_centroid;
+ struct ac_arg pull_model;
+ struct ac_arg linear_sample;
+ struct ac_arg linear_center;
+ struct ac_arg linear_centroid;
+
+ /* CS */
+ struct ac_arg local_invocation_ids;
+ struct ac_arg num_work_groups;
+ struct ac_arg workgroup_ids[3];
+ struct ac_arg tg_size;
+
+ /* Vulkan only */
+ struct ac_arg push_constants;
+ struct ac_arg inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+ unsigned num_inline_push_consts;
+ unsigned base_inline_push_consts;
+ struct ac_arg view_index;
};
-void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile,
- unsigned registers, enum ac_arg_type type,
- struct ac_arg *arg);
+void ac_add_arg(struct ac_shader_args *info, enum ac_arg_regfile regfile, unsigned registers,
+ enum ac_arg_type type, struct ac_arg *arg);
#endif
-
* IN THE SOFTWARE.
*/
+#include "ac_shader_util.h"
+
+#include "sid.h"
+
#include <assert.h>
#include <stdlib.h>
#include <string.h>
-#include "ac_shader_util.h"
-#include "sid.h"
-
-unsigned
-ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
- bool writes_samplemask)
+unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask)
{
- if (writes_z) {
- /* Z needs 32 bits. */
- if (writes_samplemask)
- return V_028710_SPI_SHADER_32_ABGR;
- else if (writes_stencil)
- return V_028710_SPI_SHADER_32_GR;
- else
- return V_028710_SPI_SHADER_32_R;
- } else if (writes_stencil || writes_samplemask) {
- /* Both stencil and sample mask need only 16 bits. */
- return V_028710_SPI_SHADER_UINT16_ABGR;
- } else {
- return V_028710_SPI_SHADER_ZERO;
- }
+ if (writes_z) {
+ /* Z needs 32 bits. */
+ if (writes_samplemask)
+ return V_028710_SPI_SHADER_32_ABGR;
+ else if (writes_stencil)
+ return V_028710_SPI_SHADER_32_GR;
+ else
+ return V_028710_SPI_SHADER_32_R;
+ } else if (writes_stencil || writes_samplemask) {
+ /* Both stencil and sample mask need only 16 bits. */
+ return V_028710_SPI_SHADER_UINT16_ABGR;
+ } else {
+ return V_028710_SPI_SHADER_ZERO;
+ }
}
-unsigned
-ac_get_cb_shader_mask(unsigned spi_shader_col_format)
+unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format)
{
- unsigned i, cb_shader_mask = 0;
-
- for (i = 0; i < 8; i++) {
- switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
- case V_028714_SPI_SHADER_ZERO:
- break;
- case V_028714_SPI_SHADER_32_R:
- cb_shader_mask |= 0x1 << (i * 4);
- break;
- case V_028714_SPI_SHADER_32_GR:
- cb_shader_mask |= 0x3 << (i * 4);
- break;
- case V_028714_SPI_SHADER_32_AR:
- cb_shader_mask |= 0x9u << (i * 4);
- break;
- case V_028714_SPI_SHADER_FP16_ABGR:
- case V_028714_SPI_SHADER_UNORM16_ABGR:
- case V_028714_SPI_SHADER_SNORM16_ABGR:
- case V_028714_SPI_SHADER_UINT16_ABGR:
- case V_028714_SPI_SHADER_SINT16_ABGR:
- case V_028714_SPI_SHADER_32_ABGR:
- cb_shader_mask |= 0xfu << (i * 4);
- break;
- default:
- assert(0);
- }
- }
- return cb_shader_mask;
+ unsigned i, cb_shader_mask = 0;
+
+ for (i = 0; i < 8; i++) {
+ switch ((spi_shader_col_format >> (i * 4)) & 0xf) {
+ case V_028714_SPI_SHADER_ZERO:
+ break;
+ case V_028714_SPI_SHADER_32_R:
+ cb_shader_mask |= 0x1 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_GR:
+ cb_shader_mask |= 0x3 << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_32_AR:
+ cb_shader_mask |= 0x9u << (i * 4);
+ break;
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ case V_028714_SPI_SHADER_32_ABGR:
+ cb_shader_mask |= 0xfu << (i * 4);
+ break;
+ default:
+ assert(0);
+ }
+ }
+ return cb_shader_mask;
}
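A worked example of the mask construction above: a pixel shader exporting 32_R to MRT0 and FP16_ABGR to MRT1 contributes 0x1 << 0 and 0xf << 4, so cb_shader_mask == 0xf1, with all other MRT nibbles left at 0.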
/**
* Calculate the appropriate setting of VGT_GS_MODE for a geometry shader
* with the given \p gs_max_vert_out.
*/
-uint32_t
-ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
+uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class)
{
- unsigned cut_mode;
-
- if (gs_max_vert_out <= 128) {
- cut_mode = V_028A40_GS_CUT_128;
- } else if (gs_max_vert_out <= 256) {
- cut_mode = V_028A40_GS_CUT_256;
- } else if (gs_max_vert_out <= 512) {
- cut_mode = V_028A40_GS_CUT_512;
- } else {
- assert(gs_max_vert_out <= 1024);
- cut_mode = V_028A40_GS_CUT_1024;
- }
-
- return S_028A40_MODE(V_028A40_GS_SCENARIO_G) |
- S_028A40_CUT_MODE(cut_mode)|
- S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) |
- S_028A40_GS_WRITE_OPTIMIZE(1) |
- S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
+ unsigned cut_mode;
+
+ if (gs_max_vert_out <= 128) {
+ cut_mode = V_028A40_GS_CUT_128;
+ } else if (gs_max_vert_out <= 256) {
+ cut_mode = V_028A40_GS_CUT_256;
+ } else if (gs_max_vert_out <= 512) {
+ cut_mode = V_028A40_GS_CUT_512;
+ } else {
+ assert(gs_max_vert_out <= 1024);
+ cut_mode = V_028A40_GS_CUT_1024;
+ }
+
+ return S_028A40_MODE(V_028A40_GS_SCENARIO_G) | S_028A40_CUT_MODE(cut_mode) |
+ S_028A40_ES_WRITE_OPTIMIZE(chip_class <= GFX8) | S_028A40_GS_WRITE_OPTIMIZE(1) |
+ S_028A40_ONCHIP(chip_class >= GFX9 ? 1 : 0);
}
/// Translate a (dfmt, nfmt) pair into a chip-appropriate combined format
/// value for LLVM8+ tbuffer intrinsics.
-unsigned
-ac_get_tbuffer_format(enum chip_class chip_class,
- unsigned dfmt, unsigned nfmt)
+unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt)
{
- // Some games try to access vertex buffers without a valid format.
- // This is a game bug, but we should still handle it gracefully.
- if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
- return V_008F0C_IMG_FORMAT_INVALID;
-
- if (chip_class >= GFX10) {
- unsigned format;
- switch (dfmt) {
- default: unreachable("bad dfmt");
- case V_008F0C_BUF_DATA_FORMAT_INVALID: format = V_008F0C_IMG_FORMAT_INVALID; break;
- case V_008F0C_BUF_DATA_FORMAT_8: format = V_008F0C_IMG_FORMAT_8_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_8_8: format = V_008F0C_IMG_FORMAT_8_8_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_16: format = V_008F0C_IMG_FORMAT_16_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_16_16: format = V_008F0C_IMG_FORMAT_16_16_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_32: format = V_008F0C_IMG_FORMAT_32_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_32_32: format = V_008F0C_IMG_FORMAT_32_32_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT; break;
- case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT; break;
- }
-
- // Use the regularity properties of the combined format enum.
- //
- // Note: float is incompatible with 8-bit data formats,
- // [us]{norm,scaled} are incomparible with 32-bit data formats.
- // [us]scaled are not writable.
- switch (nfmt) {
- case V_008F0C_BUF_NUM_FORMAT_UNORM: format -= 4; break;
- case V_008F0C_BUF_NUM_FORMAT_SNORM: format -= 3; break;
- case V_008F0C_BUF_NUM_FORMAT_USCALED: format -= 2; break;
- case V_008F0C_BUF_NUM_FORMAT_SSCALED: format -= 1; break;
- default: unreachable("bad nfmt");
- case V_008F0C_BUF_NUM_FORMAT_UINT: break;
- case V_008F0C_BUF_NUM_FORMAT_SINT: format += 1; break;
- case V_008F0C_BUF_NUM_FORMAT_FLOAT: format += 2; break;
- }
-
- return format;
- } else {
- return dfmt | (nfmt << 4);
- }
+ // Some games try to access vertex buffers without a valid format.
+ // This is a game bug, but we should still handle it gracefully.
+ if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
+ return V_008F0C_IMG_FORMAT_INVALID;
+
+ if (chip_class >= GFX10) {
+ unsigned format;
+ switch (dfmt) {
+ default:
+ unreachable("bad dfmt");
+ case V_008F0C_BUF_DATA_FORMAT_INVALID:
+ format = V_008F0C_IMG_FORMAT_INVALID;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_8:
+ format = V_008F0C_IMG_FORMAT_8_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_8_8:
+ format = V_008F0C_IMG_FORMAT_8_8_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+ format = V_008F0C_IMG_FORMAT_8_8_8_8_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_16:
+ format = V_008F0C_IMG_FORMAT_16_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_16_16:
+ format = V_008F0C_IMG_FORMAT_16_16_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+ format = V_008F0C_IMG_FORMAT_16_16_16_16_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_32:
+ format = V_008F0C_IMG_FORMAT_32_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_32_32:
+ format = V_008F0C_IMG_FORMAT_32_32_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+ format = V_008F0C_IMG_FORMAT_32_32_32_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+ format = V_008F0C_IMG_FORMAT_32_32_32_32_UINT;
+ break;
+ case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+ format = V_008F0C_IMG_FORMAT_2_10_10_10_UINT;
+ break;
+ }
+
+ // Use the regularity properties of the combined format enum.
+ //
+ // Note: float is incompatible with 8-bit data formats,
+   // [us]{norm,scaled} are incompatible with 32-bit data formats.
+ // [us]scaled are not writable.
+ switch (nfmt) {
+ case V_008F0C_BUF_NUM_FORMAT_UNORM:
+ format -= 4;
+ break;
+ case V_008F0C_BUF_NUM_FORMAT_SNORM:
+ format -= 3;
+ break;
+ case V_008F0C_BUF_NUM_FORMAT_USCALED:
+ format -= 2;
+ break;
+ case V_008F0C_BUF_NUM_FORMAT_SSCALED:
+ format -= 1;
+ break;
+ default:
+ unreachable("bad nfmt");
+ case V_008F0C_BUF_NUM_FORMAT_UINT:
+ break;
+ case V_008F0C_BUF_NUM_FORMAT_SINT:
+ format += 1;
+ break;
+ case V_008F0C_BUF_NUM_FORMAT_FLOAT:
+ format += 2;
+ break;
+ }
+
+ return format;
+ } else {
+ return dfmt | (nfmt << 4);
+ }
}
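A worked example of the gfx10 regularity trick above: each data format's numeric variants sit consecutively around the UINT entry, so 16_16 with FLOAT resolves to V_008F0C_IMG_FORMAT_16_16_UINT + 2 and 16_16 with UNORM to V_008F0C_IMG_FORMAT_16_16_UINT - 4; on gfx9 and older the same pair simply packs as dfmt | (nfmt << 4).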
static const struct ac_data_format_info data_format_table[] = {
- [V_008F0C_BUF_DATA_FORMAT_INVALID] = { 0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID },
- [V_008F0C_BUF_DATA_FORMAT_8] = { 1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8 },
- [V_008F0C_BUF_DATA_FORMAT_16] = { 2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16 },
- [V_008F0C_BUF_DATA_FORMAT_8_8] = { 2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8 },
- [V_008F0C_BUF_DATA_FORMAT_32] = { 4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32 },
- [V_008F0C_BUF_DATA_FORMAT_16_16] = { 4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16 },
- [V_008F0C_BUF_DATA_FORMAT_10_11_11] = { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11 },
- [V_008F0C_BUF_DATA_FORMAT_11_11_10] = { 4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10 },
- [V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2 },
- [V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = { 4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10 },
- [V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = { 4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8 },
- [V_008F0C_BUF_DATA_FORMAT_32_32] = { 8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32 },
- [V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = { 8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16 },
- [V_008F0C_BUF_DATA_FORMAT_32_32_32] = { 12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32 },
- [V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = { 16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32 },
+ [V_008F0C_BUF_DATA_FORMAT_INVALID] = {0, 4, 0, V_008F0C_BUF_DATA_FORMAT_INVALID},
+ [V_008F0C_BUF_DATA_FORMAT_8] = {1, 1, 1, V_008F0C_BUF_DATA_FORMAT_8},
+ [V_008F0C_BUF_DATA_FORMAT_16] = {2, 1, 2, V_008F0C_BUF_DATA_FORMAT_16},
+ [V_008F0C_BUF_DATA_FORMAT_8_8] = {2, 2, 1, V_008F0C_BUF_DATA_FORMAT_8},
+ [V_008F0C_BUF_DATA_FORMAT_32] = {4, 1, 4, V_008F0C_BUF_DATA_FORMAT_32},
+ [V_008F0C_BUF_DATA_FORMAT_16_16] = {4, 2, 2, V_008F0C_BUF_DATA_FORMAT_16},
+ [V_008F0C_BUF_DATA_FORMAT_10_11_11] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_10_11_11},
+ [V_008F0C_BUF_DATA_FORMAT_11_11_10] = {4, 3, 0, V_008F0C_BUF_DATA_FORMAT_11_11_10},
+ [V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_10_10_10_2},
+ [V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = {4, 4, 0, V_008F0C_BUF_DATA_FORMAT_2_10_10_10},
+ [V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = {4, 4, 1, V_008F0C_BUF_DATA_FORMAT_8},
+ [V_008F0C_BUF_DATA_FORMAT_32_32] = {8, 2, 4, V_008F0C_BUF_DATA_FORMAT_32},
+ [V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = {8, 4, 2, V_008F0C_BUF_DATA_FORMAT_16},
+ [V_008F0C_BUF_DATA_FORMAT_32_32_32] = {12, 3, 4, V_008F0C_BUF_DATA_FORMAT_32},
+ [V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = {16, 4, 4, V_008F0C_BUF_DATA_FORMAT_32},
};
-const struct ac_data_format_info *
-ac_get_data_format_info(unsigned dfmt)
+const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt)
{
- assert(dfmt < ARRAY_SIZE(data_format_table));
- return &data_format_table[dfmt];
+ assert(dfmt < ARRAY_SIZE(data_format_table));
+ return &data_format_table[dfmt];
}
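For instance, read straight off the table above:

const struct ac_data_format_info *vi = ac_get_data_format_info(V_008F0C_BUF_DATA_FORMAT_16_16);
/* vi->element_size == 4, vi->num_channels == 2,
 * vi->chan_byte_size == 2, vi->chan_format == V_008F0C_BUF_DATA_FORMAT_16 */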
-enum ac_image_dim
-ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
- bool is_array)
+enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
+ bool is_array)
{
- switch (dim) {
- case GLSL_SAMPLER_DIM_1D:
- if (chip_class == GFX9)
- return is_array ? ac_image_2darray : ac_image_2d;
- return is_array ? ac_image_1darray : ac_image_1d;
- case GLSL_SAMPLER_DIM_2D:
- case GLSL_SAMPLER_DIM_RECT:
- case GLSL_SAMPLER_DIM_EXTERNAL:
- return is_array ? ac_image_2darray : ac_image_2d;
- case GLSL_SAMPLER_DIM_3D:
- return ac_image_3d;
- case GLSL_SAMPLER_DIM_CUBE:
- return ac_image_cube;
- case GLSL_SAMPLER_DIM_MS:
- return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
- case GLSL_SAMPLER_DIM_SUBPASS:
- return ac_image_2darray;
- case GLSL_SAMPLER_DIM_SUBPASS_MS:
- return ac_image_2darraymsaa;
- default:
- unreachable("bad sampler dim");
- }
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ if (chip_class == GFX9)
+ return is_array ? ac_image_2darray : ac_image_2d;
+ return is_array ? ac_image_1darray : ac_image_1d;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return is_array ? ac_image_2darray : ac_image_2d;
+ case GLSL_SAMPLER_DIM_3D:
+ return ac_image_3d;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return ac_image_cube;
+ case GLSL_SAMPLER_DIM_MS:
+ return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
+ case GLSL_SAMPLER_DIM_SUBPASS:
+ return ac_image_2darray;
+ case GLSL_SAMPLER_DIM_SUBPASS_MS:
+ return ac_image_2darraymsaa;
+ default:
+ unreachable("bad sampler dim");
+ }
}
-enum ac_image_dim
-ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
- bool is_array)
+enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
+ bool is_array)
{
- enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
-
- /* Match the resource type set in the descriptor. */
- if (dim == ac_image_cube ||
- (chip_class <= GFX8 && dim == ac_image_3d))
- dim = ac_image_2darray;
- else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
- /* When a single layer of a 3D texture is bound, the shader
- * will refer to a 2D target, but the descriptor has a 3D type.
- * Since the HW ignores BASE_ARRAY in this case, we need to
- * send 3 coordinates. This doesn't hurt when the underlying
- * texture is non-3D.
- */
- dim = ac_image_3d;
- }
-
- return dim;
+ enum ac_image_dim dim = ac_get_sampler_dim(chip_class, sdim, is_array);
+
+ /* Match the resource type set in the descriptor. */
+ if (dim == ac_image_cube || (chip_class <= GFX8 && dim == ac_image_3d))
+ dim = ac_image_2darray;
+ else if (sdim == GLSL_SAMPLER_DIM_2D && !is_array && chip_class == GFX9) {
+ /* When a single layer of a 3D texture is bound, the shader
+ * will refer to a 2D target, but the descriptor has a 3D type.
+ * Since the HW ignores BASE_ARRAY in this case, we need to
+ * send 3 coordinates. This doesn't hurt when the underlying
+ * texture is non-3D.
+ */
+ dim = ac_image_3d;
+ }
+
+ return dim;
}
-unsigned
-ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
- signed char *face_vgpr_index_ptr,
- signed char *ancillary_vgpr_index_ptr)
+unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
+ signed char *face_vgpr_index_ptr,
+ signed char *ancillary_vgpr_index_ptr)
{
- unsigned num_input_vgprs = 0;
- signed char face_vgpr_index = -1;
- signed char ancillary_vgpr_index = -1;
-
- if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 3;
- if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 2;
- if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
- face_vgpr_index = num_input_vgprs;
- num_input_vgprs += 1;
- }
- if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
- ancillary_vgpr_index = num_input_vgprs;
- num_input_vgprs += 1;
- }
- if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
- if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
- num_input_vgprs += 1;
-
- if (face_vgpr_index_ptr)
- *face_vgpr_index_ptr = face_vgpr_index;
- if (ancillary_vgpr_index_ptr)
- *ancillary_vgpr_index_ptr = ancillary_vgpr_index;
-
- return num_input_vgprs;
+ unsigned num_input_vgprs = 0;
+ signed char face_vgpr_index = -1;
+ signed char ancillary_vgpr_index = -1;
+
+ if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 3;
+ if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 2;
+ if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) {
+ face_vgpr_index = num_input_vgprs;
+ num_input_vgprs += 1;
+ }
+ if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) {
+ ancillary_vgpr_index = num_input_vgprs;
+ num_input_vgprs += 1;
+ }
+ if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+ if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
+ num_input_vgprs += 1;
+
+ if (face_vgpr_index_ptr)
+ *face_vgpr_index_ptr = face_vgpr_index;
+ if (ancillary_vgpr_index_ptr)
+ *ancillary_vgpr_index_ptr = ancillary_vgpr_index;
+
+ return num_input_vgprs;
}
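A worked example of the counting above (illustrative): with PERSP_CENTER, POS_X_FLOAT, POS_Y_FLOAT and FRONT_FACE enabled in spi_ps_input_addr, the inputs ahead of the face occupy 2 + 1 + 1 = 4 VGPRs, so face_vgpr_index = 4, ancillary_vgpr_index stays -1, and the function returns 5.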
-void ac_choose_spi_color_formats(unsigned format, unsigned swap,
- unsigned ntype, bool is_depth,
- struct ac_spi_color_formats *formats)
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+ struct ac_spi_color_formats *formats)
{
/* Alpha is needed for alpha-to-coverage.
* Blending may be with or without alpha.
#ifndef AC_SHADER_UTIL_H
#define AC_SHADER_UTIL_H
-#include <stdbool.h>
-#include <stdint.h>
-
-#include "amd_family.h"
#include "ac_binary.h"
+#include "amd_family.h"
#include "compiler/nir/nir.h"
+#include <stdbool.h>
+#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-enum ac_image_dim {
- ac_image_1d,
- ac_image_2d,
- ac_image_3d,
- ac_image_cube, // includes cube arrays
- ac_image_1darray,
- ac_image_2darray,
- ac_image_2dmsaa,
- ac_image_2darraymsaa,
+enum ac_image_dim
+{
+ ac_image_1d,
+ ac_image_2d,
+ ac_image_3d,
+ ac_image_cube, // includes cube arrays
+ ac_image_1darray,
+ ac_image_2darray,
+ ac_image_2dmsaa,
+ ac_image_2darraymsaa,
};
struct ac_data_format_info {
- uint8_t element_size;
- uint8_t num_channels;
- uint8_t chan_byte_size;
- uint8_t chan_format;
+ uint8_t element_size;
+ uint8_t num_channels;
+ uint8_t chan_byte_size;
+ uint8_t chan_format;
};
struct ac_spi_color_formats {
- unsigned normal : 8;
- unsigned alpha : 8;
- unsigned blend : 8;
- unsigned blend_alpha : 8;
+ unsigned normal : 8;
+ unsigned alpha : 8;
+ unsigned blend : 8;
+ unsigned blend_alpha : 8;
};
-unsigned
-ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
- bool writes_samplemask);
+unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);
-unsigned
-ac_get_cb_shader_mask(unsigned spi_shader_col_format);
+unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);
-uint32_t
-ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
+uint32_t ac_vgt_gs_mode(unsigned gs_max_vert_out, enum chip_class chip_class);
-unsigned
-ac_get_tbuffer_format(enum chip_class chip_class,
- unsigned dfmt, unsigned nfmt);
+unsigned ac_get_tbuffer_format(enum chip_class chip_class, unsigned dfmt, unsigned nfmt);
-const struct ac_data_format_info *
-ac_get_data_format_info(unsigned dfmt);
+const struct ac_data_format_info *ac_get_data_format_info(unsigned dfmt);
-enum ac_image_dim
-ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
- bool is_array);
+enum ac_image_dim ac_get_sampler_dim(enum chip_class chip_class, enum glsl_sampler_dim dim,
+ bool is_array);
-enum ac_image_dim
-ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
- bool is_array);
+enum ac_image_dim ac_get_image_dim(enum chip_class chip_class, enum glsl_sampler_dim sdim,
+ bool is_array);
-unsigned
-ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
- signed char *face_vgpr_index,
- signed char *ancillary_vgpr_index);
+unsigned ac_get_fs_input_vgpr_cnt(const struct ac_shader_config *config,
+ signed char *face_vgpr_index, signed char *ancillary_vgpr_index);
-void ac_choose_spi_color_formats(unsigned format, unsigned swap,
- unsigned ntype, bool is_depth,
- struct ac_spi_color_formats *formats);
+void ac_choose_spi_color_formats(unsigned format, unsigned swap, unsigned ntype, bool is_depth,
+ struct ac_spi_color_formats *formats);
#ifdef __cplusplus
}
*/
#include "ac_shadowed_regs.h"
+
#include "ac_debug.h"
#include "sid.h"
#include "util/macros.h"
#include "util/u_debug.h"
+
#include <stdio.h>
static const struct ac_reg_range Gfx9UserConfigShadowRange[] = {
VGT_DMA_PRIMITIVE_TYPE,
VGT_DMA_LS_HS_CONFIG - VGT_DMA_PRIMITIVE_TYPE + 4,
},*/
- /* VGT_INDEX_TYPE and VGT_DMA_INDEX_TYPE are a special case and neither of these should be shadowed. */
+ /* VGT_INDEX_TYPE and VGT_DMA_INDEX_TYPE are a special case and neither of these should be
+ shadowed. */
{
R_028A7C_VGT_DMA_INDEX_TYPE,
4,
VGT_DMA_PRIMITIVE_TYPE,
VGT_DMA_LS_HS_CONFIG - VGT_DMA_PRIMITIVE_TYPE + 4,
},*/
- /* VGT_INDEX_TYPE and VGT_DMA_INDEX_TYPE are a special case and neither of these should be shadowed. */
+ /* VGT_INDEX_TYPE and VGT_DMA_INDEX_TYPE are a special case and neither of these should be
+ shadowed. */
{
R_028A7C_VGT_DMA_INDEX_TYPE,
4,
enum ac_reg_range_type type, unsigned *num_ranges,
const struct ac_reg_range **ranges)
{
-#define RETURN(array) do { *ranges = array; *num_ranges = ARRAY_SIZE(array); } while (0)
+#define RETURN(array) \
+ do { \
+ *ranges = array; \
+ *num_ranges = ARRAY_SIZE(array); \
+ } while (0)
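The RETURN macro keeps its do { ... } while (0) wrapper through the reformat. That idiom is what lets the two assignments behave as a single statement, so a call site such as

   if (chip_class == GFX9)
      RETURN(Gfx9ShShadowRange);
   else
      ...

expands cleanly; bare braces followed by the caller's semicolon would break the if/else.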
*num_ranges = 0;
*ranges = NULL;
case SI_REG_RANGE_SH:
if (chip_class == GFX10_3 || chip_class == GFX10)
RETURN(Gfx10ShShadowRange);
- else if (family == CHIP_RAVEN2 ||
- family == CHIP_RENOIR)
+ else if (family == CHIP_RAVEN2 || family == CHIP_RENOIR)
RETURN(Gfx9ShShadowRangeRaven2);
else if (chip_class == GFX9)
RETURN(Gfx9ShShadowRange);
case SI_REG_RANGE_CS_SH:
if (chip_class == GFX10_3 || chip_class == GFX10)
RETURN(Gfx10CsShShadowRange);
- else if (family == CHIP_RAVEN2 ||
- family == CHIP_RENOIR)
+ else if (family == CHIP_RAVEN2 || family == CHIP_RENOIR)
RETURN(Gfx9CsShShadowRangeRaven2);
else if (chip_class == GFX9)
RETURN(Gfx9CsShShadowRange);
set_context_reg_seq_array_fn set_context_reg_seq_array)
{
static const uint32_t DbRenderControlGfx9[] = {
- 0x0 , // DB_RENDER_CONTROL
- 0x0 , // DB_COUNT_CONTROL
- 0x0 , // DB_DEPTH_VIEW
- 0x0 , // DB_RENDER_OVERRIDE
- 0x0 , // DB_RENDER_OVERRIDE2
- 0x0 , // DB_HTILE_DATA_BASE
- 0x0 , // DB_HTILE_DATA_BASE_HI
- 0x0 , // DB_DEPTH_SIZE
- 0x0 , // DB_DEPTH_BOUNDS_MIN
- 0x0 , // DB_DEPTH_BOUNDS_MAX
- 0x0 , // DB_STENCIL_CLEAR
- 0x0 , // DB_DEPTH_CLEAR
- 0x0 , // PA_SC_SCREEN_SCISSOR_TL
+ 0x0, // DB_RENDER_CONTROL
+ 0x0, // DB_COUNT_CONTROL
+ 0x0, // DB_DEPTH_VIEW
+ 0x0, // DB_RENDER_OVERRIDE
+ 0x0, // DB_RENDER_OVERRIDE2
+ 0x0, // DB_HTILE_DATA_BASE
+ 0x0, // DB_HTILE_DATA_BASE_HI
+ 0x0, // DB_DEPTH_SIZE
+ 0x0, // DB_DEPTH_BOUNDS_MIN
+ 0x0, // DB_DEPTH_BOUNDS_MAX
+ 0x0, // DB_STENCIL_CLEAR
+ 0x0, // DB_DEPTH_CLEAR
+ 0x0, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
- 0x0 , // DB_Z_INFO
- 0x0 , // DB_STENCIL_INFO
- 0x0 , // DB_Z_READ_BASE
- 0x0 , // DB_Z_READ_BASE_HI
- 0x0 , // DB_STENCIL_READ_BASE
- 0x0 , // DB_STENCIL_READ_BASE_HI
- 0x0 , // DB_Z_WRITE_BASE
- 0x0 , // DB_Z_WRITE_BASE_HI
- 0x0 , // DB_STENCIL_WRITE_BASE
- 0x0 , // DB_STENCIL_WRITE_BASE_HI
- 0x0 , // DB_DFSM_CONTROL
- 0x0 , //
- 0x0 , // DB_Z_INFO2
- 0x0 , // DB_STENCIL_INFO2
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // TA_BC_BASE_ADDR
+ 0x0, // DB_Z_INFO
+ 0x0, // DB_STENCIL_INFO
+ 0x0, // DB_Z_READ_BASE
+ 0x0, // DB_Z_READ_BASE_HI
+ 0x0, // DB_STENCIL_READ_BASE
+ 0x0, // DB_STENCIL_READ_BASE_HI
+ 0x0, // DB_Z_WRITE_BASE
+ 0x0, // DB_Z_WRITE_BASE_HI
+ 0x0, // DB_STENCIL_WRITE_BASE
+ 0x0, // DB_STENCIL_WRITE_BASE_HI
+ 0x0, // DB_DFSM_CONTROL
+ 0x0, //
+ 0x0, // DB_Z_INFO2
+ 0x0, // DB_STENCIL_INFO2
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // TA_BC_BASE_ADDR
0x0 // TA_BC_BASE_ADDR_HI
};
static const uint32_t CoherDestBaseHi0Gfx9[] = {
- 0x0 , // COHER_DEST_BASE_HI_0
- 0x0 , // COHER_DEST_BASE_HI_1
- 0x0 , // COHER_DEST_BASE_HI_2
- 0x0 , // COHER_DEST_BASE_HI_3
- 0x0 , // COHER_DEST_BASE_2
- 0x0 , // COHER_DEST_BASE_3
- 0x0 , // PA_SC_WINDOW_OFFSET
+ 0x0, // COHER_DEST_BASE_HI_0
+ 0x0, // COHER_DEST_BASE_HI_1
+ 0x0, // COHER_DEST_BASE_HI_2
+ 0x0, // COHER_DEST_BASE_HI_3
+ 0x0, // COHER_DEST_BASE_2
+ 0x0, // COHER_DEST_BASE_3
+ 0x0, // PA_SC_WINDOW_OFFSET
0x80000000, // PA_SC_WINDOW_SCISSOR_TL
0x40004000, // PA_SC_WINDOW_SCISSOR_BR
- 0xffff , // PA_SC_CLIPRECT_RULE
- 0x0 , // PA_SC_CLIPRECT_0_TL
+ 0xffff, // PA_SC_CLIPRECT_RULE
+ 0x0, // PA_SC_CLIPRECT_0_TL
0x40004000, // PA_SC_CLIPRECT_0_BR
- 0x0 , // PA_SC_CLIPRECT_1_TL
+ 0x0, // PA_SC_CLIPRECT_1_TL
0x40004000, // PA_SC_CLIPRECT_1_BR
- 0x0 , // PA_SC_CLIPRECT_2_TL
+ 0x0, // PA_SC_CLIPRECT_2_TL
0x40004000, // PA_SC_CLIPRECT_2_BR
- 0x0 , // PA_SC_CLIPRECT_3_TL
+ 0x0, // PA_SC_CLIPRECT_3_TL
0x40004000, // PA_SC_CLIPRECT_3_BR
0xaa99aaaa, // PA_SC_EDGERULE
- 0x0 , // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0x0, // PA_SU_HARDWARE_SCREEN_OFFSET
0xffffffff, // CB_TARGET_MASK
0xffffffff, // CB_SHADER_MASK
0x80000000, // PA_SC_GENERIC_SCISSOR_TL
0x40004000, // PA_SC_GENERIC_SCISSOR_BR
- 0x0 , // COHER_DEST_BASE_0
- 0x0 , // COHER_DEST_BASE_1
+ 0x0, // COHER_DEST_BASE_0
+ 0x0, // COHER_DEST_BASE_1
0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
- 0x0 , // PA_SC_VPORT_ZMIN_0
+ 0x0, // PA_SC_VPORT_ZMIN_0
0x3f800000, // PA_SC_VPORT_ZMAX_0
- 0x0 , // PA_SC_VPORT_ZMIN_1
+ 0x0, // PA_SC_VPORT_ZMIN_1
0x3f800000, // PA_SC_VPORT_ZMAX_1
- 0x0 , // PA_SC_VPORT_ZMIN_2
+ 0x0, // PA_SC_VPORT_ZMIN_2
0x3f800000, // PA_SC_VPORT_ZMAX_2
- 0x0 , // PA_SC_VPORT_ZMIN_3
+ 0x0, // PA_SC_VPORT_ZMIN_3
0x3f800000, // PA_SC_VPORT_ZMAX_3
- 0x0 , // PA_SC_VPORT_ZMIN_4
+ 0x0, // PA_SC_VPORT_ZMIN_4
0x3f800000, // PA_SC_VPORT_ZMAX_4
- 0x0 , // PA_SC_VPORT_ZMIN_5
+ 0x0, // PA_SC_VPORT_ZMIN_5
0x3f800000, // PA_SC_VPORT_ZMAX_5
- 0x0 , // PA_SC_VPORT_ZMIN_6
+ 0x0, // PA_SC_VPORT_ZMIN_6
0x3f800000, // PA_SC_VPORT_ZMAX_6
- 0x0 , // PA_SC_VPORT_ZMIN_7
+ 0x0, // PA_SC_VPORT_ZMIN_7
0x3f800000, // PA_SC_VPORT_ZMAX_7
- 0x0 , // PA_SC_VPORT_ZMIN_8
+ 0x0, // PA_SC_VPORT_ZMIN_8
0x3f800000, // PA_SC_VPORT_ZMAX_8
- 0x0 , // PA_SC_VPORT_ZMIN_9
+ 0x0, // PA_SC_VPORT_ZMIN_9
0x3f800000, // PA_SC_VPORT_ZMAX_9
- 0x0 , // PA_SC_VPORT_ZMIN_10
+ 0x0, // PA_SC_VPORT_ZMIN_10
0x3f800000, // PA_SC_VPORT_ZMAX_10
- 0x0 , // PA_SC_VPORT_ZMIN_11
+ 0x0, // PA_SC_VPORT_ZMIN_11
0x3f800000, // PA_SC_VPORT_ZMAX_11
- 0x0 , // PA_SC_VPORT_ZMIN_12
+ 0x0, // PA_SC_VPORT_ZMIN_12
0x3f800000, // PA_SC_VPORT_ZMAX_12
- 0x0 , // PA_SC_VPORT_ZMIN_13
+ 0x0, // PA_SC_VPORT_ZMIN_13
0x3f800000, // PA_SC_VPORT_ZMAX_13
- 0x0 , // PA_SC_VPORT_ZMIN_14
+ 0x0, // PA_SC_VPORT_ZMIN_14
0x3f800000, // PA_SC_VPORT_ZMAX_14
- 0x0 , // PA_SC_VPORT_ZMIN_15
+ 0x0, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
- 0x0 , // PA_SC_RASTER_CONFIG
- 0x0 , // PA_SC_RASTER_CONFIG_1
- 0x0 , //
+ 0x0, // PA_SC_RASTER_CONFIG
+ 0x0, // PA_SC_RASTER_CONFIG_1
+ 0x0, //
0x0 // PA_SC_TILE_STEERING_OVERRIDE
};
static const uint32_t VgtMultiPrimIbResetIndxGfx9[] = {
- 0x0 // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x0 // VGT_MULTI_PRIM_IB_RESET_INDX
};
static const uint32_t CbBlendRedGfx9[] = {
- 0x0 , // CB_BLEND_RED
- 0x0 , // CB_BLEND_GREEN
- 0x0 , // CB_BLEND_BLUE
- 0x0 , // CB_BLEND_ALPHA
- 0x0 , // CB_DCC_CONTROL
- 0x0 , //
- 0x0 , // DB_STENCIL_CONTROL
- 0x1000000 , // DB_STENCILREFMASK
- 0x1000000 , // DB_STENCILREFMASK_BF
- 0x0 , //
- 0x0 , // PA_CL_VPORT_XSCALE
- 0x0 , // PA_CL_VPORT_XOFFSET
- 0x0 , // PA_CL_VPORT_YSCALE
- 0x0 , // PA_CL_VPORT_YOFFSET
- 0x0 , // PA_CL_VPORT_ZSCALE
- 0x0 , // PA_CL_VPORT_ZOFFSET
- 0x0 , // PA_CL_VPORT_XSCALE_1
- 0x0 , // PA_CL_VPORT_XOFFSET_1
- 0x0 , // PA_CL_VPORT_YSCALE_1
- 0x0 , // PA_CL_VPORT_YOFFSET_1
- 0x0 , // PA_CL_VPORT_ZSCALE_1
- 0x0 , // PA_CL_VPORT_ZOFFSET_1
- 0x0 , // PA_CL_VPORT_XSCALE_2
- 0x0 , // PA_CL_VPORT_XOFFSET_2
- 0x0 , // PA_CL_VPORT_YSCALE_2
- 0x0 , // PA_CL_VPORT_YOFFSET_2
- 0x0 , // PA_CL_VPORT_ZSCALE_2
- 0x0 , // PA_CL_VPORT_ZOFFSET_2
- 0x0 , // PA_CL_VPORT_XSCALE_3
- 0x0 , // PA_CL_VPORT_XOFFSET_3
- 0x0 , // PA_CL_VPORT_YSCALE_3
- 0x0 , // PA_CL_VPORT_YOFFSET_3
- 0x0 , // PA_CL_VPORT_ZSCALE_3
- 0x0 , // PA_CL_VPORT_ZOFFSET_3
- 0x0 , // PA_CL_VPORT_XSCALE_4
- 0x0 , // PA_CL_VPORT_XOFFSET_4
- 0x0 , // PA_CL_VPORT_YSCALE_4
- 0x0 , // PA_CL_VPORT_YOFFSET_4
- 0x0 , // PA_CL_VPORT_ZSCALE_4
- 0x0 , // PA_CL_VPORT_ZOFFSET_4
- 0x0 , // PA_CL_VPORT_XSCALE_5
- 0x0 , // PA_CL_VPORT_XOFFSET_5
- 0x0 , // PA_CL_VPORT_YSCALE_5
- 0x0 , // PA_CL_VPORT_YOFFSET_5
- 0x0 , // PA_CL_VPORT_ZSCALE_5
- 0x0 , // PA_CL_VPORT_ZOFFSET_5
- 0x0 , // PA_CL_VPORT_XSCALE_6
- 0x0 , // PA_CL_VPORT_XOFFSET_6
- 0x0 , // PA_CL_VPORT_YSCALE_6
- 0x0 , // PA_CL_VPORT_YOFFSET_6
- 0x0 , // PA_CL_VPORT_ZSCALE_6
- 0x0 , // PA_CL_VPORT_ZOFFSET_6
- 0x0 , // PA_CL_VPORT_XSCALE_7
- 0x0 , // PA_CL_VPORT_XOFFSET_7
- 0x0 , // PA_CL_VPORT_YSCALE_7
- 0x0 , // PA_CL_VPORT_YOFFSET_7
- 0x0 , // PA_CL_VPORT_ZSCALE_7
- 0x0 , // PA_CL_VPORT_ZOFFSET_7
- 0x0 , // PA_CL_VPORT_XSCALE_8
- 0x0 , // PA_CL_VPORT_XOFFSET_8
- 0x0 , // PA_CL_VPORT_YSCALE_8
- 0x0 , // PA_CL_VPORT_YOFFSET_8
- 0x0 , // PA_CL_VPORT_ZSCALE_8
- 0x0 , // PA_CL_VPORT_ZOFFSET_8
- 0x0 , // PA_CL_VPORT_XSCALE_9
- 0x0 , // PA_CL_VPORT_XOFFSET_9
- 0x0 , // PA_CL_VPORT_YSCALE_9
- 0x0 , // PA_CL_VPORT_YOFFSET_9
- 0x0 , // PA_CL_VPORT_ZSCALE_9
- 0x0 , // PA_CL_VPORT_ZOFFSET_9
- 0x0 , // PA_CL_VPORT_XSCALE_10
- 0x0 , // PA_CL_VPORT_XOFFSET_10
- 0x0 , // PA_CL_VPORT_YSCALE_10
- 0x0 , // PA_CL_VPORT_YOFFSET_10
- 0x0 , // PA_CL_VPORT_ZSCALE_10
- 0x0 , // PA_CL_VPORT_ZOFFSET_10
- 0x0 , // PA_CL_VPORT_XSCALE_11
- 0x0 , // PA_CL_VPORT_XOFFSET_11
- 0x0 , // PA_CL_VPORT_YSCALE_11
- 0x0 , // PA_CL_VPORT_YOFFSET_11
- 0x0 , // PA_CL_VPORT_ZSCALE_11
- 0x0 , // PA_CL_VPORT_ZOFFSET_11
- 0x0 , // PA_CL_VPORT_XSCALE_12
- 0x0 , // PA_CL_VPORT_XOFFSET_12
- 0x0 , // PA_CL_VPORT_YSCALE_12
- 0x0 , // PA_CL_VPORT_YOFFSET_12
- 0x0 , // PA_CL_VPORT_ZSCALE_12
- 0x0 , // PA_CL_VPORT_ZOFFSET_12
- 0x0 , // PA_CL_VPORT_XSCALE_13
- 0x0 , // PA_CL_VPORT_XOFFSET_13
- 0x0 , // PA_CL_VPORT_YSCALE_13
- 0x0 , // PA_CL_VPORT_YOFFSET_13
- 0x0 , // PA_CL_VPORT_ZSCALE_13
- 0x0 , // PA_CL_VPORT_ZOFFSET_13
- 0x0 , // PA_CL_VPORT_XSCALE_14
- 0x0 , // PA_CL_VPORT_XOFFSET_14
- 0x0 , // PA_CL_VPORT_YSCALE_14
- 0x0 , // PA_CL_VPORT_YOFFSET_14
- 0x0 , // PA_CL_VPORT_ZSCALE_14
- 0x0 , // PA_CL_VPORT_ZOFFSET_14
- 0x0 , // PA_CL_VPORT_XSCALE_15
- 0x0 , // PA_CL_VPORT_XOFFSET_15
- 0x0 , // PA_CL_VPORT_YSCALE_15
- 0x0 , // PA_CL_VPORT_YOFFSET_15
- 0x0 , // PA_CL_VPORT_ZSCALE_15
- 0x0 , // PA_CL_VPORT_ZOFFSET_15
- 0x0 , // PA_CL_UCP_0_X
- 0x0 , // PA_CL_UCP_0_Y
- 0x0 , // PA_CL_UCP_0_Z
- 0x0 , // PA_CL_UCP_0_W
- 0x0 , // PA_CL_UCP_1_X
- 0x0 , // PA_CL_UCP_1_Y
- 0x0 , // PA_CL_UCP_1_Z
- 0x0 , // PA_CL_UCP_1_W
- 0x0 , // PA_CL_UCP_2_X
- 0x0 , // PA_CL_UCP_2_Y
- 0x0 , // PA_CL_UCP_2_Z
- 0x0 , // PA_CL_UCP_2_W
- 0x0 , // PA_CL_UCP_3_X
- 0x0 , // PA_CL_UCP_3_Y
- 0x0 , // PA_CL_UCP_3_Z
- 0x0 , // PA_CL_UCP_3_W
- 0x0 , // PA_CL_UCP_4_X
- 0x0 , // PA_CL_UCP_4_Y
- 0x0 , // PA_CL_UCP_4_Z
- 0x0 , // PA_CL_UCP_4_W
- 0x0 , // PA_CL_UCP_5_X
- 0x0 , // PA_CL_UCP_5_Y
- 0x0 , // PA_CL_UCP_5_Z
- 0x0 // PA_CL_UCP_5_W
+ 0x0, // CB_BLEND_RED
+ 0x0, // CB_BLEND_GREEN
+ 0x0, // CB_BLEND_BLUE
+ 0x0, // CB_BLEND_ALPHA
+ 0x0, // CB_DCC_CONTROL
+ 0x0, //
+ 0x0, // DB_STENCIL_CONTROL
+ 0x1000000, // DB_STENCILREFMASK
+ 0x1000000, // DB_STENCILREFMASK_BF
+ 0x0, //
+ 0x0, // PA_CL_VPORT_XSCALE
+ 0x0, // PA_CL_VPORT_XOFFSET
+ 0x0, // PA_CL_VPORT_YSCALE
+ 0x0, // PA_CL_VPORT_YOFFSET
+ 0x0, // PA_CL_VPORT_ZSCALE
+ 0x0, // PA_CL_VPORT_ZOFFSET
+ 0x0, // PA_CL_VPORT_XSCALE_1
+ 0x0, // PA_CL_VPORT_XOFFSET_1
+ 0x0, // PA_CL_VPORT_YSCALE_1
+ 0x0, // PA_CL_VPORT_YOFFSET_1
+ 0x0, // PA_CL_VPORT_ZSCALE_1
+ 0x0, // PA_CL_VPORT_ZOFFSET_1
+ 0x0, // PA_CL_VPORT_XSCALE_2
+ 0x0, // PA_CL_VPORT_XOFFSET_2
+ 0x0, // PA_CL_VPORT_YSCALE_2
+ 0x0, // PA_CL_VPORT_YOFFSET_2
+ 0x0, // PA_CL_VPORT_ZSCALE_2
+ 0x0, // PA_CL_VPORT_ZOFFSET_2
+ 0x0, // PA_CL_VPORT_XSCALE_3
+ 0x0, // PA_CL_VPORT_XOFFSET_3
+ 0x0, // PA_CL_VPORT_YSCALE_3
+ 0x0, // PA_CL_VPORT_YOFFSET_3
+ 0x0, // PA_CL_VPORT_ZSCALE_3
+ 0x0, // PA_CL_VPORT_ZOFFSET_3
+ 0x0, // PA_CL_VPORT_XSCALE_4
+ 0x0, // PA_CL_VPORT_XOFFSET_4
+ 0x0, // PA_CL_VPORT_YSCALE_4
+ 0x0, // PA_CL_VPORT_YOFFSET_4
+ 0x0, // PA_CL_VPORT_ZSCALE_4
+ 0x0, // PA_CL_VPORT_ZOFFSET_4
+ 0x0, // PA_CL_VPORT_XSCALE_5
+ 0x0, // PA_CL_VPORT_XOFFSET_5
+ 0x0, // PA_CL_VPORT_YSCALE_5
+ 0x0, // PA_CL_VPORT_YOFFSET_5
+ 0x0, // PA_CL_VPORT_ZSCALE_5
+ 0x0, // PA_CL_VPORT_ZOFFSET_5
+ 0x0, // PA_CL_VPORT_XSCALE_6
+ 0x0, // PA_CL_VPORT_XOFFSET_6
+ 0x0, // PA_CL_VPORT_YSCALE_6
+ 0x0, // PA_CL_VPORT_YOFFSET_6
+ 0x0, // PA_CL_VPORT_ZSCALE_6
+ 0x0, // PA_CL_VPORT_ZOFFSET_6
+ 0x0, // PA_CL_VPORT_XSCALE_7
+ 0x0, // PA_CL_VPORT_XOFFSET_7
+ 0x0, // PA_CL_VPORT_YSCALE_7
+ 0x0, // PA_CL_VPORT_YOFFSET_7
+ 0x0, // PA_CL_VPORT_ZSCALE_7
+ 0x0, // PA_CL_VPORT_ZOFFSET_7
+ 0x0, // PA_CL_VPORT_XSCALE_8
+ 0x0, // PA_CL_VPORT_XOFFSET_8
+ 0x0, // PA_CL_VPORT_YSCALE_8
+ 0x0, // PA_CL_VPORT_YOFFSET_8
+ 0x0, // PA_CL_VPORT_ZSCALE_8
+ 0x0, // PA_CL_VPORT_ZOFFSET_8
+ 0x0, // PA_CL_VPORT_XSCALE_9
+ 0x0, // PA_CL_VPORT_XOFFSET_9
+ 0x0, // PA_CL_VPORT_YSCALE_9
+ 0x0, // PA_CL_VPORT_YOFFSET_9
+ 0x0, // PA_CL_VPORT_ZSCALE_9
+ 0x0, // PA_CL_VPORT_ZOFFSET_9
+ 0x0, // PA_CL_VPORT_XSCALE_10
+ 0x0, // PA_CL_VPORT_XOFFSET_10
+ 0x0, // PA_CL_VPORT_YSCALE_10
+ 0x0, // PA_CL_VPORT_YOFFSET_10
+ 0x0, // PA_CL_VPORT_ZSCALE_10
+ 0x0, // PA_CL_VPORT_ZOFFSET_10
+ 0x0, // PA_CL_VPORT_XSCALE_11
+ 0x0, // PA_CL_VPORT_XOFFSET_11
+ 0x0, // PA_CL_VPORT_YSCALE_11
+ 0x0, // PA_CL_VPORT_YOFFSET_11
+ 0x0, // PA_CL_VPORT_ZSCALE_11
+ 0x0, // PA_CL_VPORT_ZOFFSET_11
+ 0x0, // PA_CL_VPORT_XSCALE_12
+ 0x0, // PA_CL_VPORT_XOFFSET_12
+ 0x0, // PA_CL_VPORT_YSCALE_12
+ 0x0, // PA_CL_VPORT_YOFFSET_12
+ 0x0, // PA_CL_VPORT_ZSCALE_12
+ 0x0, // PA_CL_VPORT_ZOFFSET_12
+ 0x0, // PA_CL_VPORT_XSCALE_13
+ 0x0, // PA_CL_VPORT_XOFFSET_13
+ 0x0, // PA_CL_VPORT_YSCALE_13
+ 0x0, // PA_CL_VPORT_YOFFSET_13
+ 0x0, // PA_CL_VPORT_ZSCALE_13
+ 0x0, // PA_CL_VPORT_ZOFFSET_13
+ 0x0, // PA_CL_VPORT_XSCALE_14
+ 0x0, // PA_CL_VPORT_XOFFSET_14
+ 0x0, // PA_CL_VPORT_YSCALE_14
+ 0x0, // PA_CL_VPORT_YOFFSET_14
+ 0x0, // PA_CL_VPORT_ZSCALE_14
+ 0x0, // PA_CL_VPORT_ZOFFSET_14
+ 0x0, // PA_CL_VPORT_XSCALE_15
+ 0x0, // PA_CL_VPORT_XOFFSET_15
+ 0x0, // PA_CL_VPORT_YSCALE_15
+ 0x0, // PA_CL_VPORT_YOFFSET_15
+ 0x0, // PA_CL_VPORT_ZSCALE_15
+ 0x0, // PA_CL_VPORT_ZOFFSET_15
+ 0x0, // PA_CL_UCP_0_X
+ 0x0, // PA_CL_UCP_0_Y
+ 0x0, // PA_CL_UCP_0_Z
+ 0x0, // PA_CL_UCP_0_W
+ 0x0, // PA_CL_UCP_1_X
+ 0x0, // PA_CL_UCP_1_Y
+ 0x0, // PA_CL_UCP_1_Z
+ 0x0, // PA_CL_UCP_1_W
+ 0x0, // PA_CL_UCP_2_X
+ 0x0, // PA_CL_UCP_2_Y
+ 0x0, // PA_CL_UCP_2_Z
+ 0x0, // PA_CL_UCP_2_W
+ 0x0, // PA_CL_UCP_3_X
+ 0x0, // PA_CL_UCP_3_Y
+ 0x0, // PA_CL_UCP_3_Z
+ 0x0, // PA_CL_UCP_3_W
+ 0x0, // PA_CL_UCP_4_X
+ 0x0, // PA_CL_UCP_4_Y
+ 0x0, // PA_CL_UCP_4_Z
+ 0x0, // PA_CL_UCP_4_W
+ 0x0, // PA_CL_UCP_5_X
+ 0x0, // PA_CL_UCP_5_Y
+ 0x0, // PA_CL_UCP_5_Z
+ 0x0 // PA_CL_UCP_5_W
};
static const uint32_t SpiPsInputCntl0Gfx9[] = {
- 0x0 , // SPI_PS_INPUT_CNTL_0
- 0x0 , // SPI_PS_INPUT_CNTL_1
- 0x0 , // SPI_PS_INPUT_CNTL_2
- 0x0 , // SPI_PS_INPUT_CNTL_3
- 0x0 , // SPI_PS_INPUT_CNTL_4
- 0x0 , // SPI_PS_INPUT_CNTL_5
- 0x0 , // SPI_PS_INPUT_CNTL_6
- 0x0 , // SPI_PS_INPUT_CNTL_7
- 0x0 , // SPI_PS_INPUT_CNTL_8
- 0x0 , // SPI_PS_INPUT_CNTL_9
- 0x0 , // SPI_PS_INPUT_CNTL_10
- 0x0 , // SPI_PS_INPUT_CNTL_11
- 0x0 , // SPI_PS_INPUT_CNTL_12
- 0x0 , // SPI_PS_INPUT_CNTL_13
- 0x0 , // SPI_PS_INPUT_CNTL_14
- 0x0 , // SPI_PS_INPUT_CNTL_15
- 0x0 , // SPI_PS_INPUT_CNTL_16
- 0x0 , // SPI_PS_INPUT_CNTL_17
- 0x0 , // SPI_PS_INPUT_CNTL_18
- 0x0 , // SPI_PS_INPUT_CNTL_19
- 0x0 , // SPI_PS_INPUT_CNTL_20
- 0x0 , // SPI_PS_INPUT_CNTL_21
- 0x0 , // SPI_PS_INPUT_CNTL_22
- 0x0 , // SPI_PS_INPUT_CNTL_23
- 0x0 , // SPI_PS_INPUT_CNTL_24
- 0x0 , // SPI_PS_INPUT_CNTL_25
- 0x0 , // SPI_PS_INPUT_CNTL_26
- 0x0 , // SPI_PS_INPUT_CNTL_27
- 0x0 , // SPI_PS_INPUT_CNTL_28
- 0x0 , // SPI_PS_INPUT_CNTL_29
- 0x0 , // SPI_PS_INPUT_CNTL_30
- 0x0 , // SPI_PS_INPUT_CNTL_31
- 0x0 , // SPI_VS_OUT_CONFIG
- 0x0 , //
- 0x0 , // SPI_PS_INPUT_ENA
- 0x0 , // SPI_PS_INPUT_ADDR
- 0x0 , // SPI_INTERP_CONTROL_0
- 0x2 , // SPI_PS_IN_CONTROL
- 0x0 , //
- 0x0 , // SPI_BARYC_CNTL
- 0x0 , //
- 0x0 , // SPI_TMPRING_SIZE
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // SPI_SHADER_POS_FORMAT
- 0x0 , // SPI_SHADER_Z_FORMAT
- 0x0 // SPI_SHADER_COL_FORMAT
+ 0x0, // SPI_PS_INPUT_CNTL_0
+ 0x0, // SPI_PS_INPUT_CNTL_1
+ 0x0, // SPI_PS_INPUT_CNTL_2
+ 0x0, // SPI_PS_INPUT_CNTL_3
+ 0x0, // SPI_PS_INPUT_CNTL_4
+ 0x0, // SPI_PS_INPUT_CNTL_5
+ 0x0, // SPI_PS_INPUT_CNTL_6
+ 0x0, // SPI_PS_INPUT_CNTL_7
+ 0x0, // SPI_PS_INPUT_CNTL_8
+ 0x0, // SPI_PS_INPUT_CNTL_9
+ 0x0, // SPI_PS_INPUT_CNTL_10
+ 0x0, // SPI_PS_INPUT_CNTL_11
+ 0x0, // SPI_PS_INPUT_CNTL_12
+ 0x0, // SPI_PS_INPUT_CNTL_13
+ 0x0, // SPI_PS_INPUT_CNTL_14
+ 0x0, // SPI_PS_INPUT_CNTL_15
+ 0x0, // SPI_PS_INPUT_CNTL_16
+ 0x0, // SPI_PS_INPUT_CNTL_17
+ 0x0, // SPI_PS_INPUT_CNTL_18
+ 0x0, // SPI_PS_INPUT_CNTL_19
+ 0x0, // SPI_PS_INPUT_CNTL_20
+ 0x0, // SPI_PS_INPUT_CNTL_21
+ 0x0, // SPI_PS_INPUT_CNTL_22
+ 0x0, // SPI_PS_INPUT_CNTL_23
+ 0x0, // SPI_PS_INPUT_CNTL_24
+ 0x0, // SPI_PS_INPUT_CNTL_25
+ 0x0, // SPI_PS_INPUT_CNTL_26
+ 0x0, // SPI_PS_INPUT_CNTL_27
+ 0x0, // SPI_PS_INPUT_CNTL_28
+ 0x0, // SPI_PS_INPUT_CNTL_29
+ 0x0, // SPI_PS_INPUT_CNTL_30
+ 0x0, // SPI_PS_INPUT_CNTL_31
+ 0x0, // SPI_VS_OUT_CONFIG
+ 0x0, //
+ 0x0, // SPI_PS_INPUT_ENA
+ 0x0, // SPI_PS_INPUT_ADDR
+ 0x0, // SPI_INTERP_CONTROL_0
+ 0x2, // SPI_PS_IN_CONTROL
+ 0x0, //
+ 0x0, // SPI_BARYC_CNTL
+ 0x0, //
+ 0x0, // SPI_TMPRING_SIZE
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // SPI_SHADER_POS_FORMAT
+ 0x0, // SPI_SHADER_Z_FORMAT
+ 0x0 // SPI_SHADER_COL_FORMAT
};
static const uint32_t SxPsDownconvertGfx9[] = {
- 0x0 , // SX_PS_DOWNCONVERT
- 0x0 , // SX_BLEND_OPT_EPSILON
- 0x0 , // SX_BLEND_OPT_CONTROL
- 0x0 , // SX_MRT0_BLEND_OPT
- 0x0 , // SX_MRT1_BLEND_OPT
- 0x0 , // SX_MRT2_BLEND_OPT
- 0x0 , // SX_MRT3_BLEND_OPT
- 0x0 , // SX_MRT4_BLEND_OPT
- 0x0 , // SX_MRT5_BLEND_OPT
- 0x0 , // SX_MRT6_BLEND_OPT
- 0x0 , // SX_MRT7_BLEND_OPT
- 0x0 , // CB_BLEND0_CONTROL
- 0x0 , // CB_BLEND1_CONTROL
- 0x0 , // CB_BLEND2_CONTROL
- 0x0 , // CB_BLEND3_CONTROL
- 0x0 , // CB_BLEND4_CONTROL
- 0x0 , // CB_BLEND5_CONTROL
- 0x0 , // CB_BLEND6_CONTROL
- 0x0 , // CB_BLEND7_CONTROL
- 0x0 , // CB_MRT0_EPITCH
- 0x0 , // CB_MRT1_EPITCH
- 0x0 , // CB_MRT2_EPITCH
- 0x0 , // CB_MRT3_EPITCH
- 0x0 , // CB_MRT4_EPITCH
- 0x0 , // CB_MRT5_EPITCH
- 0x0 , // CB_MRT6_EPITCH
- 0x0 // CB_MRT7_EPITCH
+ 0x0, // SX_PS_DOWNCONVERT
+ 0x0, // SX_BLEND_OPT_EPSILON
+ 0x0, // SX_BLEND_OPT_CONTROL
+ 0x0, // SX_MRT0_BLEND_OPT
+ 0x0, // SX_MRT1_BLEND_OPT
+ 0x0, // SX_MRT2_BLEND_OPT
+ 0x0, // SX_MRT3_BLEND_OPT
+ 0x0, // SX_MRT4_BLEND_OPT
+ 0x0, // SX_MRT5_BLEND_OPT
+ 0x0, // SX_MRT6_BLEND_OPT
+ 0x0, // SX_MRT7_BLEND_OPT
+ 0x0, // CB_BLEND0_CONTROL
+ 0x0, // CB_BLEND1_CONTROL
+ 0x0, // CB_BLEND2_CONTROL
+ 0x0, // CB_BLEND3_CONTROL
+ 0x0, // CB_BLEND4_CONTROL
+ 0x0, // CB_BLEND5_CONTROL
+ 0x0, // CB_BLEND6_CONTROL
+ 0x0, // CB_BLEND7_CONTROL
+ 0x0, // CB_MRT0_EPITCH
+ 0x0, // CB_MRT1_EPITCH
+ 0x0, // CB_MRT2_EPITCH
+ 0x0, // CB_MRT3_EPITCH
+ 0x0, // CB_MRT4_EPITCH
+ 0x0, // CB_MRT5_EPITCH
+ 0x0, // CB_MRT6_EPITCH
+ 0x0 // CB_MRT7_EPITCH
};
static const uint32_t DbDepthControlGfx9[] = {
- 0x0 , // DB_DEPTH_CONTROL
- 0x0 , // DB_EQAA
- 0x0 , // CB_COLOR_CONTROL
- 0x0 , // DB_SHADER_CONTROL
- 0x90000 , // PA_CL_CLIP_CNTL
- 0x4 , // PA_SU_SC_MODE_CNTL
- 0x0 , // PA_CL_VTE_CNTL
- 0x0 , // PA_CL_VS_OUT_CNTL
- 0x0 // PA_CL_NANINF_CNTL
+ 0x0, // DB_DEPTH_CONTROL
+ 0x0, // DB_EQAA
+ 0x0, // CB_COLOR_CONTROL
+ 0x0, // DB_SHADER_CONTROL
+ 0x90000, // PA_CL_CLIP_CNTL
+ 0x4, // PA_SU_SC_MODE_CNTL
+ 0x0, // PA_CL_VTE_CNTL
+ 0x0, // PA_CL_VS_OUT_CNTL
+ 0x0 // PA_CL_NANINF_CNTL
};
static const uint32_t PaSuPrimFilterCntlGfx9[] = {
- 0x0 , // PA_SU_PRIM_FILTER_CNTL
- 0x0 , // PA_SU_SMALL_PRIM_FILTER_CNTL
- 0x0 , // PA_CL_OBJPRIM_ID_CNTL
- 0x0 , // PA_CL_NGG_CNTL
- 0x0 , // PA_SU_OVER_RASTERIZATION_CNTL
- 0x0 // PA_STEREO_CNTL
+ 0x0, // PA_SU_PRIM_FILTER_CNTL
+ 0x0, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x0, // PA_CL_OBJPRIM_ID_CNTL
+ 0x0, // PA_CL_NGG_CNTL
+ 0x0, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x0 // PA_STEREO_CNTL
};
static const uint32_t PaSuPointSizeGfx9[] = {
- 0x0 , // PA_SU_POINT_SIZE
- 0x0 , // PA_SU_POINT_MINMAX
- 0x0 , // PA_SU_LINE_CNTL
- 0x0 // PA_SC_LINE_STIPPLE
+ 0x0, // PA_SU_POINT_SIZE
+ 0x0, // PA_SU_POINT_MINMAX
+ 0x0, // PA_SU_LINE_CNTL
+ 0x0 // PA_SC_LINE_STIPPLE
};
static const uint32_t VgtHosMaxTessLevelGfx9[] = {
- 0x0 , // VGT_HOS_MAX_TESS_LEVEL
- 0x0 // VGT_HOS_MIN_TESS_LEVEL
+ 0x0, // VGT_HOS_MAX_TESS_LEVEL
+ 0x0 // VGT_HOS_MIN_TESS_LEVEL
};
static const uint32_t VgtGsModeGfx9[] = {
- 0x0 , // VGT_GS_MODE
- 0x0 , // VGT_GS_ONCHIP_CNTL
- 0x0 , // PA_SC_MODE_CNTL_0
- 0x0 , // PA_SC_MODE_CNTL_1
- 0x0 , // VGT_ENHANCE
- 0x100 , // VGT_GS_PER_ES
- 0x80 , // VGT_ES_PER_GS
- 0x2 , // VGT_GS_PER_VS
- 0x0 , // VGT_GSVS_RING_OFFSET_1
- 0x0 , // VGT_GSVS_RING_OFFSET_2
- 0x0 , // VGT_GSVS_RING_OFFSET_3
- 0x0 // VGT_GS_OUT_PRIM_TYPE
+ 0x0, // VGT_GS_MODE
+ 0x0, // VGT_GS_ONCHIP_CNTL
+ 0x0, // PA_SC_MODE_CNTL_0
+ 0x0, // PA_SC_MODE_CNTL_1
+ 0x0, // VGT_ENHANCE
+ 0x100, // VGT_GS_PER_ES
+ 0x80, // VGT_ES_PER_GS
+ 0x2, // VGT_GS_PER_VS
+ 0x0, // VGT_GSVS_RING_OFFSET_1
+ 0x0, // VGT_GSVS_RING_OFFSET_2
+ 0x0, // VGT_GSVS_RING_OFFSET_3
+ 0x0 // VGT_GS_OUT_PRIM_TYPE
};
static const uint32_t VgtPrimitiveidEnGfx9[] = {
- 0x0 // VGT_PRIMITIVEID_EN
+ 0x0 // VGT_PRIMITIVEID_EN
};
static const uint32_t VgtPrimitiveidResetGfx9[] = {
- 0x0 // VGT_PRIMITIVEID_RESET
+ 0x0 // VGT_PRIMITIVEID_RESET
};
static const uint32_t VgtGsMaxPrimsPerSubgroupGfx9[] = {
- 0x0 , // VGT_GS_MAX_PRIMS_PER_SUBGROUP
- 0x0 , // VGT_DRAW_PAYLOAD_CNTL
- 0x0 , //
- 0x0 , // VGT_INSTANCE_STEP_RATE_0
- 0x0 , // VGT_INSTANCE_STEP_RATE_1
- 0x0 , //
- 0x0 , // VGT_ESGS_RING_ITEMSIZE
- 0x0 , // VGT_GSVS_RING_ITEMSIZE
- 0x0 , // VGT_REUSE_OFF
- 0x0 , // VGT_VTX_CNT_EN
- 0x0 , // DB_HTILE_SURFACE
- 0x0 , // DB_SRESULTS_COMPARE_STATE0
- 0x0 , // DB_SRESULTS_COMPARE_STATE1
- 0x0 , // DB_PRELOAD_CONTROL
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_0
- 0x0 // VGT_STRMOUT_VTX_STRIDE_0
+ 0x0, // VGT_GS_MAX_PRIMS_PER_SUBGROUP
+ 0x0, // VGT_DRAW_PAYLOAD_CNTL
+ 0x0, //
+ 0x0, // VGT_INSTANCE_STEP_RATE_0
+ 0x0, // VGT_INSTANCE_STEP_RATE_1
+ 0x0, //
+ 0x0, // VGT_ESGS_RING_ITEMSIZE
+ 0x0, // VGT_GSVS_RING_ITEMSIZE
+ 0x0, // VGT_REUSE_OFF
+ 0x0, // VGT_VTX_CNT_EN
+ 0x0, // DB_HTILE_SURFACE
+ 0x0, // DB_SRESULTS_COMPARE_STATE0
+ 0x0, // DB_SRESULTS_COMPARE_STATE1
+ 0x0, // DB_PRELOAD_CONTROL
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x0 // VGT_STRMOUT_VTX_STRIDE_0
};
static const uint32_t VgtStrmoutBufferSize1Gfx9[] = {
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_1
- 0x0 // VGT_STRMOUT_VTX_STRIDE_1
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x0 // VGT_STRMOUT_VTX_STRIDE_1
};
static const uint32_t VgtStrmoutBufferSize2Gfx9[] = {
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_2
- 0x0 // VGT_STRMOUT_VTX_STRIDE_2
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x0 // VGT_STRMOUT_VTX_STRIDE_2
};
static const uint32_t VgtStrmoutBufferSize3Gfx9[] = {
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_3
- 0x0 // VGT_STRMOUT_VTX_STRIDE_3
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x0 // VGT_STRMOUT_VTX_STRIDE_3
};
static const uint32_t VgtStrmoutDrawOpaqueOffsetGfx9[] = {
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
- 0x0 // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x0 // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
};
static const uint32_t VgtGsMaxVertOutGfx9[] = {
- 0x0 , // VGT_GS_MAX_VERT_OUT
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // VGT_TESS_DISTRIBUTION
- 0x0 , // VGT_SHADER_STAGES_EN
- 0x0 , // VGT_LS_HS_CONFIG
- 0x0 , // VGT_GS_VERT_ITEMSIZE
- 0x0 , // VGT_GS_VERT_ITEMSIZE_1
- 0x0 , // VGT_GS_VERT_ITEMSIZE_2
- 0x0 , // VGT_GS_VERT_ITEMSIZE_3
- 0x0 , // VGT_TF_PARAM
- 0x0 , // DB_ALPHA_TO_MASK
- 0x0 , // VGT_DISPATCH_DRAW_INDEX
- 0x0 , // PA_SU_POLY_OFFSET_DB_FMT_CNTL
- 0x0 , // PA_SU_POLY_OFFSET_CLAMP
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_OFFSET
- 0x0 , // PA_SU_POLY_OFFSET_BACK_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_BACK_OFFSET
- 0x0 , // VGT_GS_INSTANCE_CNT
- 0x0 , // VGT_STRMOUT_CONFIG
- 0x0 // VGT_STRMOUT_BUFFER_CONFIG
+ 0x0, // VGT_GS_MAX_VERT_OUT
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // VGT_TESS_DISTRIBUTION
+ 0x0, // VGT_SHADER_STAGES_EN
+ 0x0, // VGT_LS_HS_CONFIG
+ 0x0, // VGT_GS_VERT_ITEMSIZE
+ 0x0, // VGT_GS_VERT_ITEMSIZE_1
+ 0x0, // VGT_GS_VERT_ITEMSIZE_2
+ 0x0, // VGT_GS_VERT_ITEMSIZE_3
+ 0x0, // VGT_TF_PARAM
+ 0x0, // DB_ALPHA_TO_MASK
+ 0x0, // VGT_DISPATCH_DRAW_INDEX
+ 0x0, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x0, // PA_SU_POLY_OFFSET_CLAMP
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x0, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x0, // VGT_GS_INSTANCE_CNT
+ 0x0, // VGT_STRMOUT_CONFIG
+ 0x0 // VGT_STRMOUT_BUFFER_CONFIG
};
static const uint32_t PaScCentroidPriority0Gfx9[] = {
- 0x0 , // PA_SC_CENTROID_PRIORITY_0
- 0x0 , // PA_SC_CENTROID_PRIORITY_1
- 0x1000 , // PA_SC_LINE_CNTL
- 0x0 , // PA_SC_AA_CONFIG
- 0x5 , // PA_SU_VTX_CNTL
+ 0x0, // PA_SC_CENTROID_PRIORITY_0
+ 0x0, // PA_SC_CENTROID_PRIORITY_1
+ 0x1000, // PA_SC_LINE_CNTL
+ 0x0, // PA_SC_AA_CONFIG
+ 0x5, // PA_SU_VTX_CNTL
0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
- 0x0 , // PA_SC_SHADER_CONTROL
- 0x3 , // PA_SC_BINNER_CNTL_0
- 0x0 , // PA_SC_BINNER_CNTL_1
- 0x100000 , // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
- 0x0 , // PA_SC_NGG_MODE_CNTL
- 0x0 , //
- 0x1e , // VGT_VERTEX_REUSE_BLOCK_CNTL
- 0x20 , // VGT_OUT_DEALLOC_CNTL
- 0x0 , // CB_COLOR0_BASE
- 0x0 , // CB_COLOR0_BASE_EXT
- 0x0 , // CB_COLOR0_ATTRIB2
- 0x0 , // CB_COLOR0_VIEW
- 0x0 , // CB_COLOR0_INFO
- 0x0 , // CB_COLOR0_ATTRIB
- 0x0 , // CB_COLOR0_DCC_CONTROL
- 0x0 , // CB_COLOR0_CMASK
- 0x0 , // CB_COLOR0_CMASK_BASE_EXT
- 0x0 , // CB_COLOR0_FMASK
- 0x0 , // CB_COLOR0_FMASK_BASE_EXT
- 0x0 , // CB_COLOR0_CLEAR_WORD0
- 0x0 , // CB_COLOR0_CLEAR_WORD1
- 0x0 , // CB_COLOR0_DCC_BASE
- 0x0 , // CB_COLOR0_DCC_BASE_EXT
- 0x0 , // CB_COLOR1_BASE
- 0x0 , // CB_COLOR1_BASE_EXT
- 0x0 , // CB_COLOR1_ATTRIB2
- 0x0 , // CB_COLOR1_VIEW
- 0x0 , // CB_COLOR1_INFO
- 0x0 , // CB_COLOR1_ATTRIB
- 0x0 , // CB_COLOR1_DCC_CONTROL
- 0x0 , // CB_COLOR1_CMASK
- 0x0 , // CB_COLOR1_CMASK_BASE_EXT
- 0x0 , // CB_COLOR1_FMASK
- 0x0 , // CB_COLOR1_FMASK_BASE_EXT
- 0x0 , // CB_COLOR1_CLEAR_WORD0
- 0x0 , // CB_COLOR1_CLEAR_WORD1
- 0x0 , // CB_COLOR1_DCC_BASE
- 0x0 , // CB_COLOR1_DCC_BASE_EXT
- 0x0 , // CB_COLOR2_BASE
- 0x0 , // CB_COLOR2_BASE_EXT
- 0x0 , // CB_COLOR2_ATTRIB2
- 0x0 , // CB_COLOR2_VIEW
- 0x0 , // CB_COLOR2_INFO
- 0x0 , // CB_COLOR2_ATTRIB
- 0x0 , // CB_COLOR2_DCC_CONTROL
- 0x0 , // CB_COLOR2_CMASK
- 0x0 , // CB_COLOR2_CMASK_BASE_EXT
- 0x0 , // CB_COLOR2_FMASK
- 0x0 , // CB_COLOR2_FMASK_BASE_EXT
- 0x0 , // CB_COLOR2_CLEAR_WORD0
- 0x0 , // CB_COLOR2_CLEAR_WORD1
- 0x0 , // CB_COLOR2_DCC_BASE
- 0x0 , // CB_COLOR2_DCC_BASE_EXT
- 0x0 , // CB_COLOR3_BASE
- 0x0 , // CB_COLOR3_BASE_EXT
- 0x0 , // CB_COLOR3_ATTRIB2
- 0x0 , // CB_COLOR3_VIEW
- 0x0 , // CB_COLOR3_INFO
- 0x0 , // CB_COLOR3_ATTRIB
- 0x0 , // CB_COLOR3_DCC_CONTROL
- 0x0 , // CB_COLOR3_CMASK
- 0x0 , // CB_COLOR3_CMASK_BASE_EXT
- 0x0 , // CB_COLOR3_FMASK
- 0x0 , // CB_COLOR3_FMASK_BASE_EXT
- 0x0 , // CB_COLOR3_CLEAR_WORD0
- 0x0 , // CB_COLOR3_CLEAR_WORD1
- 0x0 , // CB_COLOR3_DCC_BASE
- 0x0 , // CB_COLOR3_DCC_BASE_EXT
- 0x0 , // CB_COLOR4_BASE
- 0x0 , // CB_COLOR4_BASE_EXT
- 0x0 , // CB_COLOR4_ATTRIB2
- 0x0 , // CB_COLOR4_VIEW
- 0x0 , // CB_COLOR4_INFO
- 0x0 , // CB_COLOR4_ATTRIB
- 0x0 , // CB_COLOR4_DCC_CONTROL
- 0x0 , // CB_COLOR4_CMASK
- 0x0 , // CB_COLOR4_CMASK_BASE_EXT
- 0x0 , // CB_COLOR4_FMASK
- 0x0 , // CB_COLOR4_FMASK_BASE_EXT
- 0x0 , // CB_COLOR4_CLEAR_WORD0
- 0x0 , // CB_COLOR4_CLEAR_WORD1
- 0x0 , // CB_COLOR4_DCC_BASE
- 0x0 , // CB_COLOR4_DCC_BASE_EXT
- 0x0 , // CB_COLOR5_BASE
- 0x0 , // CB_COLOR5_BASE_EXT
- 0x0 , // CB_COLOR5_ATTRIB2
- 0x0 , // CB_COLOR5_VIEW
- 0x0 , // CB_COLOR5_INFO
- 0x0 , // CB_COLOR5_ATTRIB
- 0x0 , // CB_COLOR5_DCC_CONTROL
- 0x0 , // CB_COLOR5_CMASK
- 0x0 , // CB_COLOR5_CMASK_BASE_EXT
- 0x0 , // CB_COLOR5_FMASK
- 0x0 , // CB_COLOR5_FMASK_BASE_EXT
- 0x0 , // CB_COLOR5_CLEAR_WORD0
- 0x0 , // CB_COLOR5_CLEAR_WORD1
- 0x0 , // CB_COLOR5_DCC_BASE
- 0x0 , // CB_COLOR5_DCC_BASE_EXT
- 0x0 , // CB_COLOR6_BASE
- 0x0 , // CB_COLOR6_BASE_EXT
- 0x0 , // CB_COLOR6_ATTRIB2
- 0x0 , // CB_COLOR6_VIEW
- 0x0 , // CB_COLOR6_INFO
- 0x0 , // CB_COLOR6_ATTRIB
- 0x0 , // CB_COLOR6_DCC_CONTROL
- 0x0 , // CB_COLOR6_CMASK
- 0x0 , // CB_COLOR6_CMASK_BASE_EXT
- 0x0 , // CB_COLOR6_FMASK
- 0x0 , // CB_COLOR6_FMASK_BASE_EXT
- 0x0 , // CB_COLOR6_CLEAR_WORD0
- 0x0 , // CB_COLOR6_CLEAR_WORD1
- 0x0 , // CB_COLOR6_DCC_BASE
- 0x0 , // CB_COLOR6_DCC_BASE_EXT
- 0x0 , // CB_COLOR7_BASE
- 0x0 , // CB_COLOR7_BASE_EXT
- 0x0 , // CB_COLOR7_ATTRIB2
- 0x0 , // CB_COLOR7_VIEW
- 0x0 , // CB_COLOR7_INFO
- 0x0 , // CB_COLOR7_ATTRIB
- 0x0 , // CB_COLOR7_DCC_CONTROL
- 0x0 , // CB_COLOR7_CMASK
- 0x0 , // CB_COLOR7_CMASK_BASE_EXT
- 0x0 , // CB_COLOR7_FMASK
- 0x0 , // CB_COLOR7_FMASK_BASE_EXT
- 0x0 , // CB_COLOR7_CLEAR_WORD0
- 0x0 , // CB_COLOR7_CLEAR_WORD1
- 0x0 , // CB_COLOR7_DCC_BASE
+ 0x0, // PA_SC_SHADER_CONTROL
+ 0x3, // PA_SC_BINNER_CNTL_0
+ 0x0, // PA_SC_BINNER_CNTL_1
+ 0x100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x0, // PA_SC_NGG_MODE_CNTL
+ 0x0, //
+ 0x1e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x20, // VGT_OUT_DEALLOC_CNTL
+ 0x0, // CB_COLOR0_BASE
+ 0x0, // CB_COLOR0_BASE_EXT
+ 0x0, // CB_COLOR0_ATTRIB2
+ 0x0, // CB_COLOR0_VIEW
+ 0x0, // CB_COLOR0_INFO
+ 0x0, // CB_COLOR0_ATTRIB
+ 0x0, // CB_COLOR0_DCC_CONTROL
+ 0x0, // CB_COLOR0_CMASK
+ 0x0, // CB_COLOR0_CMASK_BASE_EXT
+ 0x0, // CB_COLOR0_FMASK
+ 0x0, // CB_COLOR0_FMASK_BASE_EXT
+ 0x0, // CB_COLOR0_CLEAR_WORD0
+ 0x0, // CB_COLOR0_CLEAR_WORD1
+ 0x0, // CB_COLOR0_DCC_BASE
+ 0x0, // CB_COLOR0_DCC_BASE_EXT
+ 0x0, // CB_COLOR1_BASE
+ 0x0, // CB_COLOR1_BASE_EXT
+ 0x0, // CB_COLOR1_ATTRIB2
+ 0x0, // CB_COLOR1_VIEW
+ 0x0, // CB_COLOR1_INFO
+ 0x0, // CB_COLOR1_ATTRIB
+ 0x0, // CB_COLOR1_DCC_CONTROL
+ 0x0, // CB_COLOR1_CMASK
+ 0x0, // CB_COLOR1_CMASK_BASE_EXT
+ 0x0, // CB_COLOR1_FMASK
+ 0x0, // CB_COLOR1_FMASK_BASE_EXT
+ 0x0, // CB_COLOR1_CLEAR_WORD0
+ 0x0, // CB_COLOR1_CLEAR_WORD1
+ 0x0, // CB_COLOR1_DCC_BASE
+ 0x0, // CB_COLOR1_DCC_BASE_EXT
+ 0x0, // CB_COLOR2_BASE
+ 0x0, // CB_COLOR2_BASE_EXT
+ 0x0, // CB_COLOR2_ATTRIB2
+ 0x0, // CB_COLOR2_VIEW
+ 0x0, // CB_COLOR2_INFO
+ 0x0, // CB_COLOR2_ATTRIB
+ 0x0, // CB_COLOR2_DCC_CONTROL
+ 0x0, // CB_COLOR2_CMASK
+ 0x0, // CB_COLOR2_CMASK_BASE_EXT
+ 0x0, // CB_COLOR2_FMASK
+ 0x0, // CB_COLOR2_FMASK_BASE_EXT
+ 0x0, // CB_COLOR2_CLEAR_WORD0
+ 0x0, // CB_COLOR2_CLEAR_WORD1
+ 0x0, // CB_COLOR2_DCC_BASE
+ 0x0, // CB_COLOR2_DCC_BASE_EXT
+ 0x0, // CB_COLOR3_BASE
+ 0x0, // CB_COLOR3_BASE_EXT
+ 0x0, // CB_COLOR3_ATTRIB2
+ 0x0, // CB_COLOR3_VIEW
+ 0x0, // CB_COLOR3_INFO
+ 0x0, // CB_COLOR3_ATTRIB
+ 0x0, // CB_COLOR3_DCC_CONTROL
+ 0x0, // CB_COLOR3_CMASK
+ 0x0, // CB_COLOR3_CMASK_BASE_EXT
+ 0x0, // CB_COLOR3_FMASK
+ 0x0, // CB_COLOR3_FMASK_BASE_EXT
+ 0x0, // CB_COLOR3_CLEAR_WORD0
+ 0x0, // CB_COLOR3_CLEAR_WORD1
+ 0x0, // CB_COLOR3_DCC_BASE
+ 0x0, // CB_COLOR3_DCC_BASE_EXT
+ 0x0, // CB_COLOR4_BASE
+ 0x0, // CB_COLOR4_BASE_EXT
+ 0x0, // CB_COLOR4_ATTRIB2
+ 0x0, // CB_COLOR4_VIEW
+ 0x0, // CB_COLOR4_INFO
+ 0x0, // CB_COLOR4_ATTRIB
+ 0x0, // CB_COLOR4_DCC_CONTROL
+ 0x0, // CB_COLOR4_CMASK
+ 0x0, // CB_COLOR4_CMASK_BASE_EXT
+ 0x0, // CB_COLOR4_FMASK
+ 0x0, // CB_COLOR4_FMASK_BASE_EXT
+ 0x0, // CB_COLOR4_CLEAR_WORD0
+ 0x0, // CB_COLOR4_CLEAR_WORD1
+ 0x0, // CB_COLOR4_DCC_BASE
+ 0x0, // CB_COLOR4_DCC_BASE_EXT
+ 0x0, // CB_COLOR5_BASE
+ 0x0, // CB_COLOR5_BASE_EXT
+ 0x0, // CB_COLOR5_ATTRIB2
+ 0x0, // CB_COLOR5_VIEW
+ 0x0, // CB_COLOR5_INFO
+ 0x0, // CB_COLOR5_ATTRIB
+ 0x0, // CB_COLOR5_DCC_CONTROL
+ 0x0, // CB_COLOR5_CMASK
+ 0x0, // CB_COLOR5_CMASK_BASE_EXT
+ 0x0, // CB_COLOR5_FMASK
+ 0x0, // CB_COLOR5_FMASK_BASE_EXT
+ 0x0, // CB_COLOR5_CLEAR_WORD0
+ 0x0, // CB_COLOR5_CLEAR_WORD1
+ 0x0, // CB_COLOR5_DCC_BASE
+ 0x0, // CB_COLOR5_DCC_BASE_EXT
+ 0x0, // CB_COLOR6_BASE
+ 0x0, // CB_COLOR6_BASE_EXT
+ 0x0, // CB_COLOR6_ATTRIB2
+ 0x0, // CB_COLOR6_VIEW
+ 0x0, // CB_COLOR6_INFO
+ 0x0, // CB_COLOR6_ATTRIB
+ 0x0, // CB_COLOR6_DCC_CONTROL
+ 0x0, // CB_COLOR6_CMASK
+ 0x0, // CB_COLOR6_CMASK_BASE_EXT
+ 0x0, // CB_COLOR6_FMASK
+ 0x0, // CB_COLOR6_FMASK_BASE_EXT
+ 0x0, // CB_COLOR6_CLEAR_WORD0
+ 0x0, // CB_COLOR6_CLEAR_WORD1
+ 0x0, // CB_COLOR6_DCC_BASE
+ 0x0, // CB_COLOR6_DCC_BASE_EXT
+ 0x0, // CB_COLOR7_BASE
+ 0x0, // CB_COLOR7_BASE_EXT
+ 0x0, // CB_COLOR7_ATTRIB2
+ 0x0, // CB_COLOR7_VIEW
+ 0x0, // CB_COLOR7_INFO
+ 0x0, // CB_COLOR7_ATTRIB
+ 0x0, // CB_COLOR7_DCC_CONTROL
+ 0x0, // CB_COLOR7_CMASK
+ 0x0, // CB_COLOR7_CMASK_BASE_EXT
+ 0x0, // CB_COLOR7_FMASK
+ 0x0, // CB_COLOR7_FMASK_BASE_EXT
+ 0x0, // CB_COLOR7_CLEAR_WORD0
+ 0x0, // CB_COLOR7_CLEAR_WORD1
+ 0x0, // CB_COLOR7_DCC_BASE
0x0 // CB_COLOR7_DCC_BASE_EXT
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx9));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx9));
- set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, SET(VgtMultiPrimIbResetIndxGfx9));
+ set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ SET(VgtMultiPrimIbResetIndxGfx9));
set_context_reg_seq_array(cs, R_028414_CB_BLEND_RED, SET(CbBlendRedGfx9));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx9));
set_context_reg_seq_array(cs, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertGfx9));
set_context_reg_seq_array(cs, R_028A40_VGT_GS_MODE, SET(VgtGsModeGfx9));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx9));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx9));
- set_context_reg_seq_array(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, SET(VgtGsMaxPrimsPerSubgroupGfx9));
- set_context_reg_seq_array(cs, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1, SET(VgtStrmoutBufferSize1Gfx9));
- set_context_reg_seq_array(cs, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2, SET(VgtStrmoutBufferSize2Gfx9));
- set_context_reg_seq_array(cs, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3, SET(VgtStrmoutBufferSize3Gfx9));
- set_context_reg_seq_array(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, SET(VgtStrmoutDrawOpaqueOffsetGfx9));
+ set_context_reg_seq_array(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+ SET(VgtGsMaxPrimsPerSubgroupGfx9));
+ set_context_reg_seq_array(cs, R_028AE0_VGT_STRMOUT_BUFFER_SIZE_1,
+ SET(VgtStrmoutBufferSize1Gfx9));
+ set_context_reg_seq_array(cs, R_028AF0_VGT_STRMOUT_BUFFER_SIZE_2,
+ SET(VgtStrmoutBufferSize2Gfx9));
+ set_context_reg_seq_array(cs, R_028B00_VGT_STRMOUT_BUFFER_SIZE_3,
+ SET(VgtStrmoutBufferSize3Gfx9));
+ set_context_reg_seq_array(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET,
+ SET(VgtStrmoutDrawOpaqueOffsetGfx9));
set_context_reg_seq_array(cs, R_028B38_VGT_GS_MAX_VERT_OUT, SET(VgtGsMaxVertOutGfx9));
- set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, SET(PaScCentroidPriority0Gfx9));
+ set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
+ SET(PaScCentroidPriority0Gfx9));
}
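The SET(...) helper in the calls above is defined outside this hunk. Judging from the set_context_reg_seq_array callback and the ARRAY_SIZE pairing used by the RETURN macro earlier, a plausible expansion — assumed here, not confirmed by this diff — is:

   #define SET(array) ARRAY_SIZE(array), array

i.e. each call passes the element count together with the register-default array.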
/**
set_context_reg_seq_array_fn set_context_reg_seq_array)
{
static const uint32_t DbRenderControlNv10[] = {
- 0x0 , // DB_RENDER_CONTROL
- 0x0 , // DB_COUNT_CONTROL
- 0x0 , // DB_DEPTH_VIEW
- 0x0 , // DB_RENDER_OVERRIDE
- 0x0 , // DB_RENDER_OVERRIDE2
- 0x0 , // DB_HTILE_DATA_BASE
- 0x0 , //
- 0x0 , // DB_DEPTH_SIZE_XY
- 0x0 , // DB_DEPTH_BOUNDS_MIN
- 0x0 , // DB_DEPTH_BOUNDS_MAX
- 0x0 , // DB_STENCIL_CLEAR
- 0x0 , // DB_DEPTH_CLEAR
- 0x0 , // PA_SC_SCREEN_SCISSOR_TL
+ 0x0, // DB_RENDER_CONTROL
+ 0x0, // DB_COUNT_CONTROL
+ 0x0, // DB_DEPTH_VIEW
+ 0x0, // DB_RENDER_OVERRIDE
+ 0x0, // DB_RENDER_OVERRIDE2
+ 0x0, // DB_HTILE_DATA_BASE
+ 0x0, //
+ 0x0, // DB_DEPTH_SIZE_XY
+ 0x0, // DB_DEPTH_BOUNDS_MIN
+ 0x0, // DB_DEPTH_BOUNDS_MAX
+ 0x0, // DB_STENCIL_CLEAR
+ 0x0, // DB_DEPTH_CLEAR
+ 0x0, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
- 0x0 , // DB_DFSM_CONTROL
- 0x0 , // DB_RESERVED_REG_2
- 0x0 , // DB_Z_INFO
- 0x0 , // DB_STENCIL_INFO
- 0x0 , // DB_Z_READ_BASE
- 0x0 , // DB_STENCIL_READ_BASE
- 0x0 , // DB_Z_WRITE_BASE
- 0x0 , // DB_STENCIL_WRITE_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // DB_Z_READ_BASE_HI
- 0x0 , // DB_STENCIL_READ_BASE_HI
- 0x0 , // DB_Z_WRITE_BASE_HI
- 0x0 , // DB_STENCIL_WRITE_BASE_HI
- 0x0 , // DB_HTILE_DATA_BASE_HI
- 0x0 , // DB_RMI_L2_CACHE_CONTROL
- 0x0 , // TA_BC_BASE_ADDR
+ 0x0, // DB_DFSM_CONTROL
+ 0x0, // DB_RESERVED_REG_2
+ 0x0, // DB_Z_INFO
+ 0x0, // DB_STENCIL_INFO
+ 0x0, // DB_Z_READ_BASE
+ 0x0, // DB_STENCIL_READ_BASE
+ 0x0, // DB_Z_WRITE_BASE
+ 0x0, // DB_STENCIL_WRITE_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // DB_Z_READ_BASE_HI
+ 0x0, // DB_STENCIL_READ_BASE_HI
+ 0x0, // DB_Z_WRITE_BASE_HI
+ 0x0, // DB_STENCIL_WRITE_BASE_HI
+ 0x0, // DB_HTILE_DATA_BASE_HI
+ 0x0, // DB_RMI_L2_CACHE_CONTROL
+ 0x0, // TA_BC_BASE_ADDR
0x0 // TA_BC_BASE_ADDR_HI
};
static const uint32_t CoherDestBaseHi0Nv10[] = {
- 0x0 , // COHER_DEST_BASE_HI_0
- 0x0 , // COHER_DEST_BASE_HI_1
- 0x0 , // COHER_DEST_BASE_HI_2
- 0x0 , // COHER_DEST_BASE_HI_3
- 0x0 , // COHER_DEST_BASE_2
- 0x0 , // COHER_DEST_BASE_3
- 0x0 , // PA_SC_WINDOW_OFFSET
+ 0x0, // COHER_DEST_BASE_HI_0
+ 0x0, // COHER_DEST_BASE_HI_1
+ 0x0, // COHER_DEST_BASE_HI_2
+ 0x0, // COHER_DEST_BASE_HI_3
+ 0x0, // COHER_DEST_BASE_2
+ 0x0, // COHER_DEST_BASE_3
+ 0x0, // PA_SC_WINDOW_OFFSET
0x80000000, // PA_SC_WINDOW_SCISSOR_TL
0x40004000, // PA_SC_WINDOW_SCISSOR_BR
- 0xffff , // PA_SC_CLIPRECT_RULE
- 0x0 , // PA_SC_CLIPRECT_0_TL
+ 0xffff, // PA_SC_CLIPRECT_RULE
+ 0x0, // PA_SC_CLIPRECT_0_TL
0x40004000, // PA_SC_CLIPRECT_0_BR
- 0x0 , // PA_SC_CLIPRECT_1_TL
+ 0x0, // PA_SC_CLIPRECT_1_TL
0x40004000, // PA_SC_CLIPRECT_1_BR
- 0x0 , // PA_SC_CLIPRECT_2_TL
+ 0x0, // PA_SC_CLIPRECT_2_TL
0x40004000, // PA_SC_CLIPRECT_2_BR
- 0x0 , // PA_SC_CLIPRECT_3_TL
+ 0x0, // PA_SC_CLIPRECT_3_TL
0x40004000, // PA_SC_CLIPRECT_3_BR
0xaa99aaaa, // PA_SC_EDGERULE
- 0x0 , // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0x0, // PA_SU_HARDWARE_SCREEN_OFFSET
0xffffffff, // CB_TARGET_MASK
0xffffffff, // CB_SHADER_MASK
0x80000000, // PA_SC_GENERIC_SCISSOR_TL
0x40004000, // PA_SC_GENERIC_SCISSOR_BR
- 0x0 , // COHER_DEST_BASE_0
- 0x0 , // COHER_DEST_BASE_1
+ 0x0, // COHER_DEST_BASE_0
+ 0x0, // COHER_DEST_BASE_1
0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
- 0x0 , // PA_SC_VPORT_ZMIN_0
+ 0x0, // PA_SC_VPORT_ZMIN_0
0x3f800000, // PA_SC_VPORT_ZMAX_0
- 0x0 , // PA_SC_VPORT_ZMIN_1
+ 0x0, // PA_SC_VPORT_ZMIN_1
0x3f800000, // PA_SC_VPORT_ZMAX_1
- 0x0 , // PA_SC_VPORT_ZMIN_2
+ 0x0, // PA_SC_VPORT_ZMIN_2
0x3f800000, // PA_SC_VPORT_ZMAX_2
- 0x0 , // PA_SC_VPORT_ZMIN_3
+ 0x0, // PA_SC_VPORT_ZMIN_3
0x3f800000, // PA_SC_VPORT_ZMAX_3
- 0x0 , // PA_SC_VPORT_ZMIN_4
+ 0x0, // PA_SC_VPORT_ZMIN_4
0x3f800000, // PA_SC_VPORT_ZMAX_4
- 0x0 , // PA_SC_VPORT_ZMIN_5
+ 0x0, // PA_SC_VPORT_ZMIN_5
0x3f800000, // PA_SC_VPORT_ZMAX_5
- 0x0 , // PA_SC_VPORT_ZMIN_6
+ 0x0, // PA_SC_VPORT_ZMIN_6
0x3f800000, // PA_SC_VPORT_ZMAX_6
- 0x0 , // PA_SC_VPORT_ZMIN_7
+ 0x0, // PA_SC_VPORT_ZMIN_7
0x3f800000, // PA_SC_VPORT_ZMAX_7
- 0x0 , // PA_SC_VPORT_ZMIN_8
+ 0x0, // PA_SC_VPORT_ZMIN_8
0x3f800000, // PA_SC_VPORT_ZMAX_8
- 0x0 , // PA_SC_VPORT_ZMIN_9
+ 0x0, // PA_SC_VPORT_ZMIN_9
0x3f800000, // PA_SC_VPORT_ZMAX_9
- 0x0 , // PA_SC_VPORT_ZMIN_10
+ 0x0, // PA_SC_VPORT_ZMIN_10
0x3f800000, // PA_SC_VPORT_ZMAX_10
- 0x0 , // PA_SC_VPORT_ZMIN_11
+ 0x0, // PA_SC_VPORT_ZMIN_11
0x3f800000, // PA_SC_VPORT_ZMAX_11
- 0x0 , // PA_SC_VPORT_ZMIN_12
+ 0x0, // PA_SC_VPORT_ZMIN_12
0x3f800000, // PA_SC_VPORT_ZMAX_12
- 0x0 , // PA_SC_VPORT_ZMIN_13
+ 0x0, // PA_SC_VPORT_ZMIN_13
0x3f800000, // PA_SC_VPORT_ZMAX_13
- 0x0 , // PA_SC_VPORT_ZMIN_14
+ 0x0, // PA_SC_VPORT_ZMIN_14
0x3f800000, // PA_SC_VPORT_ZMAX_14
- 0x0 , // PA_SC_VPORT_ZMIN_15
+ 0x0, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
- 0x0 , // PA_SC_RASTER_CONFIG
- 0x0 , // PA_SC_RASTER_CONFIG_1
- 0x0 , //
+ 0x0, // PA_SC_RASTER_CONFIG
+ 0x0, // PA_SC_RASTER_CONFIG_1
+ 0x0, //
0x0 // PA_SC_TILE_STEERING_OVERRIDE
};
static const uint32_t VgtMultiPrimIbResetIndxNv10[] = {
- 0x0 , // VGT_MULTI_PRIM_IB_RESET_INDX
- 0x0 , // CB_RMI_GL2_CACHE_CONTROL
- 0x0 , // CB_BLEND_RED
- 0x0 , // CB_BLEND_GREEN
- 0x0 , // CB_BLEND_BLUE
- 0x0 , // CB_BLEND_ALPHA
- 0x0 , // CB_DCC_CONTROL
- 0x0 , // CB_COVERAGE_OUT_CONTROL
- 0x0 , // DB_STENCIL_CONTROL
- 0x1000000 , // DB_STENCILREFMASK
- 0x1000000 , // DB_STENCILREFMASK_BF
- 0x0 , //
- 0x0 , // PA_CL_VPORT_XSCALE
- 0x0 , // PA_CL_VPORT_XOFFSET
- 0x0 , // PA_CL_VPORT_YSCALE
- 0x0 , // PA_CL_VPORT_YOFFSET
- 0x0 , // PA_CL_VPORT_ZSCALE
- 0x0 , // PA_CL_VPORT_ZOFFSET
- 0x0 , // PA_CL_VPORT_XSCALE_1
- 0x0 , // PA_CL_VPORT_XOFFSET_1
- 0x0 , // PA_CL_VPORT_YSCALE_1
- 0x0 , // PA_CL_VPORT_YOFFSET_1
- 0x0 , // PA_CL_VPORT_ZSCALE_1
- 0x0 , // PA_CL_VPORT_ZOFFSET_1
- 0x0 , // PA_CL_VPORT_XSCALE_2
- 0x0 , // PA_CL_VPORT_XOFFSET_2
- 0x0 , // PA_CL_VPORT_YSCALE_2
- 0x0 , // PA_CL_VPORT_YOFFSET_2
- 0x0 , // PA_CL_VPORT_ZSCALE_2
- 0x0 , // PA_CL_VPORT_ZOFFSET_2
- 0x0 , // PA_CL_VPORT_XSCALE_3
- 0x0 , // PA_CL_VPORT_XOFFSET_3
- 0x0 , // PA_CL_VPORT_YSCALE_3
- 0x0 , // PA_CL_VPORT_YOFFSET_3
- 0x0 , // PA_CL_VPORT_ZSCALE_3
- 0x0 , // PA_CL_VPORT_ZOFFSET_3
- 0x0 , // PA_CL_VPORT_XSCALE_4
- 0x0 , // PA_CL_VPORT_XOFFSET_4
- 0x0 , // PA_CL_VPORT_YSCALE_4
- 0x0 , // PA_CL_VPORT_YOFFSET_4
- 0x0 , // PA_CL_VPORT_ZSCALE_4
- 0x0 , // PA_CL_VPORT_ZOFFSET_4
- 0x0 , // PA_CL_VPORT_XSCALE_5
- 0x0 , // PA_CL_VPORT_XOFFSET_5
- 0x0 , // PA_CL_VPORT_YSCALE_5
- 0x0 , // PA_CL_VPORT_YOFFSET_5
- 0x0 , // PA_CL_VPORT_ZSCALE_5
- 0x0 , // PA_CL_VPORT_ZOFFSET_5
- 0x0 , // PA_CL_VPORT_XSCALE_6
- 0x0 , // PA_CL_VPORT_XOFFSET_6
- 0x0 , // PA_CL_VPORT_YSCALE_6
- 0x0 , // PA_CL_VPORT_YOFFSET_6
- 0x0 , // PA_CL_VPORT_ZSCALE_6
- 0x0 , // PA_CL_VPORT_ZOFFSET_6
- 0x0 , // PA_CL_VPORT_XSCALE_7
- 0x0 , // PA_CL_VPORT_XOFFSET_7
- 0x0 , // PA_CL_VPORT_YSCALE_7
- 0x0 , // PA_CL_VPORT_YOFFSET_7
- 0x0 , // PA_CL_VPORT_ZSCALE_7
- 0x0 , // PA_CL_VPORT_ZOFFSET_7
- 0x0 , // PA_CL_VPORT_XSCALE_8
- 0x0 , // PA_CL_VPORT_XOFFSET_8
- 0x0 , // PA_CL_VPORT_YSCALE_8
- 0x0 , // PA_CL_VPORT_YOFFSET_8
- 0x0 , // PA_CL_VPORT_ZSCALE_8
- 0x0 , // PA_CL_VPORT_ZOFFSET_8
- 0x0 , // PA_CL_VPORT_XSCALE_9
- 0x0 , // PA_CL_VPORT_XOFFSET_9
- 0x0 , // PA_CL_VPORT_YSCALE_9
- 0x0 , // PA_CL_VPORT_YOFFSET_9
- 0x0 , // PA_CL_VPORT_ZSCALE_9
- 0x0 , // PA_CL_VPORT_ZOFFSET_9
- 0x0 , // PA_CL_VPORT_XSCALE_10
- 0x0 , // PA_CL_VPORT_XOFFSET_10
- 0x0 , // PA_CL_VPORT_YSCALE_10
- 0x0 , // PA_CL_VPORT_YOFFSET_10
- 0x0 , // PA_CL_VPORT_ZSCALE_10
- 0x0 , // PA_CL_VPORT_ZOFFSET_10
- 0x0 , // PA_CL_VPORT_XSCALE_11
- 0x0 , // PA_CL_VPORT_XOFFSET_11
- 0x0 , // PA_CL_VPORT_YSCALE_11
- 0x0 , // PA_CL_VPORT_YOFFSET_11
- 0x0 , // PA_CL_VPORT_ZSCALE_11
- 0x0 , // PA_CL_VPORT_ZOFFSET_11
- 0x0 , // PA_CL_VPORT_XSCALE_12
- 0x0 , // PA_CL_VPORT_XOFFSET_12
- 0x0 , // PA_CL_VPORT_YSCALE_12
- 0x0 , // PA_CL_VPORT_YOFFSET_12
- 0x0 , // PA_CL_VPORT_ZSCALE_12
- 0x0 , // PA_CL_VPORT_ZOFFSET_12
- 0x0 , // PA_CL_VPORT_XSCALE_13
- 0x0 , // PA_CL_VPORT_XOFFSET_13
- 0x0 , // PA_CL_VPORT_YSCALE_13
- 0x0 , // PA_CL_VPORT_YOFFSET_13
- 0x0 , // PA_CL_VPORT_ZSCALE_13
- 0x0 , // PA_CL_VPORT_ZOFFSET_13
- 0x0 , // PA_CL_VPORT_XSCALE_14
- 0x0 , // PA_CL_VPORT_XOFFSET_14
- 0x0 , // PA_CL_VPORT_YSCALE_14
- 0x0 , // PA_CL_VPORT_YOFFSET_14
- 0x0 , // PA_CL_VPORT_ZSCALE_14
- 0x0 , // PA_CL_VPORT_ZOFFSET_14
- 0x0 , // PA_CL_VPORT_XSCALE_15
- 0x0 , // PA_CL_VPORT_XOFFSET_15
- 0x0 , // PA_CL_VPORT_YSCALE_15
- 0x0 , // PA_CL_VPORT_YOFFSET_15
- 0x0 , // PA_CL_VPORT_ZSCALE_15
- 0x0 , // PA_CL_VPORT_ZOFFSET_15
- 0x0 , // PA_CL_UCP_0_X
- 0x0 , // PA_CL_UCP_0_Y
- 0x0 , // PA_CL_UCP_0_Z
- 0x0 , // PA_CL_UCP_0_W
- 0x0 , // PA_CL_UCP_1_X
- 0x0 , // PA_CL_UCP_1_Y
- 0x0 , // PA_CL_UCP_1_Z
- 0x0 , // PA_CL_UCP_1_W
- 0x0 , // PA_CL_UCP_2_X
- 0x0 , // PA_CL_UCP_2_Y
- 0x0 , // PA_CL_UCP_2_Z
- 0x0 , // PA_CL_UCP_2_W
- 0x0 , // PA_CL_UCP_3_X
- 0x0 , // PA_CL_UCP_3_Y
- 0x0 , // PA_CL_UCP_3_Z
- 0x0 , // PA_CL_UCP_3_W
- 0x0 , // PA_CL_UCP_4_X
- 0x0 , // PA_CL_UCP_4_Y
- 0x0 , // PA_CL_UCP_4_Z
- 0x0 , // PA_CL_UCP_4_W
- 0x0 , // PA_CL_UCP_5_X
- 0x0 , // PA_CL_UCP_5_Y
- 0x0 , // PA_CL_UCP_5_Z
- 0x0 // PA_CL_UCP_5_W
+ 0x0, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x0, // CB_RMI_GL2_CACHE_CONTROL
+ 0x0, // CB_BLEND_RED
+ 0x0, // CB_BLEND_GREEN
+ 0x0, // CB_BLEND_BLUE
+ 0x0, // CB_BLEND_ALPHA
+ 0x0, // CB_DCC_CONTROL
+ 0x0, // CB_COVERAGE_OUT_CONTROL
+ 0x0, // DB_STENCIL_CONTROL
+ 0x1000000, // DB_STENCILREFMASK
+ 0x1000000, // DB_STENCILREFMASK_BF
+ 0x0, //
+ 0x0, // PA_CL_VPORT_XSCALE
+ 0x0, // PA_CL_VPORT_XOFFSET
+ 0x0, // PA_CL_VPORT_YSCALE
+ 0x0, // PA_CL_VPORT_YOFFSET
+ 0x0, // PA_CL_VPORT_ZSCALE
+ 0x0, // PA_CL_VPORT_ZOFFSET
+ 0x0, // PA_CL_VPORT_XSCALE_1
+ 0x0, // PA_CL_VPORT_XOFFSET_1
+ 0x0, // PA_CL_VPORT_YSCALE_1
+ 0x0, // PA_CL_VPORT_YOFFSET_1
+ 0x0, // PA_CL_VPORT_ZSCALE_1
+ 0x0, // PA_CL_VPORT_ZOFFSET_1
+ 0x0, // PA_CL_VPORT_XSCALE_2
+ 0x0, // PA_CL_VPORT_XOFFSET_2
+ 0x0, // PA_CL_VPORT_YSCALE_2
+ 0x0, // PA_CL_VPORT_YOFFSET_2
+ 0x0, // PA_CL_VPORT_ZSCALE_2
+ 0x0, // PA_CL_VPORT_ZOFFSET_2
+ 0x0, // PA_CL_VPORT_XSCALE_3
+ 0x0, // PA_CL_VPORT_XOFFSET_3
+ 0x0, // PA_CL_VPORT_YSCALE_3
+ 0x0, // PA_CL_VPORT_YOFFSET_3
+ 0x0, // PA_CL_VPORT_ZSCALE_3
+ 0x0, // PA_CL_VPORT_ZOFFSET_3
+ 0x0, // PA_CL_VPORT_XSCALE_4
+ 0x0, // PA_CL_VPORT_XOFFSET_4
+ 0x0, // PA_CL_VPORT_YSCALE_4
+ 0x0, // PA_CL_VPORT_YOFFSET_4
+ 0x0, // PA_CL_VPORT_ZSCALE_4
+ 0x0, // PA_CL_VPORT_ZOFFSET_4
+ 0x0, // PA_CL_VPORT_XSCALE_5
+ 0x0, // PA_CL_VPORT_XOFFSET_5
+ 0x0, // PA_CL_VPORT_YSCALE_5
+ 0x0, // PA_CL_VPORT_YOFFSET_5
+ 0x0, // PA_CL_VPORT_ZSCALE_5
+ 0x0, // PA_CL_VPORT_ZOFFSET_5
+ 0x0, // PA_CL_VPORT_XSCALE_6
+ 0x0, // PA_CL_VPORT_XOFFSET_6
+ 0x0, // PA_CL_VPORT_YSCALE_6
+ 0x0, // PA_CL_VPORT_YOFFSET_6
+ 0x0, // PA_CL_VPORT_ZSCALE_6
+ 0x0, // PA_CL_VPORT_ZOFFSET_6
+ 0x0, // PA_CL_VPORT_XSCALE_7
+ 0x0, // PA_CL_VPORT_XOFFSET_7
+ 0x0, // PA_CL_VPORT_YSCALE_7
+ 0x0, // PA_CL_VPORT_YOFFSET_7
+ 0x0, // PA_CL_VPORT_ZSCALE_7
+ 0x0, // PA_CL_VPORT_ZOFFSET_7
+ 0x0, // PA_CL_VPORT_XSCALE_8
+ 0x0, // PA_CL_VPORT_XOFFSET_8
+ 0x0, // PA_CL_VPORT_YSCALE_8
+ 0x0, // PA_CL_VPORT_YOFFSET_8
+ 0x0, // PA_CL_VPORT_ZSCALE_8
+ 0x0, // PA_CL_VPORT_ZOFFSET_8
+ 0x0, // PA_CL_VPORT_XSCALE_9
+ 0x0, // PA_CL_VPORT_XOFFSET_9
+ 0x0, // PA_CL_VPORT_YSCALE_9
+ 0x0, // PA_CL_VPORT_YOFFSET_9
+ 0x0, // PA_CL_VPORT_ZSCALE_9
+ 0x0, // PA_CL_VPORT_ZOFFSET_9
+ 0x0, // PA_CL_VPORT_XSCALE_10
+ 0x0, // PA_CL_VPORT_XOFFSET_10
+ 0x0, // PA_CL_VPORT_YSCALE_10
+ 0x0, // PA_CL_VPORT_YOFFSET_10
+ 0x0, // PA_CL_VPORT_ZSCALE_10
+ 0x0, // PA_CL_VPORT_ZOFFSET_10
+ 0x0, // PA_CL_VPORT_XSCALE_11
+ 0x0, // PA_CL_VPORT_XOFFSET_11
+ 0x0, // PA_CL_VPORT_YSCALE_11
+ 0x0, // PA_CL_VPORT_YOFFSET_11
+ 0x0, // PA_CL_VPORT_ZSCALE_11
+ 0x0, // PA_CL_VPORT_ZOFFSET_11
+ 0x0, // PA_CL_VPORT_XSCALE_12
+ 0x0, // PA_CL_VPORT_XOFFSET_12
+ 0x0, // PA_CL_VPORT_YSCALE_12
+ 0x0, // PA_CL_VPORT_YOFFSET_12
+ 0x0, // PA_CL_VPORT_ZSCALE_12
+ 0x0, // PA_CL_VPORT_ZOFFSET_12
+ 0x0, // PA_CL_VPORT_XSCALE_13
+ 0x0, // PA_CL_VPORT_XOFFSET_13
+ 0x0, // PA_CL_VPORT_YSCALE_13
+ 0x0, // PA_CL_VPORT_YOFFSET_13
+ 0x0, // PA_CL_VPORT_ZSCALE_13
+ 0x0, // PA_CL_VPORT_ZOFFSET_13
+ 0x0, // PA_CL_VPORT_XSCALE_14
+ 0x0, // PA_CL_VPORT_XOFFSET_14
+ 0x0, // PA_CL_VPORT_YSCALE_14
+ 0x0, // PA_CL_VPORT_YOFFSET_14
+ 0x0, // PA_CL_VPORT_ZSCALE_14
+ 0x0, // PA_CL_VPORT_ZOFFSET_14
+ 0x0, // PA_CL_VPORT_XSCALE_15
+ 0x0, // PA_CL_VPORT_XOFFSET_15
+ 0x0, // PA_CL_VPORT_YSCALE_15
+ 0x0, // PA_CL_VPORT_YOFFSET_15
+ 0x0, // PA_CL_VPORT_ZSCALE_15
+ 0x0, // PA_CL_VPORT_ZOFFSET_15
+ 0x0, // PA_CL_UCP_0_X
+ 0x0, // PA_CL_UCP_0_Y
+ 0x0, // PA_CL_UCP_0_Z
+ 0x0, // PA_CL_UCP_0_W
+ 0x0, // PA_CL_UCP_1_X
+ 0x0, // PA_CL_UCP_1_Y
+ 0x0, // PA_CL_UCP_1_Z
+ 0x0, // PA_CL_UCP_1_W
+ 0x0, // PA_CL_UCP_2_X
+ 0x0, // PA_CL_UCP_2_Y
+ 0x0, // PA_CL_UCP_2_Z
+ 0x0, // PA_CL_UCP_2_W
+ 0x0, // PA_CL_UCP_3_X
+ 0x0, // PA_CL_UCP_3_Y
+ 0x0, // PA_CL_UCP_3_Z
+ 0x0, // PA_CL_UCP_3_W
+ 0x0, // PA_CL_UCP_4_X
+ 0x0, // PA_CL_UCP_4_Y
+ 0x0, // PA_CL_UCP_4_Z
+ 0x0, // PA_CL_UCP_4_W
+ 0x0, // PA_CL_UCP_5_X
+ 0x0, // PA_CL_UCP_5_Y
+ 0x0, // PA_CL_UCP_5_Z
+ 0x0 // PA_CL_UCP_5_W
};
static const uint32_t SpiPsInputCntl0Nv10[] = {
- 0x0 , // SPI_PS_INPUT_CNTL_0
- 0x0 , // SPI_PS_INPUT_CNTL_1
- 0x0 , // SPI_PS_INPUT_CNTL_2
- 0x0 , // SPI_PS_INPUT_CNTL_3
- 0x0 , // SPI_PS_INPUT_CNTL_4
- 0x0 , // SPI_PS_INPUT_CNTL_5
- 0x0 , // SPI_PS_INPUT_CNTL_6
- 0x0 , // SPI_PS_INPUT_CNTL_7
- 0x0 , // SPI_PS_INPUT_CNTL_8
- 0x0 , // SPI_PS_INPUT_CNTL_9
- 0x0 , // SPI_PS_INPUT_CNTL_10
- 0x0 , // SPI_PS_INPUT_CNTL_11
- 0x0 , // SPI_PS_INPUT_CNTL_12
- 0x0 , // SPI_PS_INPUT_CNTL_13
- 0x0 , // SPI_PS_INPUT_CNTL_14
- 0x0 , // SPI_PS_INPUT_CNTL_15
- 0x0 , // SPI_PS_INPUT_CNTL_16
- 0x0 , // SPI_PS_INPUT_CNTL_17
- 0x0 , // SPI_PS_INPUT_CNTL_18
- 0x0 , // SPI_PS_INPUT_CNTL_19
- 0x0 , // SPI_PS_INPUT_CNTL_20
- 0x0 , // SPI_PS_INPUT_CNTL_21
- 0x0 , // SPI_PS_INPUT_CNTL_22
- 0x0 , // SPI_PS_INPUT_CNTL_23
- 0x0 , // SPI_PS_INPUT_CNTL_24
- 0x0 , // SPI_PS_INPUT_CNTL_25
- 0x0 , // SPI_PS_INPUT_CNTL_26
- 0x0 , // SPI_PS_INPUT_CNTL_27
- 0x0 , // SPI_PS_INPUT_CNTL_28
- 0x0 , // SPI_PS_INPUT_CNTL_29
- 0x0 , // SPI_PS_INPUT_CNTL_30
- 0x0 , // SPI_PS_INPUT_CNTL_31
- 0x0 , // SPI_VS_OUT_CONFIG
- 0x0 , //
- 0x0 , // SPI_PS_INPUT_ENA
- 0x0 , // SPI_PS_INPUT_ADDR
- 0x0 , // SPI_INTERP_CONTROL_0
- 0x2 , // SPI_PS_IN_CONTROL
- 0x0 , //
- 0x0 , // SPI_BARYC_CNTL
- 0x0 , //
- 0x0 , // SPI_TMPRING_SIZE
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // SPI_SHADER_IDX_FORMAT
- 0x0 , // SPI_SHADER_POS_FORMAT
- 0x0 , // SPI_SHADER_Z_FORMAT
- 0x0 // SPI_SHADER_COL_FORMAT
+ 0x0, // SPI_PS_INPUT_CNTL_0
+ 0x0, // SPI_PS_INPUT_CNTL_1
+ 0x0, // SPI_PS_INPUT_CNTL_2
+ 0x0, // SPI_PS_INPUT_CNTL_3
+ 0x0, // SPI_PS_INPUT_CNTL_4
+ 0x0, // SPI_PS_INPUT_CNTL_5
+ 0x0, // SPI_PS_INPUT_CNTL_6
+ 0x0, // SPI_PS_INPUT_CNTL_7
+ 0x0, // SPI_PS_INPUT_CNTL_8
+ 0x0, // SPI_PS_INPUT_CNTL_9
+ 0x0, // SPI_PS_INPUT_CNTL_10
+ 0x0, // SPI_PS_INPUT_CNTL_11
+ 0x0, // SPI_PS_INPUT_CNTL_12
+ 0x0, // SPI_PS_INPUT_CNTL_13
+ 0x0, // SPI_PS_INPUT_CNTL_14
+ 0x0, // SPI_PS_INPUT_CNTL_15
+ 0x0, // SPI_PS_INPUT_CNTL_16
+ 0x0, // SPI_PS_INPUT_CNTL_17
+ 0x0, // SPI_PS_INPUT_CNTL_18
+ 0x0, // SPI_PS_INPUT_CNTL_19
+ 0x0, // SPI_PS_INPUT_CNTL_20
+ 0x0, // SPI_PS_INPUT_CNTL_21
+ 0x0, // SPI_PS_INPUT_CNTL_22
+ 0x0, // SPI_PS_INPUT_CNTL_23
+ 0x0, // SPI_PS_INPUT_CNTL_24
+ 0x0, // SPI_PS_INPUT_CNTL_25
+ 0x0, // SPI_PS_INPUT_CNTL_26
+ 0x0, // SPI_PS_INPUT_CNTL_27
+ 0x0, // SPI_PS_INPUT_CNTL_28
+ 0x0, // SPI_PS_INPUT_CNTL_29
+ 0x0, // SPI_PS_INPUT_CNTL_30
+ 0x0, // SPI_PS_INPUT_CNTL_31
+ 0x0, // SPI_VS_OUT_CONFIG
+ 0x0, //
+ 0x0, // SPI_PS_INPUT_ENA
+ 0x0, // SPI_PS_INPUT_ADDR
+ 0x0, // SPI_INTERP_CONTROL_0
+ 0x2, // SPI_PS_IN_CONTROL
+ 0x0, //
+ 0x0, // SPI_BARYC_CNTL
+ 0x0, //
+ 0x0, // SPI_TMPRING_SIZE
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // SPI_SHADER_IDX_FORMAT
+ 0x0, // SPI_SHADER_POS_FORMAT
+ 0x0, // SPI_SHADER_Z_FORMAT
+ 0x0 // SPI_SHADER_COL_FORMAT
};
static const uint32_t SxPsDownconvertNv10[] = {
- 0x0 , // SX_PS_DOWNCONVERT
- 0x0 , // SX_BLEND_OPT_EPSILON
- 0x0 , // SX_BLEND_OPT_CONTROL
- 0x0 , // SX_MRT0_BLEND_OPT
- 0x0 , // SX_MRT1_BLEND_OPT
- 0x0 , // SX_MRT2_BLEND_OPT
- 0x0 , // SX_MRT3_BLEND_OPT
- 0x0 , // SX_MRT4_BLEND_OPT
- 0x0 , // SX_MRT5_BLEND_OPT
- 0x0 , // SX_MRT6_BLEND_OPT
- 0x0 , // SX_MRT7_BLEND_OPT
- 0x0 , // CB_BLEND0_CONTROL
- 0x0 , // CB_BLEND1_CONTROL
- 0x0 , // CB_BLEND2_CONTROL
- 0x0 , // CB_BLEND3_CONTROL
- 0x0 , // CB_BLEND4_CONTROL
- 0x0 , // CB_BLEND5_CONTROL
- 0x0 , // CB_BLEND6_CONTROL
- 0x0 // CB_BLEND7_CONTROL
+ 0x0, // SX_PS_DOWNCONVERT
+ 0x0, // SX_BLEND_OPT_EPSILON
+ 0x0, // SX_BLEND_OPT_CONTROL
+ 0x0, // SX_MRT0_BLEND_OPT
+ 0x0, // SX_MRT1_BLEND_OPT
+ 0x0, // SX_MRT2_BLEND_OPT
+ 0x0, // SX_MRT3_BLEND_OPT
+ 0x0, // SX_MRT4_BLEND_OPT
+ 0x0, // SX_MRT5_BLEND_OPT
+ 0x0, // SX_MRT6_BLEND_OPT
+ 0x0, // SX_MRT7_BLEND_OPT
+ 0x0, // CB_BLEND0_CONTROL
+ 0x0, // CB_BLEND1_CONTROL
+ 0x0, // CB_BLEND2_CONTROL
+ 0x0, // CB_BLEND3_CONTROL
+ 0x0, // CB_BLEND4_CONTROL
+ 0x0, // CB_BLEND5_CONTROL
+ 0x0, // CB_BLEND6_CONTROL
+ 0x0 // CB_BLEND7_CONTROL
};
static const uint32_t GeMaxOutputPerSubgroupNv10[] = {
- 0x0 , // GE_MAX_OUTPUT_PER_SUBGROUP
- 0x0 , // DB_DEPTH_CONTROL
- 0x0 , // DB_EQAA
- 0x0 , // CB_COLOR_CONTROL
- 0x0 , // DB_SHADER_CONTROL
- 0x90000 , // PA_CL_CLIP_CNTL
- 0x4 , // PA_SU_SC_MODE_CNTL
- 0x0 , // PA_CL_VTE_CNTL
- 0x0 , // PA_CL_VS_OUT_CNTL
- 0x0 // PA_CL_NANINF_CNTL
+ 0x0, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x0, // DB_DEPTH_CONTROL
+ 0x0, // DB_EQAA
+ 0x0, // CB_COLOR_CONTROL
+ 0x0, // DB_SHADER_CONTROL
+ 0x90000, // PA_CL_CLIP_CNTL
+ 0x4, // PA_SU_SC_MODE_CNTL
+ 0x0, // PA_CL_VTE_CNTL
+ 0x0, // PA_CL_VS_OUT_CNTL
+ 0x0 // PA_CL_NANINF_CNTL
};
static const uint32_t PaSuPrimFilterCntlNv10[] = {
- 0x0 , // PA_SU_PRIM_FILTER_CNTL
- 0x0 , // PA_SU_SMALL_PRIM_FILTER_CNTL
- 0x0 , // PA_CL_OBJPRIM_ID_CNTL
- 0x0 , // PA_CL_NGG_CNTL
- 0x0 , // PA_SU_OVER_RASTERIZATION_CNTL
- 0x0 , // PA_STEREO_CNTL
- 0x0 // PA_STATE_STEREO_X
+ 0x0, // PA_SU_PRIM_FILTER_CNTL
+ 0x0, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x0, // PA_CL_OBJPRIM_ID_CNTL
+ 0x0, // PA_CL_NGG_CNTL
+ 0x0, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x0, // PA_STEREO_CNTL
+ 0x0 // PA_STATE_STEREO_X
};
static const uint32_t PaSuPointSizeNv10[] = {
- 0x0 , // PA_SU_POINT_SIZE
- 0x0 , // PA_SU_POINT_MINMAX
- 0x0 , // PA_SU_LINE_CNTL
- 0x0 // PA_SC_LINE_STIPPLE
+ 0x0, // PA_SU_POINT_SIZE
+ 0x0, // PA_SU_POINT_MINMAX
+ 0x0, // PA_SU_LINE_CNTL
+ 0x0 // PA_SC_LINE_STIPPLE
};
static const uint32_t VgtHosMaxTessLevelNv10[] = {
- 0x0 , // VGT_HOS_MAX_TESS_LEVEL
- 0x0 // VGT_HOS_MIN_TESS_LEVEL
+ 0x0, // VGT_HOS_MAX_TESS_LEVEL
+ 0x0 // VGT_HOS_MIN_TESS_LEVEL
};
static const uint32_t VgtGsModeNv10[] = {
- 0x0 , // VGT_GS_MODE
- 0x0 , // VGT_GS_ONCHIP_CNTL
- 0x0 , // PA_SC_MODE_CNTL_0
- 0x0 , // PA_SC_MODE_CNTL_1
- 0x0 , // VGT_ENHANCE
- 0x100 , // VGT_GS_PER_ES
- 0x80 , // VGT_ES_PER_GS
- 0x2 , // VGT_GS_PER_VS
- 0x0 , // VGT_GSVS_RING_OFFSET_1
- 0x0 , // VGT_GSVS_RING_OFFSET_2
- 0x0 , // VGT_GSVS_RING_OFFSET_3
- 0x0 // VGT_GS_OUT_PRIM_TYPE
+ 0x0, // VGT_GS_MODE
+ 0x0, // VGT_GS_ONCHIP_CNTL
+ 0x0, // PA_SC_MODE_CNTL_0
+ 0x0, // PA_SC_MODE_CNTL_1
+ 0x0, // VGT_ENHANCE
+ 0x100, // VGT_GS_PER_ES
+ 0x80, // VGT_ES_PER_GS
+ 0x2, // VGT_GS_PER_VS
+ 0x0, // VGT_GSVS_RING_OFFSET_1
+ 0x0, // VGT_GSVS_RING_OFFSET_2
+ 0x0, // VGT_GSVS_RING_OFFSET_3
+ 0x0 // VGT_GS_OUT_PRIM_TYPE
};
static const uint32_t VgtPrimitiveidEnNv10[] = {
- 0x0 // VGT_PRIMITIVEID_EN
+ 0x0 // VGT_PRIMITIVEID_EN
};
static const uint32_t VgtPrimitiveidResetNv10[] = {
- 0x0 // VGT_PRIMITIVEID_RESET
+ 0x0 // VGT_PRIMITIVEID_RESET
};
static const uint32_t VgtDrawPayloadCntlNv10[] = {
- 0x0 , // VGT_DRAW_PAYLOAD_CNTL
- 0x0 , //
- 0x0 , // VGT_INSTANCE_STEP_RATE_0
- 0x0 , // VGT_INSTANCE_STEP_RATE_1
- 0x0 , // IA_MULTI_VGT_PARAM
- 0x0 , // VGT_ESGS_RING_ITEMSIZE
- 0x0 , // VGT_GSVS_RING_ITEMSIZE
- 0x0 , // VGT_REUSE_OFF
- 0x0 , // VGT_VTX_CNT_EN
- 0x0 , // DB_HTILE_SURFACE
- 0x0 , // DB_SRESULTS_COMPARE_STATE0
- 0x0 , // DB_SRESULTS_COMPARE_STATE1
- 0x0 , // DB_PRELOAD_CONTROL
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_0
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_0
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_0
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_1
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_1
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_1
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_2
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_2
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_2
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_3
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_3
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_3
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
- 0x0 , //
- 0x0 , // VGT_GS_MAX_VERT_OUT
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // GE_NGG_SUBGRP_CNTL
- 0x0 , // VGT_TESS_DISTRIBUTION
- 0x0 , // VGT_SHADER_STAGES_EN
- 0x0 , // VGT_LS_HS_CONFIG
- 0x0 , // VGT_GS_VERT_ITEMSIZE
- 0x0 , // VGT_GS_VERT_ITEMSIZE_1
- 0x0 , // VGT_GS_VERT_ITEMSIZE_2
- 0x0 , // VGT_GS_VERT_ITEMSIZE_3
- 0x0 , // VGT_TF_PARAM
- 0x0 , // DB_ALPHA_TO_MASK
- 0x0 , // VGT_DISPATCH_DRAW_INDEX
- 0x0 , // PA_SU_POLY_OFFSET_DB_FMT_CNTL
- 0x0 , // PA_SU_POLY_OFFSET_CLAMP
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_OFFSET
- 0x0 , // PA_SU_POLY_OFFSET_BACK_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_BACK_OFFSET
- 0x0 , // VGT_GS_INSTANCE_CNT
- 0x0 , // VGT_STRMOUT_CONFIG
- 0x0 // VGT_STRMOUT_BUFFER_CONFIG
+ 0x0, // VGT_DRAW_PAYLOAD_CNTL
+ 0x0, //
+ 0x0, // VGT_INSTANCE_STEP_RATE_0
+ 0x0, // VGT_INSTANCE_STEP_RATE_1
+ 0x0, // IA_MULTI_VGT_PARAM
+ 0x0, // VGT_ESGS_RING_ITEMSIZE
+ 0x0, // VGT_GSVS_RING_ITEMSIZE
+ 0x0, // VGT_REUSE_OFF
+ 0x0, // VGT_VTX_CNT_EN
+ 0x0, // DB_HTILE_SURFACE
+ 0x0, // DB_SRESULTS_COMPARE_STATE0
+ 0x0, // DB_SRESULTS_COMPARE_STATE1
+ 0x0, // DB_PRELOAD_CONTROL
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_0
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_1
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_2
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_3
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0x0, //
+ 0x0, // VGT_GS_MAX_VERT_OUT
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // GE_NGG_SUBGRP_CNTL
+ 0x0, // VGT_TESS_DISTRIBUTION
+ 0x0, // VGT_SHADER_STAGES_EN
+ 0x0, // VGT_LS_HS_CONFIG
+ 0x0, // VGT_GS_VERT_ITEMSIZE
+ 0x0, // VGT_GS_VERT_ITEMSIZE_1
+ 0x0, // VGT_GS_VERT_ITEMSIZE_2
+ 0x0, // VGT_GS_VERT_ITEMSIZE_3
+ 0x0, // VGT_TF_PARAM
+ 0x0, // DB_ALPHA_TO_MASK
+ 0x0, // VGT_DISPATCH_DRAW_INDEX
+ 0x0, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x0, // PA_SU_POLY_OFFSET_CLAMP
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x0, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x0, // VGT_GS_INSTANCE_CNT
+ 0x0, // VGT_STRMOUT_CONFIG
+ 0x0 // VGT_STRMOUT_BUFFER_CONFIG
};
static const uint32_t PaScCentroidPriority0Nv10[] = {
- 0x0 , // PA_SC_CENTROID_PRIORITY_0
- 0x0 , // PA_SC_CENTROID_PRIORITY_1
- 0x1000 , // PA_SC_LINE_CNTL
- 0x0 , // PA_SC_AA_CONFIG
- 0x5 , // PA_SU_VTX_CNTL
+ 0x0, // PA_SC_CENTROID_PRIORITY_0
+ 0x0, // PA_SC_CENTROID_PRIORITY_1
+ 0x1000, // PA_SC_LINE_CNTL
+ 0x0, // PA_SC_AA_CONFIG
+ 0x5, // PA_SU_VTX_CNTL
0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
- 0x0 , // PA_SC_SHADER_CONTROL
- 0x3 , // PA_SC_BINNER_CNTL_0
- 0x0 , // PA_SC_BINNER_CNTL_1
- 0x100000 , // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
- 0x0 , // PA_SC_NGG_MODE_CNTL
- 0x0 , //
- 0x1e , // VGT_VERTEX_REUSE_BLOCK_CNTL
- 0x20 , // VGT_OUT_DEALLOC_CNTL
- 0x0 , // CB_COLOR0_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR0_VIEW
- 0x0 , // CB_COLOR0_INFO
- 0x0 , // CB_COLOR0_ATTRIB
- 0x0 , // CB_COLOR0_DCC_CONTROL
- 0x0 , // CB_COLOR0_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR0_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR0_CLEAR_WORD0
- 0x0 , // CB_COLOR0_CLEAR_WORD1
- 0x0 , // CB_COLOR0_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR1_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR1_VIEW
- 0x0 , // CB_COLOR1_INFO
- 0x0 , // CB_COLOR1_ATTRIB
- 0x0 , // CB_COLOR1_DCC_CONTROL
- 0x0 , // CB_COLOR1_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR1_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR1_CLEAR_WORD0
- 0x0 , // CB_COLOR1_CLEAR_WORD1
- 0x0 , // CB_COLOR1_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR2_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR2_VIEW
- 0x0 , // CB_COLOR2_INFO
- 0x0 , // CB_COLOR2_ATTRIB
- 0x0 , // CB_COLOR2_DCC_CONTROL
- 0x0 , // CB_COLOR2_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR2_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR2_CLEAR_WORD0
- 0x0 , // CB_COLOR2_CLEAR_WORD1
- 0x0 , // CB_COLOR2_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR3_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR3_VIEW
- 0x0 , // CB_COLOR3_INFO
- 0x0 , // CB_COLOR3_ATTRIB
- 0x0 , // CB_COLOR3_DCC_CONTROL
- 0x0 , // CB_COLOR3_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR3_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR3_CLEAR_WORD0
- 0x0 , // CB_COLOR3_CLEAR_WORD1
- 0x0 , // CB_COLOR3_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR4_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR4_VIEW
- 0x0 , // CB_COLOR4_INFO
- 0x0 , // CB_COLOR4_ATTRIB
- 0x0 , // CB_COLOR4_DCC_CONTROL
- 0x0 , // CB_COLOR4_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR4_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR4_CLEAR_WORD0
- 0x0 , // CB_COLOR4_CLEAR_WORD1
- 0x0 , // CB_COLOR4_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR5_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR5_VIEW
- 0x0 , // CB_COLOR5_INFO
- 0x0 , // CB_COLOR5_ATTRIB
- 0x0 , // CB_COLOR5_DCC_CONTROL
- 0x0 , // CB_COLOR5_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR5_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR5_CLEAR_WORD0
- 0x0 , // CB_COLOR5_CLEAR_WORD1
- 0x0 , // CB_COLOR5_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR6_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR6_VIEW
- 0x0 , // CB_COLOR6_INFO
- 0x0 , // CB_COLOR6_ATTRIB
- 0x0 , // CB_COLOR6_DCC_CONTROL
- 0x0 , // CB_COLOR6_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR6_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR6_CLEAR_WORD0
- 0x0 , // CB_COLOR6_CLEAR_WORD1
- 0x0 , // CB_COLOR6_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR7_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR7_VIEW
- 0x0 , // CB_COLOR7_INFO
- 0x0 , // CB_COLOR7_ATTRIB
- 0x0 , // CB_COLOR7_DCC_CONTROL
- 0x0 , // CB_COLOR7_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR7_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR7_CLEAR_WORD0
- 0x0 , // CB_COLOR7_CLEAR_WORD1
- 0x0 , // CB_COLOR7_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR0_BASE_EXT
- 0x0 , // CB_COLOR1_BASE_EXT
- 0x0 , // CB_COLOR2_BASE_EXT
- 0x0 , // CB_COLOR3_BASE_EXT
- 0x0 , // CB_COLOR4_BASE_EXT
- 0x0 , // CB_COLOR5_BASE_EXT
- 0x0 , // CB_COLOR6_BASE_EXT
- 0x0 , // CB_COLOR7_BASE_EXT
- 0x0 , // CB_COLOR0_CMASK_BASE_EXT
- 0x0 , // CB_COLOR1_CMASK_BASE_EXT
- 0x0 , // CB_COLOR2_CMASK_BASE_EXT
- 0x0 , // CB_COLOR3_CMASK_BASE_EXT
- 0x0 , // CB_COLOR4_CMASK_BASE_EXT
- 0x0 , // CB_COLOR5_CMASK_BASE_EXT
- 0x0 , // CB_COLOR6_CMASK_BASE_EXT
- 0x0 , // CB_COLOR7_CMASK_BASE_EXT
- 0x0 , // CB_COLOR0_FMASK_BASE_EXT
- 0x0 , // CB_COLOR1_FMASK_BASE_EXT
- 0x0 , // CB_COLOR2_FMASK_BASE_EXT
- 0x0 , // CB_COLOR3_FMASK_BASE_EXT
- 0x0 , // CB_COLOR4_FMASK_BASE_EXT
- 0x0 , // CB_COLOR5_FMASK_BASE_EXT
- 0x0 , // CB_COLOR6_FMASK_BASE_EXT
- 0x0 , // CB_COLOR7_FMASK_BASE_EXT
- 0x0 , // CB_COLOR0_DCC_BASE_EXT
- 0x0 , // CB_COLOR1_DCC_BASE_EXT
- 0x0 , // CB_COLOR2_DCC_BASE_EXT
- 0x0 , // CB_COLOR3_DCC_BASE_EXT
- 0x0 , // CB_COLOR4_DCC_BASE_EXT
- 0x0 , // CB_COLOR5_DCC_BASE_EXT
- 0x0 , // CB_COLOR6_DCC_BASE_EXT
- 0x0 , // CB_COLOR7_DCC_BASE_EXT
- 0x0 , // CB_COLOR0_ATTRIB2
- 0x0 , // CB_COLOR1_ATTRIB2
- 0x0 , // CB_COLOR2_ATTRIB2
- 0x0 , // CB_COLOR3_ATTRIB2
- 0x0 , // CB_COLOR4_ATTRIB2
- 0x0 , // CB_COLOR5_ATTRIB2
- 0x0 , // CB_COLOR6_ATTRIB2
- 0x0 , // CB_COLOR7_ATTRIB2
- 0x0 , // CB_COLOR0_ATTRIB3
- 0x0 , // CB_COLOR1_ATTRIB3
- 0x0 , // CB_COLOR2_ATTRIB3
- 0x0 , // CB_COLOR3_ATTRIB3
- 0x0 , // CB_COLOR4_ATTRIB3
- 0x0 , // CB_COLOR5_ATTRIB3
- 0x0 , // CB_COLOR6_ATTRIB3
+ 0x0, // PA_SC_SHADER_CONTROL
+ 0x3, // PA_SC_BINNER_CNTL_0
+ 0x0, // PA_SC_BINNER_CNTL_1
+ 0x100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x0, // PA_SC_NGG_MODE_CNTL
+ 0x0, //
+ 0x1e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x20, // VGT_OUT_DEALLOC_CNTL
+ 0x0, // CB_COLOR0_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR0_VIEW
+ 0x0, // CB_COLOR0_INFO
+ 0x0, // CB_COLOR0_ATTRIB
+ 0x0, // CB_COLOR0_DCC_CONTROL
+ 0x0, // CB_COLOR0_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR0_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR0_CLEAR_WORD0
+ 0x0, // CB_COLOR0_CLEAR_WORD1
+ 0x0, // CB_COLOR0_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR1_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR1_VIEW
+ 0x0, // CB_COLOR1_INFO
+ 0x0, // CB_COLOR1_ATTRIB
+ 0x0, // CB_COLOR1_DCC_CONTROL
+ 0x0, // CB_COLOR1_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR1_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR1_CLEAR_WORD0
+ 0x0, // CB_COLOR1_CLEAR_WORD1
+ 0x0, // CB_COLOR1_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR2_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR2_VIEW
+ 0x0, // CB_COLOR2_INFO
+ 0x0, // CB_COLOR2_ATTRIB
+ 0x0, // CB_COLOR2_DCC_CONTROL
+ 0x0, // CB_COLOR2_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR2_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR2_CLEAR_WORD0
+ 0x0, // CB_COLOR2_CLEAR_WORD1
+ 0x0, // CB_COLOR2_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR3_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR3_VIEW
+ 0x0, // CB_COLOR3_INFO
+ 0x0, // CB_COLOR3_ATTRIB
+ 0x0, // CB_COLOR3_DCC_CONTROL
+ 0x0, // CB_COLOR3_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR3_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR3_CLEAR_WORD0
+ 0x0, // CB_COLOR3_CLEAR_WORD1
+ 0x0, // CB_COLOR3_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR4_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR4_VIEW
+ 0x0, // CB_COLOR4_INFO
+ 0x0, // CB_COLOR4_ATTRIB
+ 0x0, // CB_COLOR4_DCC_CONTROL
+ 0x0, // CB_COLOR4_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR4_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR4_CLEAR_WORD0
+ 0x0, // CB_COLOR4_CLEAR_WORD1
+ 0x0, // CB_COLOR4_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR5_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR5_VIEW
+ 0x0, // CB_COLOR5_INFO
+ 0x0, // CB_COLOR5_ATTRIB
+ 0x0, // CB_COLOR5_DCC_CONTROL
+ 0x0, // CB_COLOR5_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR5_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR5_CLEAR_WORD0
+ 0x0, // CB_COLOR5_CLEAR_WORD1
+ 0x0, // CB_COLOR5_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR6_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR6_VIEW
+ 0x0, // CB_COLOR6_INFO
+ 0x0, // CB_COLOR6_ATTRIB
+ 0x0, // CB_COLOR6_DCC_CONTROL
+ 0x0, // CB_COLOR6_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR6_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR6_CLEAR_WORD0
+ 0x0, // CB_COLOR6_CLEAR_WORD1
+ 0x0, // CB_COLOR6_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR7_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR7_VIEW
+ 0x0, // CB_COLOR7_INFO
+ 0x0, // CB_COLOR7_ATTRIB
+ 0x0, // CB_COLOR7_DCC_CONTROL
+ 0x0, // CB_COLOR7_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR7_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR7_CLEAR_WORD0
+ 0x0, // CB_COLOR7_CLEAR_WORD1
+ 0x0, // CB_COLOR7_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR0_BASE_EXT
+ 0x0, // CB_COLOR1_BASE_EXT
+ 0x0, // CB_COLOR2_BASE_EXT
+ 0x0, // CB_COLOR3_BASE_EXT
+ 0x0, // CB_COLOR4_BASE_EXT
+ 0x0, // CB_COLOR5_BASE_EXT
+ 0x0, // CB_COLOR6_BASE_EXT
+ 0x0, // CB_COLOR7_BASE_EXT
+ 0x0, // CB_COLOR0_CMASK_BASE_EXT
+ 0x0, // CB_COLOR1_CMASK_BASE_EXT
+ 0x0, // CB_COLOR2_CMASK_BASE_EXT
+ 0x0, // CB_COLOR3_CMASK_BASE_EXT
+ 0x0, // CB_COLOR4_CMASK_BASE_EXT
+ 0x0, // CB_COLOR5_CMASK_BASE_EXT
+ 0x0, // CB_COLOR6_CMASK_BASE_EXT
+ 0x0, // CB_COLOR7_CMASK_BASE_EXT
+ 0x0, // CB_COLOR0_FMASK_BASE_EXT
+ 0x0, // CB_COLOR1_FMASK_BASE_EXT
+ 0x0, // CB_COLOR2_FMASK_BASE_EXT
+ 0x0, // CB_COLOR3_FMASK_BASE_EXT
+ 0x0, // CB_COLOR4_FMASK_BASE_EXT
+ 0x0, // CB_COLOR5_FMASK_BASE_EXT
+ 0x0, // CB_COLOR6_FMASK_BASE_EXT
+ 0x0, // CB_COLOR7_FMASK_BASE_EXT
+ 0x0, // CB_COLOR0_DCC_BASE_EXT
+ 0x0, // CB_COLOR1_DCC_BASE_EXT
+ 0x0, // CB_COLOR2_DCC_BASE_EXT
+ 0x0, // CB_COLOR3_DCC_BASE_EXT
+ 0x0, // CB_COLOR4_DCC_BASE_EXT
+ 0x0, // CB_COLOR5_DCC_BASE_EXT
+ 0x0, // CB_COLOR6_DCC_BASE_EXT
+ 0x0, // CB_COLOR7_DCC_BASE_EXT
+ 0x0, // CB_COLOR0_ATTRIB2
+ 0x0, // CB_COLOR1_ATTRIB2
+ 0x0, // CB_COLOR2_ATTRIB2
+ 0x0, // CB_COLOR3_ATTRIB2
+ 0x0, // CB_COLOR4_ATTRIB2
+ 0x0, // CB_COLOR5_ATTRIB2
+ 0x0, // CB_COLOR6_ATTRIB2
+ 0x0, // CB_COLOR7_ATTRIB2
+ 0x0, // CB_COLOR0_ATTRIB3
+ 0x0, // CB_COLOR1_ATTRIB3
+ 0x0, // CB_COLOR2_ATTRIB3
+ 0x0, // CB_COLOR3_ATTRIB3
+ 0x0, // CB_COLOR4_ATTRIB3
+ 0x0, // CB_COLOR5_ATTRIB3
+ 0x0, // CB_COLOR6_ATTRIB3
0x0 // CB_COLOR7_ATTRIB3
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlNv10));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Nv10));
- set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, SET(VgtMultiPrimIbResetIndxNv10));
+ set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ SET(VgtMultiPrimIbResetIndxNv10));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Nv10));
set_context_reg_seq_array(cs, R_028754_SX_PS_DOWNCONVERT, SET(SxPsDownconvertNv10));
- set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, SET(GeMaxOutputPerSubgroupNv10));
+ set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
+ SET(GeMaxOutputPerSubgroupNv10));
set_context_reg_seq_array(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, SET(PaSuPrimFilterCntlNv10));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeNv10));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelNv10));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnNv10));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetNv10));
set_context_reg_seq_array(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlNv10));
- set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, SET(PaScCentroidPriority0Nv10));
+ set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
+ SET(PaScCentroidPriority0Nv10));
for (unsigned i = 0; i < num_reg_pairs; i++)
      set_context_reg_seq_array(cs, reg_offsets[i], 1, &reg_values[i]);
set_context_reg_seq_array_fn set_context_reg_seq_array)
{
static const uint32_t DbRenderControlGfx103[] = {
- 0x0 , // DB_RENDER_CONTROL
- 0x0 , // DB_COUNT_CONTROL
- 0x0 , // DB_DEPTH_VIEW
- 0x0 , // DB_RENDER_OVERRIDE
- 0x0 , // DB_RENDER_OVERRIDE2
- 0x0 , // DB_HTILE_DATA_BASE
- 0x0 , //
- 0x0 , // DB_DEPTH_SIZE_XY
- 0x0 , // DB_DEPTH_BOUNDS_MIN
- 0x0 , // DB_DEPTH_BOUNDS_MAX
- 0x0 , // DB_STENCIL_CLEAR
- 0x0 , // DB_DEPTH_CLEAR
- 0x0 , // PA_SC_SCREEN_SCISSOR_TL
+ 0x0, // DB_RENDER_CONTROL
+ 0x0, // DB_COUNT_CONTROL
+ 0x0, // DB_DEPTH_VIEW
+ 0x0, // DB_RENDER_OVERRIDE
+ 0x0, // DB_RENDER_OVERRIDE2
+ 0x0, // DB_HTILE_DATA_BASE
+ 0x0, //
+ 0x0, // DB_DEPTH_SIZE_XY
+ 0x0, // DB_DEPTH_BOUNDS_MIN
+ 0x0, // DB_DEPTH_BOUNDS_MAX
+ 0x0, // DB_STENCIL_CLEAR
+ 0x0, // DB_DEPTH_CLEAR
+ 0x0, // PA_SC_SCREEN_SCISSOR_TL
0x40004000, // PA_SC_SCREEN_SCISSOR_BR
- 0x0 , // DB_DFSM_CONTROL
- 0x0 , // DB_RESERVED_REG_2
- 0x0 , // DB_Z_INFO
- 0x0 , // DB_STENCIL_INFO
- 0x0 , // DB_Z_READ_BASE
- 0x0 , // DB_STENCIL_READ_BASE
- 0x0 , // DB_Z_WRITE_BASE
- 0x0 , // DB_STENCIL_WRITE_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // DB_Z_READ_BASE_HI
- 0x0 , // DB_STENCIL_READ_BASE_HI
- 0x0 , // DB_Z_WRITE_BASE_HI
- 0x0 , // DB_STENCIL_WRITE_BASE_HI
- 0x0 , // DB_HTILE_DATA_BASE_HI
- 0x0 , // DB_RMI_L2_CACHE_CONTROL
- 0x0 , // TA_BC_BASE_ADDR
+ 0x0, // DB_DFSM_CONTROL
+ 0x0, // DB_RESERVED_REG_2
+ 0x0, // DB_Z_INFO
+ 0x0, // DB_STENCIL_INFO
+ 0x0, // DB_Z_READ_BASE
+ 0x0, // DB_STENCIL_READ_BASE
+ 0x0, // DB_Z_WRITE_BASE
+ 0x0, // DB_STENCIL_WRITE_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // DB_Z_READ_BASE_HI
+ 0x0, // DB_STENCIL_READ_BASE_HI
+ 0x0, // DB_Z_WRITE_BASE_HI
+ 0x0, // DB_STENCIL_WRITE_BASE_HI
+ 0x0, // DB_HTILE_DATA_BASE_HI
+ 0x0, // DB_RMI_L2_CACHE_CONTROL
+ 0x0, // TA_BC_BASE_ADDR
0x0 // TA_BC_BASE_ADDR_HI
};
static const uint32_t CoherDestBaseHi0Gfx103[] = {
- 0x0 , // COHER_DEST_BASE_HI_0
- 0x0 , // COHER_DEST_BASE_HI_1
- 0x0 , // COHER_DEST_BASE_HI_2
- 0x0 , // COHER_DEST_BASE_HI_3
- 0x0 , // COHER_DEST_BASE_2
- 0x0 , // COHER_DEST_BASE_3
- 0x0 , // PA_SC_WINDOW_OFFSET
+ 0x0, // COHER_DEST_BASE_HI_0
+ 0x0, // COHER_DEST_BASE_HI_1
+ 0x0, // COHER_DEST_BASE_HI_2
+ 0x0, // COHER_DEST_BASE_HI_3
+ 0x0, // COHER_DEST_BASE_2
+ 0x0, // COHER_DEST_BASE_3
+ 0x0, // PA_SC_WINDOW_OFFSET
0x80000000, // PA_SC_WINDOW_SCISSOR_TL
0x40004000, // PA_SC_WINDOW_SCISSOR_BR
- 0xffff , // PA_SC_CLIPRECT_RULE
- 0x0 , // PA_SC_CLIPRECT_0_TL
+ 0xffff, // PA_SC_CLIPRECT_RULE
+ 0x0, // PA_SC_CLIPRECT_0_TL
0x40004000, // PA_SC_CLIPRECT_0_BR
- 0x0 , // PA_SC_CLIPRECT_1_TL
+ 0x0, // PA_SC_CLIPRECT_1_TL
0x40004000, // PA_SC_CLIPRECT_1_BR
- 0x0 , // PA_SC_CLIPRECT_2_TL
+ 0x0, // PA_SC_CLIPRECT_2_TL
0x40004000, // PA_SC_CLIPRECT_2_BR
- 0x0 , // PA_SC_CLIPRECT_3_TL
+ 0x0, // PA_SC_CLIPRECT_3_TL
0x40004000, // PA_SC_CLIPRECT_3_BR
0xaa99aaaa, // PA_SC_EDGERULE
- 0x0 , // PA_SU_HARDWARE_SCREEN_OFFSET
+ 0x0, // PA_SU_HARDWARE_SCREEN_OFFSET
0xffffffff, // CB_TARGET_MASK
0xffffffff, // CB_SHADER_MASK
0x80000000, // PA_SC_GENERIC_SCISSOR_TL
0x40004000, // PA_SC_GENERIC_SCISSOR_BR
- 0x0 , // COHER_DEST_BASE_0
- 0x0 , // COHER_DEST_BASE_1
+ 0x0, // COHER_DEST_BASE_0
+ 0x0, // COHER_DEST_BASE_1
0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
- 0x0 , // PA_SC_VPORT_ZMIN_0
+ 0x0, // PA_SC_VPORT_ZMIN_0
0x3f800000, // PA_SC_VPORT_ZMAX_0
- 0x0 , // PA_SC_VPORT_ZMIN_1
+ 0x0, // PA_SC_VPORT_ZMIN_1
0x3f800000, // PA_SC_VPORT_ZMAX_1
- 0x0 , // PA_SC_VPORT_ZMIN_2
+ 0x0, // PA_SC_VPORT_ZMIN_2
0x3f800000, // PA_SC_VPORT_ZMAX_2
- 0x0 , // PA_SC_VPORT_ZMIN_3
+ 0x0, // PA_SC_VPORT_ZMIN_3
0x3f800000, // PA_SC_VPORT_ZMAX_3
- 0x0 , // PA_SC_VPORT_ZMIN_4
+ 0x0, // PA_SC_VPORT_ZMIN_4
0x3f800000, // PA_SC_VPORT_ZMAX_4
- 0x0 , // PA_SC_VPORT_ZMIN_5
+ 0x0, // PA_SC_VPORT_ZMIN_5
0x3f800000, // PA_SC_VPORT_ZMAX_5
- 0x0 , // PA_SC_VPORT_ZMIN_6
+ 0x0, // PA_SC_VPORT_ZMIN_6
0x3f800000, // PA_SC_VPORT_ZMAX_6
- 0x0 , // PA_SC_VPORT_ZMIN_7
+ 0x0, // PA_SC_VPORT_ZMIN_7
0x3f800000, // PA_SC_VPORT_ZMAX_7
- 0x0 , // PA_SC_VPORT_ZMIN_8
+ 0x0, // PA_SC_VPORT_ZMIN_8
0x3f800000, // PA_SC_VPORT_ZMAX_8
- 0x0 , // PA_SC_VPORT_ZMIN_9
+ 0x0, // PA_SC_VPORT_ZMIN_9
0x3f800000, // PA_SC_VPORT_ZMAX_9
- 0x0 , // PA_SC_VPORT_ZMIN_10
+ 0x0, // PA_SC_VPORT_ZMIN_10
0x3f800000, // PA_SC_VPORT_ZMAX_10
- 0x0 , // PA_SC_VPORT_ZMIN_11
+ 0x0, // PA_SC_VPORT_ZMIN_11
0x3f800000, // PA_SC_VPORT_ZMAX_11
- 0x0 , // PA_SC_VPORT_ZMIN_12
+ 0x0, // PA_SC_VPORT_ZMIN_12
0x3f800000, // PA_SC_VPORT_ZMAX_12
- 0x0 , // PA_SC_VPORT_ZMIN_13
+ 0x0, // PA_SC_VPORT_ZMIN_13
0x3f800000, // PA_SC_VPORT_ZMAX_13
- 0x0 , // PA_SC_VPORT_ZMIN_14
+ 0x0, // PA_SC_VPORT_ZMIN_14
0x3f800000, // PA_SC_VPORT_ZMAX_14
- 0x0 , // PA_SC_VPORT_ZMIN_15
+ 0x0, // PA_SC_VPORT_ZMIN_15
0x3f800000, // PA_SC_VPORT_ZMAX_15
- 0x0 , // PA_SC_RASTER_CONFIG
- 0x0 , // PA_SC_RASTER_CONFIG_1
- 0x0 , //
+ 0x0, // PA_SC_RASTER_CONFIG
+ 0x0, // PA_SC_RASTER_CONFIG_1
+ 0x0, //
0x0 // PA_SC_TILE_STEERING_OVERRIDE
};
static const uint32_t VgtMultiPrimIbResetIndxGfx103[] = {
- 0x0 , // VGT_MULTI_PRIM_IB_RESET_INDX
- 0x0 , // CB_RMI_GL2_CACHE_CONTROL
- 0x0 , // CB_BLEND_RED
- 0x0 , // CB_BLEND_GREEN
- 0x0 , // CB_BLEND_BLUE
- 0x0 , // CB_BLEND_ALPHA
- 0x0 , // CB_DCC_CONTROL
- 0x0 , // CB_COVERAGE_OUT_CONTROL
- 0x0 , // DB_STENCIL_CONTROL
- 0x1000000 , // DB_STENCILREFMASK
- 0x1000000 , // DB_STENCILREFMASK_BF
- 0x0 , //
- 0x0 , // PA_CL_VPORT_XSCALE
- 0x0 , // PA_CL_VPORT_XOFFSET
- 0x0 , // PA_CL_VPORT_YSCALE
- 0x0 , // PA_CL_VPORT_YOFFSET
- 0x0 , // PA_CL_VPORT_ZSCALE
- 0x0 , // PA_CL_VPORT_ZOFFSET
- 0x0 , // PA_CL_VPORT_XSCALE_1
- 0x0 , // PA_CL_VPORT_XOFFSET_1
- 0x0 , // PA_CL_VPORT_YSCALE_1
- 0x0 , // PA_CL_VPORT_YOFFSET_1
- 0x0 , // PA_CL_VPORT_ZSCALE_1
- 0x0 , // PA_CL_VPORT_ZOFFSET_1
- 0x0 , // PA_CL_VPORT_XSCALE_2
- 0x0 , // PA_CL_VPORT_XOFFSET_2
- 0x0 , // PA_CL_VPORT_YSCALE_2
- 0x0 , // PA_CL_VPORT_YOFFSET_2
- 0x0 , // PA_CL_VPORT_ZSCALE_2
- 0x0 , // PA_CL_VPORT_ZOFFSET_2
- 0x0 , // PA_CL_VPORT_XSCALE_3
- 0x0 , // PA_CL_VPORT_XOFFSET_3
- 0x0 , // PA_CL_VPORT_YSCALE_3
- 0x0 , // PA_CL_VPORT_YOFFSET_3
- 0x0 , // PA_CL_VPORT_ZSCALE_3
- 0x0 , // PA_CL_VPORT_ZOFFSET_3
- 0x0 , // PA_CL_VPORT_XSCALE_4
- 0x0 , // PA_CL_VPORT_XOFFSET_4
- 0x0 , // PA_CL_VPORT_YSCALE_4
- 0x0 , // PA_CL_VPORT_YOFFSET_4
- 0x0 , // PA_CL_VPORT_ZSCALE_4
- 0x0 , // PA_CL_VPORT_ZOFFSET_4
- 0x0 , // PA_CL_VPORT_XSCALE_5
- 0x0 , // PA_CL_VPORT_XOFFSET_5
- 0x0 , // PA_CL_VPORT_YSCALE_5
- 0x0 , // PA_CL_VPORT_YOFFSET_5
- 0x0 , // PA_CL_VPORT_ZSCALE_5
- 0x0 , // PA_CL_VPORT_ZOFFSET_5
- 0x0 , // PA_CL_VPORT_XSCALE_6
- 0x0 , // PA_CL_VPORT_XOFFSET_6
- 0x0 , // PA_CL_VPORT_YSCALE_6
- 0x0 , // PA_CL_VPORT_YOFFSET_6
- 0x0 , // PA_CL_VPORT_ZSCALE_6
- 0x0 , // PA_CL_VPORT_ZOFFSET_6
- 0x0 , // PA_CL_VPORT_XSCALE_7
- 0x0 , // PA_CL_VPORT_XOFFSET_7
- 0x0 , // PA_CL_VPORT_YSCALE_7
- 0x0 , // PA_CL_VPORT_YOFFSET_7
- 0x0 , // PA_CL_VPORT_ZSCALE_7
- 0x0 , // PA_CL_VPORT_ZOFFSET_7
- 0x0 , // PA_CL_VPORT_XSCALE_8
- 0x0 , // PA_CL_VPORT_XOFFSET_8
- 0x0 , // PA_CL_VPORT_YSCALE_8
- 0x0 , // PA_CL_VPORT_YOFFSET_8
- 0x0 , // PA_CL_VPORT_ZSCALE_8
- 0x0 , // PA_CL_VPORT_ZOFFSET_8
- 0x0 , // PA_CL_VPORT_XSCALE_9
- 0x0 , // PA_CL_VPORT_XOFFSET_9
- 0x0 , // PA_CL_VPORT_YSCALE_9
- 0x0 , // PA_CL_VPORT_YOFFSET_9
- 0x0 , // PA_CL_VPORT_ZSCALE_9
- 0x0 , // PA_CL_VPORT_ZOFFSET_9
- 0x0 , // PA_CL_VPORT_XSCALE_10
- 0x0 , // PA_CL_VPORT_XOFFSET_10
- 0x0 , // PA_CL_VPORT_YSCALE_10
- 0x0 , // PA_CL_VPORT_YOFFSET_10
- 0x0 , // PA_CL_VPORT_ZSCALE_10
- 0x0 , // PA_CL_VPORT_ZOFFSET_10
- 0x0 , // PA_CL_VPORT_XSCALE_11
- 0x0 , // PA_CL_VPORT_XOFFSET_11
- 0x0 , // PA_CL_VPORT_YSCALE_11
- 0x0 , // PA_CL_VPORT_YOFFSET_11
- 0x0 , // PA_CL_VPORT_ZSCALE_11
- 0x0 , // PA_CL_VPORT_ZOFFSET_11
- 0x0 , // PA_CL_VPORT_XSCALE_12
- 0x0 , // PA_CL_VPORT_XOFFSET_12
- 0x0 , // PA_CL_VPORT_YSCALE_12
- 0x0 , // PA_CL_VPORT_YOFFSET_12
- 0x0 , // PA_CL_VPORT_ZSCALE_12
- 0x0 , // PA_CL_VPORT_ZOFFSET_12
- 0x0 , // PA_CL_VPORT_XSCALE_13
- 0x0 , // PA_CL_VPORT_XOFFSET_13
- 0x0 , // PA_CL_VPORT_YSCALE_13
- 0x0 , // PA_CL_VPORT_YOFFSET_13
- 0x0 , // PA_CL_VPORT_ZSCALE_13
- 0x0 , // PA_CL_VPORT_ZOFFSET_13
- 0x0 , // PA_CL_VPORT_XSCALE_14
- 0x0 , // PA_CL_VPORT_XOFFSET_14
- 0x0 , // PA_CL_VPORT_YSCALE_14
- 0x0 , // PA_CL_VPORT_YOFFSET_14
- 0x0 , // PA_CL_VPORT_ZSCALE_14
- 0x0 , // PA_CL_VPORT_ZOFFSET_14
- 0x0 , // PA_CL_VPORT_XSCALE_15
- 0x0 , // PA_CL_VPORT_XOFFSET_15
- 0x0 , // PA_CL_VPORT_YSCALE_15
- 0x0 , // PA_CL_VPORT_YOFFSET_15
- 0x0 , // PA_CL_VPORT_ZSCALE_15
- 0x0 , // PA_CL_VPORT_ZOFFSET_15
- 0x0 , // PA_CL_UCP_0_X
- 0x0 , // PA_CL_UCP_0_Y
- 0x0 , // PA_CL_UCP_0_Z
- 0x0 , // PA_CL_UCP_0_W
- 0x0 , // PA_CL_UCP_1_X
- 0x0 , // PA_CL_UCP_1_Y
- 0x0 , // PA_CL_UCP_1_Z
- 0x0 , // PA_CL_UCP_1_W
- 0x0 , // PA_CL_UCP_2_X
- 0x0 , // PA_CL_UCP_2_Y
- 0x0 , // PA_CL_UCP_2_Z
- 0x0 , // PA_CL_UCP_2_W
- 0x0 , // PA_CL_UCP_3_X
- 0x0 , // PA_CL_UCP_3_Y
- 0x0 , // PA_CL_UCP_3_Z
- 0x0 , // PA_CL_UCP_3_W
- 0x0 , // PA_CL_UCP_4_X
- 0x0 , // PA_CL_UCP_4_Y
- 0x0 , // PA_CL_UCP_4_Z
- 0x0 , // PA_CL_UCP_4_W
- 0x0 , // PA_CL_UCP_5_X
- 0x0 , // PA_CL_UCP_5_Y
- 0x0 , // PA_CL_UCP_5_Z
- 0x0 // PA_CL_UCP_5_W
+ 0x0, // VGT_MULTI_PRIM_IB_RESET_INDX
+ 0x0, // CB_RMI_GL2_CACHE_CONTROL
+ 0x0, // CB_BLEND_RED
+ 0x0, // CB_BLEND_GREEN
+ 0x0, // CB_BLEND_BLUE
+ 0x0, // CB_BLEND_ALPHA
+ 0x0, // CB_DCC_CONTROL
+ 0x0, // CB_COVERAGE_OUT_CONTROL
+ 0x0, // DB_STENCIL_CONTROL
+ 0x1000000, // DB_STENCILREFMASK
+ 0x1000000, // DB_STENCILREFMASK_BF
+ 0x0, //
+ 0x0, // PA_CL_VPORT_XSCALE
+ 0x0, // PA_CL_VPORT_XOFFSET
+ 0x0, // PA_CL_VPORT_YSCALE
+ 0x0, // PA_CL_VPORT_YOFFSET
+ 0x0, // PA_CL_VPORT_ZSCALE
+ 0x0, // PA_CL_VPORT_ZOFFSET
+ 0x0, // PA_CL_VPORT_XSCALE_1
+ 0x0, // PA_CL_VPORT_XOFFSET_1
+ 0x0, // PA_CL_VPORT_YSCALE_1
+ 0x0, // PA_CL_VPORT_YOFFSET_1
+ 0x0, // PA_CL_VPORT_ZSCALE_1
+ 0x0, // PA_CL_VPORT_ZOFFSET_1
+ 0x0, // PA_CL_VPORT_XSCALE_2
+ 0x0, // PA_CL_VPORT_XOFFSET_2
+ 0x0, // PA_CL_VPORT_YSCALE_2
+ 0x0, // PA_CL_VPORT_YOFFSET_2
+ 0x0, // PA_CL_VPORT_ZSCALE_2
+ 0x0, // PA_CL_VPORT_ZOFFSET_2
+ 0x0, // PA_CL_VPORT_XSCALE_3
+ 0x0, // PA_CL_VPORT_XOFFSET_3
+ 0x0, // PA_CL_VPORT_YSCALE_3
+ 0x0, // PA_CL_VPORT_YOFFSET_3
+ 0x0, // PA_CL_VPORT_ZSCALE_3
+ 0x0, // PA_CL_VPORT_ZOFFSET_3
+ 0x0, // PA_CL_VPORT_XSCALE_4
+ 0x0, // PA_CL_VPORT_XOFFSET_4
+ 0x0, // PA_CL_VPORT_YSCALE_4
+ 0x0, // PA_CL_VPORT_YOFFSET_4
+ 0x0, // PA_CL_VPORT_ZSCALE_4
+ 0x0, // PA_CL_VPORT_ZOFFSET_4
+ 0x0, // PA_CL_VPORT_XSCALE_5
+ 0x0, // PA_CL_VPORT_XOFFSET_5
+ 0x0, // PA_CL_VPORT_YSCALE_5
+ 0x0, // PA_CL_VPORT_YOFFSET_5
+ 0x0, // PA_CL_VPORT_ZSCALE_5
+ 0x0, // PA_CL_VPORT_ZOFFSET_5
+ 0x0, // PA_CL_VPORT_XSCALE_6
+ 0x0, // PA_CL_VPORT_XOFFSET_6
+ 0x0, // PA_CL_VPORT_YSCALE_6
+ 0x0, // PA_CL_VPORT_YOFFSET_6
+ 0x0, // PA_CL_VPORT_ZSCALE_6
+ 0x0, // PA_CL_VPORT_ZOFFSET_6
+ 0x0, // PA_CL_VPORT_XSCALE_7
+ 0x0, // PA_CL_VPORT_XOFFSET_7
+ 0x0, // PA_CL_VPORT_YSCALE_7
+ 0x0, // PA_CL_VPORT_YOFFSET_7
+ 0x0, // PA_CL_VPORT_ZSCALE_7
+ 0x0, // PA_CL_VPORT_ZOFFSET_7
+ 0x0, // PA_CL_VPORT_XSCALE_8
+ 0x0, // PA_CL_VPORT_XOFFSET_8
+ 0x0, // PA_CL_VPORT_YSCALE_8
+ 0x0, // PA_CL_VPORT_YOFFSET_8
+ 0x0, // PA_CL_VPORT_ZSCALE_8
+ 0x0, // PA_CL_VPORT_ZOFFSET_8
+ 0x0, // PA_CL_VPORT_XSCALE_9
+ 0x0, // PA_CL_VPORT_XOFFSET_9
+ 0x0, // PA_CL_VPORT_YSCALE_9
+ 0x0, // PA_CL_VPORT_YOFFSET_9
+ 0x0, // PA_CL_VPORT_ZSCALE_9
+ 0x0, // PA_CL_VPORT_ZOFFSET_9
+ 0x0, // PA_CL_VPORT_XSCALE_10
+ 0x0, // PA_CL_VPORT_XOFFSET_10
+ 0x0, // PA_CL_VPORT_YSCALE_10
+ 0x0, // PA_CL_VPORT_YOFFSET_10
+ 0x0, // PA_CL_VPORT_ZSCALE_10
+ 0x0, // PA_CL_VPORT_ZOFFSET_10
+ 0x0, // PA_CL_VPORT_XSCALE_11
+ 0x0, // PA_CL_VPORT_XOFFSET_11
+ 0x0, // PA_CL_VPORT_YSCALE_11
+ 0x0, // PA_CL_VPORT_YOFFSET_11
+ 0x0, // PA_CL_VPORT_ZSCALE_11
+ 0x0, // PA_CL_VPORT_ZOFFSET_11
+ 0x0, // PA_CL_VPORT_XSCALE_12
+ 0x0, // PA_CL_VPORT_XOFFSET_12
+ 0x0, // PA_CL_VPORT_YSCALE_12
+ 0x0, // PA_CL_VPORT_YOFFSET_12
+ 0x0, // PA_CL_VPORT_ZSCALE_12
+ 0x0, // PA_CL_VPORT_ZOFFSET_12
+ 0x0, // PA_CL_VPORT_XSCALE_13
+ 0x0, // PA_CL_VPORT_XOFFSET_13
+ 0x0, // PA_CL_VPORT_YSCALE_13
+ 0x0, // PA_CL_VPORT_YOFFSET_13
+ 0x0, // PA_CL_VPORT_ZSCALE_13
+ 0x0, // PA_CL_VPORT_ZOFFSET_13
+ 0x0, // PA_CL_VPORT_XSCALE_14
+ 0x0, // PA_CL_VPORT_XOFFSET_14
+ 0x0, // PA_CL_VPORT_YSCALE_14
+ 0x0, // PA_CL_VPORT_YOFFSET_14
+ 0x0, // PA_CL_VPORT_ZSCALE_14
+ 0x0, // PA_CL_VPORT_ZOFFSET_14
+ 0x0, // PA_CL_VPORT_XSCALE_15
+ 0x0, // PA_CL_VPORT_XOFFSET_15
+ 0x0, // PA_CL_VPORT_YSCALE_15
+ 0x0, // PA_CL_VPORT_YOFFSET_15
+ 0x0, // PA_CL_VPORT_ZSCALE_15
+ 0x0, // PA_CL_VPORT_ZOFFSET_15
+ 0x0, // PA_CL_UCP_0_X
+ 0x0, // PA_CL_UCP_0_Y
+ 0x0, // PA_CL_UCP_0_Z
+ 0x0, // PA_CL_UCP_0_W
+ 0x0, // PA_CL_UCP_1_X
+ 0x0, // PA_CL_UCP_1_Y
+ 0x0, // PA_CL_UCP_1_Z
+ 0x0, // PA_CL_UCP_1_W
+ 0x0, // PA_CL_UCP_2_X
+ 0x0, // PA_CL_UCP_2_Y
+ 0x0, // PA_CL_UCP_2_Z
+ 0x0, // PA_CL_UCP_2_W
+ 0x0, // PA_CL_UCP_3_X
+ 0x0, // PA_CL_UCP_3_Y
+ 0x0, // PA_CL_UCP_3_Z
+ 0x0, // PA_CL_UCP_3_W
+ 0x0, // PA_CL_UCP_4_X
+ 0x0, // PA_CL_UCP_4_Y
+ 0x0, // PA_CL_UCP_4_Z
+ 0x0, // PA_CL_UCP_4_W
+ 0x0, // PA_CL_UCP_5_X
+ 0x0, // PA_CL_UCP_5_Y
+ 0x0, // PA_CL_UCP_5_Z
+ 0x0 // PA_CL_UCP_5_W
};
static const uint32_t SpiPsInputCntl0Gfx103[] = {
- 0x0 , // SPI_PS_INPUT_CNTL_0
- 0x0 , // SPI_PS_INPUT_CNTL_1
- 0x0 , // SPI_PS_INPUT_CNTL_2
- 0x0 , // SPI_PS_INPUT_CNTL_3
- 0x0 , // SPI_PS_INPUT_CNTL_4
- 0x0 , // SPI_PS_INPUT_CNTL_5
- 0x0 , // SPI_PS_INPUT_CNTL_6
- 0x0 , // SPI_PS_INPUT_CNTL_7
- 0x0 , // SPI_PS_INPUT_CNTL_8
- 0x0 , // SPI_PS_INPUT_CNTL_9
- 0x0 , // SPI_PS_INPUT_CNTL_10
- 0x0 , // SPI_PS_INPUT_CNTL_11
- 0x0 , // SPI_PS_INPUT_CNTL_12
- 0x0 , // SPI_PS_INPUT_CNTL_13
- 0x0 , // SPI_PS_INPUT_CNTL_14
- 0x0 , // SPI_PS_INPUT_CNTL_15
- 0x0 , // SPI_PS_INPUT_CNTL_16
- 0x0 , // SPI_PS_INPUT_CNTL_17
- 0x0 , // SPI_PS_INPUT_CNTL_18
- 0x0 , // SPI_PS_INPUT_CNTL_19
- 0x0 , // SPI_PS_INPUT_CNTL_20
- 0x0 , // SPI_PS_INPUT_CNTL_21
- 0x0 , // SPI_PS_INPUT_CNTL_22
- 0x0 , // SPI_PS_INPUT_CNTL_23
- 0x0 , // SPI_PS_INPUT_CNTL_24
- 0x0 , // SPI_PS_INPUT_CNTL_25
- 0x0 , // SPI_PS_INPUT_CNTL_26
- 0x0 , // SPI_PS_INPUT_CNTL_27
- 0x0 , // SPI_PS_INPUT_CNTL_28
- 0x0 , // SPI_PS_INPUT_CNTL_29
- 0x0 , // SPI_PS_INPUT_CNTL_30
- 0x0 , // SPI_PS_INPUT_CNTL_31
- 0x0 , // SPI_VS_OUT_CONFIG
- 0x0 , //
- 0x0 , // SPI_PS_INPUT_ENA
- 0x0 , // SPI_PS_INPUT_ADDR
- 0x0 , // SPI_INTERP_CONTROL_0
- 0x2 , // SPI_PS_IN_CONTROL
- 0x0 , //
- 0x0 , // SPI_BARYC_CNTL
- 0x0 , //
- 0x0 , // SPI_TMPRING_SIZE
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // SPI_SHADER_IDX_FORMAT
- 0x0 , // SPI_SHADER_POS_FORMAT
- 0x0 , // SPI_SHADER_Z_FORMAT
- 0x0 // SPI_SHADER_COL_FORMAT
+ 0x0, // SPI_PS_INPUT_CNTL_0
+ 0x0, // SPI_PS_INPUT_CNTL_1
+ 0x0, // SPI_PS_INPUT_CNTL_2
+ 0x0, // SPI_PS_INPUT_CNTL_3
+ 0x0, // SPI_PS_INPUT_CNTL_4
+ 0x0, // SPI_PS_INPUT_CNTL_5
+ 0x0, // SPI_PS_INPUT_CNTL_6
+ 0x0, // SPI_PS_INPUT_CNTL_7
+ 0x0, // SPI_PS_INPUT_CNTL_8
+ 0x0, // SPI_PS_INPUT_CNTL_9
+ 0x0, // SPI_PS_INPUT_CNTL_10
+ 0x0, // SPI_PS_INPUT_CNTL_11
+ 0x0, // SPI_PS_INPUT_CNTL_12
+ 0x0, // SPI_PS_INPUT_CNTL_13
+ 0x0, // SPI_PS_INPUT_CNTL_14
+ 0x0, // SPI_PS_INPUT_CNTL_15
+ 0x0, // SPI_PS_INPUT_CNTL_16
+ 0x0, // SPI_PS_INPUT_CNTL_17
+ 0x0, // SPI_PS_INPUT_CNTL_18
+ 0x0, // SPI_PS_INPUT_CNTL_19
+ 0x0, // SPI_PS_INPUT_CNTL_20
+ 0x0, // SPI_PS_INPUT_CNTL_21
+ 0x0, // SPI_PS_INPUT_CNTL_22
+ 0x0, // SPI_PS_INPUT_CNTL_23
+ 0x0, // SPI_PS_INPUT_CNTL_24
+ 0x0, // SPI_PS_INPUT_CNTL_25
+ 0x0, // SPI_PS_INPUT_CNTL_26
+ 0x0, // SPI_PS_INPUT_CNTL_27
+ 0x0, // SPI_PS_INPUT_CNTL_28
+ 0x0, // SPI_PS_INPUT_CNTL_29
+ 0x0, // SPI_PS_INPUT_CNTL_30
+ 0x0, // SPI_PS_INPUT_CNTL_31
+ 0x0, // SPI_VS_OUT_CONFIG
+ 0x0, //
+ 0x0, // SPI_PS_INPUT_ENA
+ 0x0, // SPI_PS_INPUT_ADDR
+ 0x0, // SPI_INTERP_CONTROL_0
+ 0x2, // SPI_PS_IN_CONTROL
+ 0x0, //
+ 0x0, // SPI_BARYC_CNTL
+ 0x0, //
+ 0x0, // SPI_TMPRING_SIZE
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // SPI_SHADER_IDX_FORMAT
+ 0x0, // SPI_SHADER_POS_FORMAT
+ 0x0, // SPI_SHADER_Z_FORMAT
+ 0x0 // SPI_SHADER_COL_FORMAT
};
static const uint32_t SxPsDownconvertControlGfx103[] = {
- 0x0 , // SX_PS_DOWNCONVERT_CONTROL
- 0x0 , // SX_PS_DOWNCONVERT
- 0x0 , // SX_BLEND_OPT_EPSILON
- 0x0 , // SX_BLEND_OPT_CONTROL
- 0x0 , // SX_MRT0_BLEND_OPT
- 0x0 , // SX_MRT1_BLEND_OPT
- 0x0 , // SX_MRT2_BLEND_OPT
- 0x0 , // SX_MRT3_BLEND_OPT
- 0x0 , // SX_MRT4_BLEND_OPT
- 0x0 , // SX_MRT5_BLEND_OPT
- 0x0 , // SX_MRT6_BLEND_OPT
- 0x0 , // SX_MRT7_BLEND_OPT
- 0x0 , // CB_BLEND0_CONTROL
- 0x0 , // CB_BLEND1_CONTROL
- 0x0 , // CB_BLEND2_CONTROL
- 0x0 , // CB_BLEND3_CONTROL
- 0x0 , // CB_BLEND4_CONTROL
- 0x0 , // CB_BLEND5_CONTROL
- 0x0 , // CB_BLEND6_CONTROL
- 0x0 // CB_BLEND7_CONTROL
+ 0x0, // SX_PS_DOWNCONVERT_CONTROL
+ 0x0, // SX_PS_DOWNCONVERT
+ 0x0, // SX_BLEND_OPT_EPSILON
+ 0x0, // SX_BLEND_OPT_CONTROL
+ 0x0, // SX_MRT0_BLEND_OPT
+ 0x0, // SX_MRT1_BLEND_OPT
+ 0x0, // SX_MRT2_BLEND_OPT
+ 0x0, // SX_MRT3_BLEND_OPT
+ 0x0, // SX_MRT4_BLEND_OPT
+ 0x0, // SX_MRT5_BLEND_OPT
+ 0x0, // SX_MRT6_BLEND_OPT
+ 0x0, // SX_MRT7_BLEND_OPT
+ 0x0, // CB_BLEND0_CONTROL
+ 0x0, // CB_BLEND1_CONTROL
+ 0x0, // CB_BLEND2_CONTROL
+ 0x0, // CB_BLEND3_CONTROL
+ 0x0, // CB_BLEND4_CONTROL
+ 0x0, // CB_BLEND5_CONTROL
+ 0x0, // CB_BLEND6_CONTROL
+ 0x0 // CB_BLEND7_CONTROL
};
static const uint32_t GeMaxOutputPerSubgroupGfx103[] = {
- 0x0 , // GE_MAX_OUTPUT_PER_SUBGROUP
- 0x0 , // DB_DEPTH_CONTROL
- 0x0 , // DB_EQAA
- 0x0 , // CB_COLOR_CONTROL
- 0x0 , // DB_SHADER_CONTROL
- 0x90000 , // PA_CL_CLIP_CNTL
- 0x4 , // PA_SU_SC_MODE_CNTL
- 0x0 , // PA_CL_VTE_CNTL
- 0x0 , // PA_CL_VS_OUT_CNTL
- 0x0 // PA_CL_NANINF_CNTL
+ 0x0, // GE_MAX_OUTPUT_PER_SUBGROUP
+ 0x0, // DB_DEPTH_CONTROL
+ 0x0, // DB_EQAA
+ 0x0, // CB_COLOR_CONTROL
+ 0x0, // DB_SHADER_CONTROL
+ 0x90000, // PA_CL_CLIP_CNTL
+ 0x4, // PA_SU_SC_MODE_CNTL
+ 0x0, // PA_CL_VTE_CNTL
+ 0x0, // PA_CL_VS_OUT_CNTL
+ 0x0 // PA_CL_NANINF_CNTL
};
static const uint32_t PaSuPrimFilterCntlGfx103[] = {
- 0x0 , // PA_SU_PRIM_FILTER_CNTL
- 0x0 , // PA_SU_SMALL_PRIM_FILTER_CNTL
- 0x0 , //
- 0x0 , // PA_CL_NGG_CNTL
- 0x0 , // PA_SU_OVER_RASTERIZATION_CNTL
- 0x0 , // PA_STEREO_CNTL
- 0x0 , // PA_STATE_STEREO_X
- 0x0 //
+ 0x0, // PA_SU_PRIM_FILTER_CNTL
+ 0x0, // PA_SU_SMALL_PRIM_FILTER_CNTL
+ 0x0, //
+ 0x0, // PA_CL_NGG_CNTL
+ 0x0, // PA_SU_OVER_RASTERIZATION_CNTL
+ 0x0, // PA_STEREO_CNTL
+ 0x0, // PA_STATE_STEREO_X
+ 0x0 //
};
static const uint32_t PaSuPointSizeGfx103[] = {
- 0x0 , // PA_SU_POINT_SIZE
- 0x0 , // PA_SU_POINT_MINMAX
- 0x0 , // PA_SU_LINE_CNTL
- 0x0 // PA_SC_LINE_STIPPLE
+ 0x0, // PA_SU_POINT_SIZE
+ 0x0, // PA_SU_POINT_MINMAX
+ 0x0, // PA_SU_LINE_CNTL
+ 0x0 // PA_SC_LINE_STIPPLE
};
static const uint32_t VgtHosMaxTessLevelGfx103[] = {
- 0x0 , // VGT_HOS_MAX_TESS_LEVEL
- 0x0 // VGT_HOS_MIN_TESS_LEVEL
+ 0x0, // VGT_HOS_MAX_TESS_LEVEL
+ 0x0 // VGT_HOS_MIN_TESS_LEVEL
};
static const uint32_t VgtGsModeGfx103[] = {
- 0x0 , // VGT_GS_MODE
- 0x0 , // VGT_GS_ONCHIP_CNTL
- 0x0 , // PA_SC_MODE_CNTL_0
- 0x0 , // PA_SC_MODE_CNTL_1
- 0x0 , // VGT_ENHANCE
- 0x100 , // VGT_GS_PER_ES
- 0x80 , // VGT_ES_PER_GS
- 0x2 , // VGT_GS_PER_VS
- 0x0 , // VGT_GSVS_RING_OFFSET_1
- 0x0 , // VGT_GSVS_RING_OFFSET_2
- 0x0 , // VGT_GSVS_RING_OFFSET_3
- 0x0 // VGT_GS_OUT_PRIM_TYPE
+ 0x0, // VGT_GS_MODE
+ 0x0, // VGT_GS_ONCHIP_CNTL
+ 0x0, // PA_SC_MODE_CNTL_0
+ 0x0, // PA_SC_MODE_CNTL_1
+ 0x0, // VGT_ENHANCE
+ 0x100, // VGT_GS_PER_ES
+ 0x80, // VGT_ES_PER_GS
+ 0x2, // VGT_GS_PER_VS
+ 0x0, // VGT_GSVS_RING_OFFSET_1
+ 0x0, // VGT_GSVS_RING_OFFSET_2
+ 0x0, // VGT_GSVS_RING_OFFSET_3
+ 0x0 // VGT_GS_OUT_PRIM_TYPE
};
static const uint32_t VgtPrimitiveidEnGfx103[] = {
- 0x0 // VGT_PRIMITIVEID_EN
+ 0x0 // VGT_PRIMITIVEID_EN
};
static const uint32_t VgtPrimitiveidResetGfx103[] = {
- 0x0 // VGT_PRIMITIVEID_RESET
+ 0x0 // VGT_PRIMITIVEID_RESET
};
static const uint32_t VgtDrawPayloadCntlGfx103[] = {
- 0x0 , // VGT_DRAW_PAYLOAD_CNTL
- 0x0 , //
- 0x0 , // VGT_INSTANCE_STEP_RATE_0
- 0x0 , // VGT_INSTANCE_STEP_RATE_1
- 0x0 , // IA_MULTI_VGT_PARAM
- 0x0 , // VGT_ESGS_RING_ITEMSIZE
- 0x0 , // VGT_GSVS_RING_ITEMSIZE
- 0x0 , // VGT_REUSE_OFF
- 0x0 , // VGT_VTX_CNT_EN
- 0x0 , // DB_HTILE_SURFACE
- 0x0 , // DB_SRESULTS_COMPARE_STATE0
- 0x0 , // DB_SRESULTS_COMPARE_STATE1
- 0x0 , // DB_PRELOAD_CONTROL
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_0
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_0
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_0
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_1
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_1
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_1
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_2
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_2
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_2
- 0x0 , // VGT_STRMOUT_BUFFER_SIZE_3
- 0x0 , // VGT_STRMOUT_VTX_STRIDE_3
- 0x0 , //
- 0x0 , // VGT_STRMOUT_BUFFER_OFFSET_3
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
- 0x0 , // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
- 0x0 , //
- 0x0 , // VGT_GS_MAX_VERT_OUT
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , //
- 0x0 , // GE_NGG_SUBGRP_CNTL
- 0x0 , // VGT_TESS_DISTRIBUTION
- 0x0 , // VGT_SHADER_STAGES_EN
- 0x0 , // VGT_LS_HS_CONFIG
- 0x0 , // VGT_GS_VERT_ITEMSIZE
- 0x0 , // VGT_GS_VERT_ITEMSIZE_1
- 0x0 , // VGT_GS_VERT_ITEMSIZE_2
- 0x0 , // VGT_GS_VERT_ITEMSIZE_3
- 0x0 , // VGT_TF_PARAM
- 0x0 , // DB_ALPHA_TO_MASK
- 0x0 , //
- 0x0 , // PA_SU_POLY_OFFSET_DB_FMT_CNTL
- 0x0 , // PA_SU_POLY_OFFSET_CLAMP
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_FRONT_OFFSET
- 0x0 , // PA_SU_POLY_OFFSET_BACK_SCALE
- 0x0 , // PA_SU_POLY_OFFSET_BACK_OFFSET
- 0x0 , // VGT_GS_INSTANCE_CNT
- 0x0 , // VGT_STRMOUT_CONFIG
- 0x0 // VGT_STRMOUT_BUFFER_CONFIG
+ 0x0, // VGT_DRAW_PAYLOAD_CNTL
+ 0x0, //
+ 0x0, // VGT_INSTANCE_STEP_RATE_0
+ 0x0, // VGT_INSTANCE_STEP_RATE_1
+ 0x0, // IA_MULTI_VGT_PARAM
+ 0x0, // VGT_ESGS_RING_ITEMSIZE
+ 0x0, // VGT_GSVS_RING_ITEMSIZE
+ 0x0, // VGT_REUSE_OFF
+ 0x0, // VGT_VTX_CNT_EN
+ 0x0, // DB_HTILE_SURFACE
+ 0x0, // DB_SRESULTS_COMPARE_STATE0
+ 0x0, // DB_SRESULTS_COMPARE_STATE1
+ 0x0, // DB_PRELOAD_CONTROL
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_0
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_0
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_0
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_1
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_1
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_1
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_2
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_2
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_2
+ 0x0, // VGT_STRMOUT_BUFFER_SIZE_3
+ 0x0, // VGT_STRMOUT_VTX_STRIDE_3
+ 0x0, //
+ 0x0, // VGT_STRMOUT_BUFFER_OFFSET_3
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+ 0x0, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+ 0x0, //
+ 0x0, // VGT_GS_MAX_VERT_OUT
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, //
+ 0x0, // GE_NGG_SUBGRP_CNTL
+ 0x0, // VGT_TESS_DISTRIBUTION
+ 0x0, // VGT_SHADER_STAGES_EN
+ 0x0, // VGT_LS_HS_CONFIG
+ 0x0, // VGT_GS_VERT_ITEMSIZE
+ 0x0, // VGT_GS_VERT_ITEMSIZE_1
+ 0x0, // VGT_GS_VERT_ITEMSIZE_2
+ 0x0, // VGT_GS_VERT_ITEMSIZE_3
+ 0x0, // VGT_TF_PARAM
+ 0x0, // DB_ALPHA_TO_MASK
+ 0x0, //
+ 0x0, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+ 0x0, // PA_SU_POLY_OFFSET_CLAMP
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+ 0x0, // PA_SU_POLY_OFFSET_BACK_SCALE
+ 0x0, // PA_SU_POLY_OFFSET_BACK_OFFSET
+ 0x0, // VGT_GS_INSTANCE_CNT
+ 0x0, // VGT_STRMOUT_CONFIG
+ 0x0 // VGT_STRMOUT_BUFFER_CONFIG
};
static const uint32_t PaScCentroidPriority0Gfx103[] = {
- 0x0 , // PA_SC_CENTROID_PRIORITY_0
- 0x0 , // PA_SC_CENTROID_PRIORITY_1
- 0x1000 , // PA_SC_LINE_CNTL
- 0x0 , // PA_SC_AA_CONFIG
- 0x5 , // PA_SU_VTX_CNTL
+ 0x0, // PA_SC_CENTROID_PRIORITY_0
+ 0x0, // PA_SC_CENTROID_PRIORITY_1
+ 0x1000, // PA_SC_LINE_CNTL
+ 0x0, // PA_SC_AA_CONFIG
+ 0x5, // PA_SU_VTX_CNTL
0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
- 0x0 , // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+ 0x0, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
- 0x0 , // PA_SC_SHADER_CONTROL
- 0x3 , // PA_SC_BINNER_CNTL_0
- 0x0 , // PA_SC_BINNER_CNTL_1
- 0x100000 , // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
- 0x0 , // PA_SC_NGG_MODE_CNTL
- 0x0 , //
- 0x1e , // VGT_VERTEX_REUSE_BLOCK_CNTL
- 0x20 , // VGT_OUT_DEALLOC_CNTL
- 0x0 , // CB_COLOR0_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR0_VIEW
- 0x0 , // CB_COLOR0_INFO
- 0x0 , // CB_COLOR0_ATTRIB
- 0x0 , // CB_COLOR0_DCC_CONTROL
- 0x0 , // CB_COLOR0_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR0_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR0_CLEAR_WORD0
- 0x0 , // CB_COLOR0_CLEAR_WORD1
- 0x0 , // CB_COLOR0_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR1_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR1_VIEW
- 0x0 , // CB_COLOR1_INFO
- 0x0 , // CB_COLOR1_ATTRIB
- 0x0 , // CB_COLOR1_DCC_CONTROL
- 0x0 , // CB_COLOR1_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR1_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR1_CLEAR_WORD0
- 0x0 , // CB_COLOR1_CLEAR_WORD1
- 0x0 , // CB_COLOR1_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR2_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR2_VIEW
- 0x0 , // CB_COLOR2_INFO
- 0x0 , // CB_COLOR2_ATTRIB
- 0x0 , // CB_COLOR2_DCC_CONTROL
- 0x0 , // CB_COLOR2_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR2_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR2_CLEAR_WORD0
- 0x0 , // CB_COLOR2_CLEAR_WORD1
- 0x0 , // CB_COLOR2_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR3_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR3_VIEW
- 0x0 , // CB_COLOR3_INFO
- 0x0 , // CB_COLOR3_ATTRIB
- 0x0 , // CB_COLOR3_DCC_CONTROL
- 0x0 , // CB_COLOR3_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR3_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR3_CLEAR_WORD0
- 0x0 , // CB_COLOR3_CLEAR_WORD1
- 0x0 , // CB_COLOR3_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR4_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR4_VIEW
- 0x0 , // CB_COLOR4_INFO
- 0x0 , // CB_COLOR4_ATTRIB
- 0x0 , // CB_COLOR4_DCC_CONTROL
- 0x0 , // CB_COLOR4_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR4_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR4_CLEAR_WORD0
- 0x0 , // CB_COLOR4_CLEAR_WORD1
- 0x0 , // CB_COLOR4_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR5_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR5_VIEW
- 0x0 , // CB_COLOR5_INFO
- 0x0 , // CB_COLOR5_ATTRIB
- 0x0 , // CB_COLOR5_DCC_CONTROL
- 0x0 , // CB_COLOR5_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR5_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR5_CLEAR_WORD0
- 0x0 , // CB_COLOR5_CLEAR_WORD1
- 0x0 , // CB_COLOR5_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR6_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR6_VIEW
- 0x0 , // CB_COLOR6_INFO
- 0x0 , // CB_COLOR6_ATTRIB
- 0x0 , // CB_COLOR6_DCC_CONTROL
- 0x0 , // CB_COLOR6_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR6_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR6_CLEAR_WORD0
- 0x0 , // CB_COLOR6_CLEAR_WORD1
- 0x0 , // CB_COLOR6_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR7_BASE
- 0x0 , //
- 0x0 , //
- 0x0 , // CB_COLOR7_VIEW
- 0x0 , // CB_COLOR7_INFO
- 0x0 , // CB_COLOR7_ATTRIB
- 0x0 , // CB_COLOR7_DCC_CONTROL
- 0x0 , // CB_COLOR7_CMASK
- 0x0 , //
- 0x0 , // CB_COLOR7_FMASK
- 0x0 , //
- 0x0 , // CB_COLOR7_CLEAR_WORD0
- 0x0 , // CB_COLOR7_CLEAR_WORD1
- 0x0 , // CB_COLOR7_DCC_BASE
- 0x0 , //
- 0x0 , // CB_COLOR0_BASE_EXT
- 0x0 , // CB_COLOR1_BASE_EXT
- 0x0 , // CB_COLOR2_BASE_EXT
- 0x0 , // CB_COLOR3_BASE_EXT
- 0x0 , // CB_COLOR4_BASE_EXT
- 0x0 , // CB_COLOR5_BASE_EXT
- 0x0 , // CB_COLOR6_BASE_EXT
- 0x0 , // CB_COLOR7_BASE_EXT
- 0x0 , // CB_COLOR0_CMASK_BASE_EXT
- 0x0 , // CB_COLOR1_CMASK_BASE_EXT
- 0x0 , // CB_COLOR2_CMASK_BASE_EXT
- 0x0 , // CB_COLOR3_CMASK_BASE_EXT
- 0x0 , // CB_COLOR4_CMASK_BASE_EXT
- 0x0 , // CB_COLOR5_CMASK_BASE_EXT
- 0x0 , // CB_COLOR6_CMASK_BASE_EXT
- 0x0 , // CB_COLOR7_CMASK_BASE_EXT
- 0x0 , // CB_COLOR0_FMASK_BASE_EXT
- 0x0 , // CB_COLOR1_FMASK_BASE_EXT
- 0x0 , // CB_COLOR2_FMASK_BASE_EXT
- 0x0 , // CB_COLOR3_FMASK_BASE_EXT
- 0x0 , // CB_COLOR4_FMASK_BASE_EXT
- 0x0 , // CB_COLOR5_FMASK_BASE_EXT
- 0x0 , // CB_COLOR6_FMASK_BASE_EXT
- 0x0 , // CB_COLOR7_FMASK_BASE_EXT
- 0x0 , // CB_COLOR0_DCC_BASE_EXT
- 0x0 , // CB_COLOR1_DCC_BASE_EXT
- 0x0 , // CB_COLOR2_DCC_BASE_EXT
- 0x0 , // CB_COLOR3_DCC_BASE_EXT
- 0x0 , // CB_COLOR4_DCC_BASE_EXT
- 0x0 , // CB_COLOR5_DCC_BASE_EXT
- 0x0 , // CB_COLOR6_DCC_BASE_EXT
- 0x0 , // CB_COLOR7_DCC_BASE_EXT
- 0x0 , // CB_COLOR0_ATTRIB2
- 0x0 , // CB_COLOR1_ATTRIB2
- 0x0 , // CB_COLOR2_ATTRIB2
- 0x0 , // CB_COLOR3_ATTRIB2
- 0x0 , // CB_COLOR4_ATTRIB2
- 0x0 , // CB_COLOR5_ATTRIB2
- 0x0 , // CB_COLOR6_ATTRIB2
- 0x0 , // CB_COLOR7_ATTRIB2
- 0x0 , // CB_COLOR0_ATTRIB3
- 0x0 , // CB_COLOR1_ATTRIB3
- 0x0 , // CB_COLOR2_ATTRIB3
- 0x0 , // CB_COLOR3_ATTRIB3
- 0x0 , // CB_COLOR4_ATTRIB3
- 0x0 , // CB_COLOR5_ATTRIB3
- 0x0 , // CB_COLOR6_ATTRIB3
+ 0x0, // PA_SC_SHADER_CONTROL
+ 0x3, // PA_SC_BINNER_CNTL_0
+ 0x0, // PA_SC_BINNER_CNTL_1
+ 0x100000, // PA_SC_CONSERVATIVE_RASTERIZATION_CNTL
+ 0x0, // PA_SC_NGG_MODE_CNTL
+ 0x0, //
+ 0x1e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+ 0x20, // VGT_OUT_DEALLOC_CNTL
+ 0x0, // CB_COLOR0_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR0_VIEW
+ 0x0, // CB_COLOR0_INFO
+ 0x0, // CB_COLOR0_ATTRIB
+ 0x0, // CB_COLOR0_DCC_CONTROL
+ 0x0, // CB_COLOR0_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR0_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR0_CLEAR_WORD0
+ 0x0, // CB_COLOR0_CLEAR_WORD1
+ 0x0, // CB_COLOR0_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR1_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR1_VIEW
+ 0x0, // CB_COLOR1_INFO
+ 0x0, // CB_COLOR1_ATTRIB
+ 0x0, // CB_COLOR1_DCC_CONTROL
+ 0x0, // CB_COLOR1_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR1_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR1_CLEAR_WORD0
+ 0x0, // CB_COLOR1_CLEAR_WORD1
+ 0x0, // CB_COLOR1_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR2_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR2_VIEW
+ 0x0, // CB_COLOR2_INFO
+ 0x0, // CB_COLOR2_ATTRIB
+ 0x0, // CB_COLOR2_DCC_CONTROL
+ 0x0, // CB_COLOR2_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR2_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR2_CLEAR_WORD0
+ 0x0, // CB_COLOR2_CLEAR_WORD1
+ 0x0, // CB_COLOR2_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR3_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR3_VIEW
+ 0x0, // CB_COLOR3_INFO
+ 0x0, // CB_COLOR3_ATTRIB
+ 0x0, // CB_COLOR3_DCC_CONTROL
+ 0x0, // CB_COLOR3_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR3_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR3_CLEAR_WORD0
+ 0x0, // CB_COLOR3_CLEAR_WORD1
+ 0x0, // CB_COLOR3_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR4_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR4_VIEW
+ 0x0, // CB_COLOR4_INFO
+ 0x0, // CB_COLOR4_ATTRIB
+ 0x0, // CB_COLOR4_DCC_CONTROL
+ 0x0, // CB_COLOR4_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR4_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR4_CLEAR_WORD0
+ 0x0, // CB_COLOR4_CLEAR_WORD1
+ 0x0, // CB_COLOR4_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR5_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR5_VIEW
+ 0x0, // CB_COLOR5_INFO
+ 0x0, // CB_COLOR5_ATTRIB
+ 0x0, // CB_COLOR5_DCC_CONTROL
+ 0x0, // CB_COLOR5_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR5_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR5_CLEAR_WORD0
+ 0x0, // CB_COLOR5_CLEAR_WORD1
+ 0x0, // CB_COLOR5_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR6_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR6_VIEW
+ 0x0, // CB_COLOR6_INFO
+ 0x0, // CB_COLOR6_ATTRIB
+ 0x0, // CB_COLOR6_DCC_CONTROL
+ 0x0, // CB_COLOR6_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR6_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR6_CLEAR_WORD0
+ 0x0, // CB_COLOR6_CLEAR_WORD1
+ 0x0, // CB_COLOR6_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR7_BASE
+ 0x0, //
+ 0x0, //
+ 0x0, // CB_COLOR7_VIEW
+ 0x0, // CB_COLOR7_INFO
+ 0x0, // CB_COLOR7_ATTRIB
+ 0x0, // CB_COLOR7_DCC_CONTROL
+ 0x0, // CB_COLOR7_CMASK
+ 0x0, //
+ 0x0, // CB_COLOR7_FMASK
+ 0x0, //
+ 0x0, // CB_COLOR7_CLEAR_WORD0
+ 0x0, // CB_COLOR7_CLEAR_WORD1
+ 0x0, // CB_COLOR7_DCC_BASE
+ 0x0, //
+ 0x0, // CB_COLOR0_BASE_EXT
+ 0x0, // CB_COLOR1_BASE_EXT
+ 0x0, // CB_COLOR2_BASE_EXT
+ 0x0, // CB_COLOR3_BASE_EXT
+ 0x0, // CB_COLOR4_BASE_EXT
+ 0x0, // CB_COLOR5_BASE_EXT
+ 0x0, // CB_COLOR6_BASE_EXT
+ 0x0, // CB_COLOR7_BASE_EXT
+ 0x0, // CB_COLOR0_CMASK_BASE_EXT
+ 0x0, // CB_COLOR1_CMASK_BASE_EXT
+ 0x0, // CB_COLOR2_CMASK_BASE_EXT
+ 0x0, // CB_COLOR3_CMASK_BASE_EXT
+ 0x0, // CB_COLOR4_CMASK_BASE_EXT
+ 0x0, // CB_COLOR5_CMASK_BASE_EXT
+ 0x0, // CB_COLOR6_CMASK_BASE_EXT
+ 0x0, // CB_COLOR7_CMASK_BASE_EXT
+ 0x0, // CB_COLOR0_FMASK_BASE_EXT
+ 0x0, // CB_COLOR1_FMASK_BASE_EXT
+ 0x0, // CB_COLOR2_FMASK_BASE_EXT
+ 0x0, // CB_COLOR3_FMASK_BASE_EXT
+ 0x0, // CB_COLOR4_FMASK_BASE_EXT
+ 0x0, // CB_COLOR5_FMASK_BASE_EXT
+ 0x0, // CB_COLOR6_FMASK_BASE_EXT
+ 0x0, // CB_COLOR7_FMASK_BASE_EXT
+ 0x0, // CB_COLOR0_DCC_BASE_EXT
+ 0x0, // CB_COLOR1_DCC_BASE_EXT
+ 0x0, // CB_COLOR2_DCC_BASE_EXT
+ 0x0, // CB_COLOR3_DCC_BASE_EXT
+ 0x0, // CB_COLOR4_DCC_BASE_EXT
+ 0x0, // CB_COLOR5_DCC_BASE_EXT
+ 0x0, // CB_COLOR6_DCC_BASE_EXT
+ 0x0, // CB_COLOR7_DCC_BASE_EXT
+ 0x0, // CB_COLOR0_ATTRIB2
+ 0x0, // CB_COLOR1_ATTRIB2
+ 0x0, // CB_COLOR2_ATTRIB2
+ 0x0, // CB_COLOR3_ATTRIB2
+ 0x0, // CB_COLOR4_ATTRIB2
+ 0x0, // CB_COLOR5_ATTRIB2
+ 0x0, // CB_COLOR6_ATTRIB2
+ 0x0, // CB_COLOR7_ATTRIB2
+ 0x0, // CB_COLOR0_ATTRIB3
+ 0x0, // CB_COLOR1_ATTRIB3
+ 0x0, // CB_COLOR2_ATTRIB3
+ 0x0, // CB_COLOR3_ATTRIB3
+ 0x0, // CB_COLOR4_ATTRIB3
+ 0x0, // CB_COLOR5_ATTRIB3
+ 0x0, // CB_COLOR6_ATTRIB3
0x0 // CB_COLOR7_ATTRIB3
};
set_context_reg_seq_array(cs, R_028000_DB_RENDER_CONTROL, SET(DbRenderControlGfx103));
set_context_reg_seq_array(cs, R_0281E8_COHER_DEST_BASE_HI_0, SET(CoherDestBaseHi0Gfx103));
- set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, SET(VgtMultiPrimIbResetIndxGfx103));
+ set_context_reg_seq_array(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
+ SET(VgtMultiPrimIbResetIndxGfx103));
set_context_reg_seq_array(cs, R_028644_SPI_PS_INPUT_CNTL_0, SET(SpiPsInputCntl0Gfx103));
- set_context_reg_seq_array(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, SET(SxPsDownconvertControlGfx103));
- set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, SET(GeMaxOutputPerSubgroupGfx103));
+ set_context_reg_seq_array(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL,
+ SET(SxPsDownconvertControlGfx103));
+ set_context_reg_seq_array(cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
+ SET(GeMaxOutputPerSubgroupGfx103));
set_context_reg_seq_array(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, SET(PaSuPrimFilterCntlGfx103));
set_context_reg_seq_array(cs, R_028A00_PA_SU_POINT_SIZE, SET(PaSuPointSizeGfx103));
set_context_reg_seq_array(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, SET(VgtHosMaxTessLevelGfx103));
set_context_reg_seq_array(cs, R_028A84_VGT_PRIMITIVEID_EN, SET(VgtPrimitiveidEnGfx103));
set_context_reg_seq_array(cs, R_028A8C_VGT_PRIMITIVEID_RESET, SET(VgtPrimitiveidResetGfx103));
set_context_reg_seq_array(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, SET(VgtDrawPayloadCntlGfx103));
- set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, SET(PaScCentroidPriority0Gfx103));
+ set_context_reg_seq_array(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0,
+ SET(PaScCentroidPriority0Gfx103));
for (unsigned i = 0; i < num_reg_pairs; i++)
      set_context_reg_seq_array(cs, reg_offsets[i], 1, &reg_values[i]);
}
-void ac_emulate_clear_state(const struct radeon_info *info,
- struct radeon_cmdbuf *cs,
+void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
set_context_reg_seq_array_fn set_context_reg_seq_array)
{
   /* Set context registers to the same values as CLEAR_STATE to initialize shadow memory. */
uint32_t reg_value = info->pa_sc_tile_steering_override;
if (info->chip_class == GFX10_3) {
-      gfx103_emulate_clear_state(cs, 1, &reg_offset, &reg_value,
-                                 set_context_reg_seq_array);
+      gfx103_emulate_clear_state(cs, 1, &reg_offset, &reg_value, set_context_reg_seq_array);
   } else if (info->chip_class == GFX10) {
-      gfx10_emulate_clear_state(cs, 1, &reg_offset, &reg_value,
-                                set_context_reg_seq_array);
+      gfx10_emulate_clear_state(cs, 1, &reg_offset, &reg_value, set_context_reg_seq_array);
} else if (info->chip_class == GFX9) {
gfx9_emulate_clear_state(cs, set_context_reg_seq_array);
} else {
unsigned end_range_offset = ranges[i].offset + ranges[i].size;
      /* Test if the ranges intersect. */
- if (MAX2(ranges[i].offset, reg_offset) <
- MIN2(end_range_offset, end_reg_offset)) {
+ if (MAX2(ranges[i].offset, reg_offset) < MIN2(end_range_offset, end_reg_offset)) {
/* Assertion: A register can be listed only once. */
assert(!found);
found = true;
for (unsigned i = 0; i < num_ranges; i++) {
for (unsigned j = 0; j < ranges[i].size / 4; j++) {
- unsigned offset = ranges[i].offset + j*4;
+ unsigned offset = ranges[i].offset + j * 4;
const char *name = ac_get_register_name(info->chip_class, offset);
unsigned value = -1;
unsigned size;
};
-enum ac_reg_range_type {
+enum ac_reg_range_type
+{
SI_REG_RANGE_UCONFIG,
SI_REG_RANGE_CONTEXT,
SI_REG_RANGE_SH,
SI_NUM_ALL_REG_RANGES,
};
-typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg,
- unsigned num, const uint32_t *values);
+typedef void (*set_context_reg_seq_array_fn)(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
+ const uint32_t *values);
void ac_get_reg_ranges(enum chip_class chip_class, enum radeon_family family,
enum ac_reg_range_type type, unsigned *num_ranges,
const struct ac_reg_range **ranges);
-void ac_emulate_clear_state(const struct radeon_info *info,
- struct radeon_cmdbuf *cs,
+void ac_emulate_clear_state(const struct radeon_info *info, struct radeon_cmdbuf *cs,
set_context_reg_seq_array_fn set_context_reg_seq_array);
void ac_check_shadowed_regs(enum chip_class chip_class, enum radeon_family family,
unsigned reg_offset, unsigned count);
*/
#include "ac_surface.h"
-#include "amd_family.h"
-#include "addrlib/src/amdgpu_asic_addr.h"
+
#include "ac_gpu_info.h"
+#include "addrlib/inc/addrinterface.h"
+#include "addrlib/src/amdgpu_asic_addr.h"
+#include "amd_family.h"
+#include "drm-uapi/amdgpu_drm.h"
+#include "sid.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"
-#include "sid.h"
+#include <amdgpu.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <amdgpu.h>
-#include "drm-uapi/amdgpu_drm.h"
-
-#include "addrlib/inc/addrinterface.h"
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif
struct ac_addrlib {
- ADDR_HANDLE handle;
-
- /* The cache of DCC retile maps for reuse when allocating images of
- * similar sizes.
- */
- simple_mtx_t dcc_retile_map_lock;
- struct hash_table *dcc_retile_maps;
- struct hash_table *dcc_retile_tile_indices;
+ ADDR_HANDLE handle;
+
+ /* The cache of DCC retile maps for reuse when allocating images of
+ * similar sizes.
+ */
+ simple_mtx_t dcc_retile_map_lock;
+ struct hash_table *dcc_retile_maps;
+ struct hash_table *dcc_retile_tile_indices;
};
struct dcc_retile_map_key {
- enum radeon_family family;
- unsigned retile_width;
- unsigned retile_height;
- bool rb_aligned;
- bool pipe_aligned;
- unsigned dcc_retile_num_elements;
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT input;
+ enum radeon_family family;
+ unsigned retile_width;
+ unsigned retile_height;
+ bool rb_aligned;
+ bool pipe_aligned;
+ unsigned dcc_retile_num_elements;
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT input;
};
static uint32_t dcc_retile_map_hash_key(const void *key)
{
- return _mesa_hash_data(key, sizeof(struct dcc_retile_map_key));
+ return _mesa_hash_data(key, sizeof(struct dcc_retile_map_key));
}
static bool dcc_retile_map_keys_equal(const void *a, const void *b)
{
- return memcmp(a, b, sizeof(struct dcc_retile_map_key)) == 0;
+ return memcmp(a, b, sizeof(struct dcc_retile_map_key)) == 0;
}
static void dcc_retile_map_free(struct hash_entry *entry)
{
- free((void*)entry->key);
- free(entry->data);
+ free((void *)entry->key);
+ free(entry->data);
}
struct dcc_retile_tile_key {
- enum radeon_family family;
- unsigned bpp;
- unsigned swizzle_mode;
- bool rb_aligned;
- bool pipe_aligned;
+ enum radeon_family family;
+ unsigned bpp;
+ unsigned swizzle_mode;
+ bool rb_aligned;
+ bool pipe_aligned;
};
struct dcc_retile_tile_data {
- unsigned tile_width_log2;
- unsigned tile_height_log2;
- uint16_t *data;
+ unsigned tile_width_log2;
+ unsigned tile_height_log2;
+ uint16_t *data;
};
static uint32_t dcc_retile_tile_hash_key(const void *key)
{
- return _mesa_hash_data(key, sizeof(struct dcc_retile_tile_key));
+ return _mesa_hash_data(key, sizeof(struct dcc_retile_tile_key));
}
static bool dcc_retile_tile_keys_equal(const void *a, const void *b)
{
- return memcmp(a, b, sizeof(struct dcc_retile_tile_key)) == 0;
+ return memcmp(a, b, sizeof(struct dcc_retile_tile_key)) == 0;
}
static void dcc_retile_tile_free(struct hash_entry *entry)
{
- free((void*)entry->key);
- free(((struct dcc_retile_tile_data*)entry->data)->data);
- free(entry->data);
+ free((void *)entry->key);
+ free(((struct dcc_retile_tile_data *)entry->data)->data);
+ free(entry->data);
}
/* Assumes dcc_retile_map_lock is taken. */
static const struct dcc_retile_tile_data *
-ac_compute_dcc_retile_tile_indices(struct ac_addrlib *addrlib,
- const struct radeon_info *info,
- unsigned bpp, unsigned swizzle_mode,
- bool rb_aligned, bool pipe_aligned)
+ac_compute_dcc_retile_tile_indices(struct ac_addrlib *addrlib, const struct radeon_info *info,
+ unsigned bpp, unsigned swizzle_mode, bool rb_aligned,
+ bool pipe_aligned)
{
- struct dcc_retile_tile_key key = (struct dcc_retile_tile_key) {
- .family = info->family,
- .bpp = bpp,
- .swizzle_mode = swizzle_mode,
- .rb_aligned = rb_aligned,
- .pipe_aligned = pipe_aligned
- };
-
- struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_tile_indices, &key);
- if (entry)
- return entry->data;
-
- ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
- ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
- din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
- dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
-
- din.dccKeyFlags.pipeAligned = pipe_aligned;
- din.dccKeyFlags.rbAligned = rb_aligned;
- din.resourceType = ADDR_RSRC_TEX_2D;
- din.swizzleMode = swizzle_mode;
- din.bpp = bpp;
- din.unalignedWidth = 1;
- din.unalignedHeight = 1;
- din.numSlices = 1;
- din.numFrags = 1;
- din.numMipLevels = 1;
-
- ADDR_E_RETURNCODE ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
- if (ret != ADDR_OK)
- return NULL;
-
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {0};
- addrin.size = sizeof(addrin);
- addrin.swizzleMode = swizzle_mode;
- addrin.resourceType = ADDR_RSRC_TEX_2D;
- addrin.bpp = bpp;
- addrin.numSlices = 1;
- addrin.numMipLevels = 1;
- addrin.numFrags = 1;
- addrin.pitch = dout.pitch;
- addrin.height = dout.height;
- addrin.compressBlkWidth = dout.compressBlkWidth;
- addrin.compressBlkHeight = dout.compressBlkHeight;
- addrin.compressBlkDepth = dout.compressBlkDepth;
- addrin.metaBlkWidth = dout.metaBlkWidth;
- addrin.metaBlkHeight = dout.metaBlkHeight;
- addrin.metaBlkDepth = dout.metaBlkDepth;
- addrin.dccKeyFlags.pipeAligned = pipe_aligned;
- addrin.dccKeyFlags.rbAligned = rb_aligned;
-
- unsigned w = dout.metaBlkWidth / dout.compressBlkWidth;
- unsigned h = dout.metaBlkHeight / dout.compressBlkHeight;
- uint16_t *indices = malloc(w * h * sizeof (uint16_t));
- if (!indices)
- return NULL;
-
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
- addrout.size = sizeof(addrout);
-
- for (unsigned y = 0; y < h; ++y) {
- addrin.y = y * dout.compressBlkHeight;
- for (unsigned x = 0; x < w; ++x) {
- addrin.x = x * dout.compressBlkWidth;
- addrout.addr = 0;
-
- if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin, &addrout) != ADDR_OK) {
- free(indices);
- return NULL;
- }
- indices[y * w + x] = addrout.addr;
- }
- }
-
- struct dcc_retile_tile_data *data = calloc(1, sizeof(*data));
- if (!data) {
- free(indices);
- return NULL;
- }
-
- data->tile_width_log2 = util_logbase2(w);
- data->tile_height_log2 = util_logbase2(h);
- data->data = indices;
-
- struct dcc_retile_tile_key *heap_key = mem_dup(&key, sizeof(key));
- if (!heap_key) {
- free(data);
- free(indices);
- return NULL;
- }
-
- entry = _mesa_hash_table_insert(addrlib->dcc_retile_tile_indices, heap_key, data);
- if (!entry) {
- free(heap_key);
- free(data);
- free(indices);
- }
- return data;
+ struct dcc_retile_tile_key key = (struct dcc_retile_tile_key){.family = info->family,
+ .bpp = bpp,
+ .swizzle_mode = swizzle_mode,
+ .rb_aligned = rb_aligned,
+ .pipe_aligned = pipe_aligned};
+
+ struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_tile_indices, &key);
+ if (entry)
+ return entry->data;
+
+ ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
+ ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+ din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
+ dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+
+ din.dccKeyFlags.pipeAligned = pipe_aligned;
+ din.dccKeyFlags.rbAligned = rb_aligned;
+ din.resourceType = ADDR_RSRC_TEX_2D;
+ din.swizzleMode = swizzle_mode;
+ din.bpp = bpp;
+ din.unalignedWidth = 1;
+ din.unalignedHeight = 1;
+ din.numSlices = 1;
+ din.numFrags = 1;
+ din.numMipLevels = 1;
+
+ ADDR_E_RETURNCODE ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
+ if (ret != ADDR_OK)
+ return NULL;
+
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin = {0};
+ addrin.size = sizeof(addrin);
+ addrin.swizzleMode = swizzle_mode;
+ addrin.resourceType = ADDR_RSRC_TEX_2D;
+ addrin.bpp = bpp;
+ addrin.numSlices = 1;
+ addrin.numMipLevels = 1;
+ addrin.numFrags = 1;
+ addrin.pitch = dout.pitch;
+ addrin.height = dout.height;
+ addrin.compressBlkWidth = dout.compressBlkWidth;
+ addrin.compressBlkHeight = dout.compressBlkHeight;
+ addrin.compressBlkDepth = dout.compressBlkDepth;
+ addrin.metaBlkWidth = dout.metaBlkWidth;
+ addrin.metaBlkHeight = dout.metaBlkHeight;
+ addrin.metaBlkDepth = dout.metaBlkDepth;
+ addrin.dccKeyFlags.pipeAligned = pipe_aligned;
+ addrin.dccKeyFlags.rbAligned = rb_aligned;
+
+ unsigned w = dout.metaBlkWidth / dout.compressBlkWidth;
+ unsigned h = dout.metaBlkHeight / dout.compressBlkHeight;
+ uint16_t *indices = malloc(w * h * sizeof(uint16_t));
+ if (!indices)
+ return NULL;
+
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
+ addrout.size = sizeof(addrout);
+
+ for (unsigned y = 0; y < h; ++y) {
+ addrin.y = y * dout.compressBlkHeight;
+ for (unsigned x = 0; x < w; ++x) {
+ addrin.x = x * dout.compressBlkWidth;
+ addrout.addr = 0;
+
+ if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin, &addrout) != ADDR_OK) {
+ free(indices);
+ return NULL;
+ }
+ indices[y * w + x] = addrout.addr;
+ }
+ }
+
+ struct dcc_retile_tile_data *data = calloc(1, sizeof(*data));
+ if (!data) {
+ free(indices);
+ return NULL;
+ }
+
+ data->tile_width_log2 = util_logbase2(w);
+ data->tile_height_log2 = util_logbase2(h);
+ data->data = indices;
+
+ struct dcc_retile_tile_key *heap_key = mem_dup(&key, sizeof(key));
+ if (!heap_key) {
+ free(data);
+ free(indices);
+ return NULL;
+ }
+
+ entry = _mesa_hash_table_insert(addrlib->dcc_retile_tile_indices, heap_key, data);
+ if (!entry) {
+ free(heap_key);
+ free(data);
+ free(indices);
+ }
+ return data;
}
static uint32_t ac_compute_retile_tile_addr(const struct dcc_retile_tile_data *tile,
unsigned stride, unsigned x, unsigned y)
{
- unsigned x_mask = (1u << tile->tile_width_log2) - 1;
- unsigned y_mask = (1u << tile->tile_height_log2) - 1;
- unsigned tile_size_log2 = tile->tile_width_log2 + tile->tile_height_log2;
-
- unsigned base = ((y >> tile->tile_height_log2) * stride + (x >> tile->tile_width_log2)) << tile_size_log2;
- unsigned offset_in_tile = tile->data[((y & y_mask) << tile->tile_width_log2) + (x & x_mask)];
- return base + offset_in_tile;
+ unsigned x_mask = (1u << tile->tile_width_log2) - 1;
+ unsigned y_mask = (1u << tile->tile_height_log2) - 1;
+ unsigned tile_size_log2 = tile->tile_width_log2 + tile->tile_height_log2;
+
+ unsigned base = ((y >> tile->tile_height_log2) * stride + (x >> tile->tile_width_log2))
+ << tile_size_log2;
+ unsigned offset_in_tile = tile->data[((y & y_mask) << tile->tile_width_log2) + (x & x_mask)];
+ return base + offset_in_tile;
}
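To make the index arithmetic concrete, a worked example with hypothetical tile dimensions (none of these numbers come from the patch): an 8x4-element tile means tile_width_log2 = 3 and tile_height_log2 = 2, and with stride = 4 tiles per row:

   /* ac_compute_retile_tile_addr(tile, 4, 11, 6):
    *   tile column    = 11 >> 3 = 1,  tile row = 6 >> 2 = 1
    *   base           = (1 * 4 + 1) << (3 + 2) = 160
    *   offset_in_tile = tile->data[((6 & 3) << 3) + (11 & 7)] = tile->data[19]
    * so the call returns 160 + tile->data[19].
    */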
static uint32_t *ac_compute_dcc_retile_map(struct ac_addrlib *addrlib,
- const struct radeon_info *info,
- unsigned retile_width, unsigned retile_height,
- bool rb_aligned, bool pipe_aligned, bool use_uint16,
- unsigned dcc_retile_num_elements,
- const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT *in)
+ const struct radeon_info *info, unsigned retile_width,
+ unsigned retile_height, bool rb_aligned,
+ bool pipe_aligned, bool use_uint16,
+ unsigned dcc_retile_num_elements,
+ const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT *in)
{
- unsigned dcc_retile_map_size = dcc_retile_num_elements * (use_uint16 ? 2 : 4);
- struct dcc_retile_map_key key;
-
- assert(in->numFrags == 1 && in->numSlices == 1 && in->numMipLevels == 1);
-
- memset(&key, 0, sizeof(key));
- key.family = info->family;
- key.retile_width = retile_width;
- key.retile_height = retile_height;
- key.rb_aligned = rb_aligned;
- key.pipe_aligned = pipe_aligned;
- key.dcc_retile_num_elements = dcc_retile_num_elements;
- memcpy(&key.input, in, sizeof(*in));
-
- simple_mtx_lock(&addrlib->dcc_retile_map_lock);
-
- /* If we have already computed this retile map, get it from the hash table. */
- struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_maps, &key);
- if (entry) {
- uint32_t *map = entry->data;
- simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
- return map;
- }
-
- const struct dcc_retile_tile_data *src_tile =
- ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
- in->swizzleMode,
- rb_aligned, pipe_aligned);
- const struct dcc_retile_tile_data *dst_tile =
- ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp,
- in->swizzleMode, false, false);
- if (!src_tile || !dst_tile) {
- simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
- return NULL;
- }
-
- void *dcc_retile_map = malloc(dcc_retile_map_size);
- if (!dcc_retile_map) {
- simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
- return NULL;
- }
-
- unsigned index = 0;
- unsigned w = DIV_ROUND_UP(retile_width, in->compressBlkWidth);
- unsigned h = DIV_ROUND_UP(retile_height, in->compressBlkHeight);
- unsigned src_stride = DIV_ROUND_UP(w, 1u << src_tile->tile_width_log2);
- unsigned dst_stride = DIV_ROUND_UP(w, 1u << dst_tile->tile_width_log2);
-
- for (unsigned y = 0; y < h; ++y) {
- for (unsigned x = 0; x < w; ++x) {
- unsigned src_addr = ac_compute_retile_tile_addr(src_tile, src_stride, x, y);
- unsigned dst_addr = ac_compute_retile_tile_addr(dst_tile, dst_stride, x, y);
-
- if (use_uint16) {
- ((uint16_t*)dcc_retile_map)[2 * index] = src_addr;
- ((uint16_t*)dcc_retile_map)[2 * index + 1] = dst_addr;
- } else {
- ((uint32_t*)dcc_retile_map)[2 * index] = src_addr;
- ((uint32_t*)dcc_retile_map)[2 * index + 1] = dst_addr;
- }
- ++index;
- }
- }
-
- /* Fill the remaining pairs with the last one (for the compute shader). */
- for (unsigned i = index * 2; i < dcc_retile_num_elements; i++) {
- if (use_uint16)
- ((uint16_t*)dcc_retile_map)[i] = ((uint16_t*)dcc_retile_map)[i - 2];
- else
- ((uint32_t*)dcc_retile_map)[i] = ((uint32_t*)dcc_retile_map)[i - 2];
- }
-
- /* Insert the retile map into the hash table, so that it can be reused and
- * the computation can be skipped for similar image sizes.
- */
- _mesa_hash_table_insert(addrlib->dcc_retile_maps,
- mem_dup(&key, sizeof(key)), dcc_retile_map);
-
- simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
- return dcc_retile_map;
+ unsigned dcc_retile_map_size = dcc_retile_num_elements * (use_uint16 ? 2 : 4);
+ struct dcc_retile_map_key key;
+
+ assert(in->numFrags == 1 && in->numSlices == 1 && in->numMipLevels == 1);
+
+ memset(&key, 0, sizeof(key));
+ key.family = info->family;
+ key.retile_width = retile_width;
+ key.retile_height = retile_height;
+ key.rb_aligned = rb_aligned;
+ key.pipe_aligned = pipe_aligned;
+ key.dcc_retile_num_elements = dcc_retile_num_elements;
+ memcpy(&key.input, in, sizeof(*in));
+
+ simple_mtx_lock(&addrlib->dcc_retile_map_lock);
+
+ /* If we have already computed this retile map, get it from the hash table. */
+ struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_maps, &key);
+ if (entry) {
+ uint32_t *map = entry->data;
+ simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
+ return map;
+ }
+
+ const struct dcc_retile_tile_data *src_tile = ac_compute_dcc_retile_tile_indices(
+ addrlib, info, in->bpp, in->swizzleMode, rb_aligned, pipe_aligned);
+ const struct dcc_retile_tile_data *dst_tile =
+ ac_compute_dcc_retile_tile_indices(addrlib, info, in->bpp, in->swizzleMode, false, false);
+ if (!src_tile || !dst_tile) {
+ simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
+ return NULL;
+ }
+
+ void *dcc_retile_map = malloc(dcc_retile_map_size);
+ if (!dcc_retile_map) {
+ simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
+ return NULL;
+ }
+
+ unsigned index = 0;
+ unsigned w = DIV_ROUND_UP(retile_width, in->compressBlkWidth);
+ unsigned h = DIV_ROUND_UP(retile_height, in->compressBlkHeight);
+ unsigned src_stride = DIV_ROUND_UP(w, 1u << src_tile->tile_width_log2);
+ unsigned dst_stride = DIV_ROUND_UP(w, 1u << dst_tile->tile_width_log2);
+
+ for (unsigned y = 0; y < h; ++y) {
+ for (unsigned x = 0; x < w; ++x) {
+ unsigned src_addr = ac_compute_retile_tile_addr(src_tile, src_stride, x, y);
+ unsigned dst_addr = ac_compute_retile_tile_addr(dst_tile, dst_stride, x, y);
+
+ if (use_uint16) {
+ ((uint16_t *)dcc_retile_map)[2 * index] = src_addr;
+ ((uint16_t *)dcc_retile_map)[2 * index + 1] = dst_addr;
+ } else {
+ ((uint32_t *)dcc_retile_map)[2 * index] = src_addr;
+ ((uint32_t *)dcc_retile_map)[2 * index + 1] = dst_addr;
+ }
+ ++index;
+ }
+ }
+
+ /* Fill the remaining pairs with the last one (for the compute shader). */
+ for (unsigned i = index * 2; i < dcc_retile_num_elements; i++) {
+ if (use_uint16)
+ ((uint16_t *)dcc_retile_map)[i] = ((uint16_t *)dcc_retile_map)[i - 2];
+ else
+ ((uint32_t *)dcc_retile_map)[i] = ((uint32_t *)dcc_retile_map)[i - 2];
+ }
+
+ /* Insert the retile map into the hash table, so that it can be reused and
+ * the computation can be skipped for similar image sizes.
+ */
+ _mesa_hash_table_insert(addrlib->dcc_retile_maps, mem_dup(&key, sizeof(key)), dcc_retile_map);
+
+ simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
+ return dcc_retile_map;
}
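The returned buffer is a flat array of interleaved (src, dst) element pairs; dcc_retile_num_elements counts single elements, so it holds num_elements / 2 pairs. A hypothetical CPU-side consumer is sketched below; the real drivers apply the map with a compute shader, and the element values are the DCC addresses produced by Addr2ComputeDccAddrFromCoord():

   /* Hypothetical helper: copy DCC data from the pipe/RB-aligned layout to
    * the unaligned, displayable layout using a uint16 retile map.
    */
   static void apply_retile_map_u16(const uint16_t *map, unsigned num_elements,
                                    const uint8_t *aligned_dcc, uint8_t *display_dcc)
   {
      for (unsigned i = 0; i < num_elements; i += 2)
         display_dcc[map[i + 1]] = aligned_dcc[map[i]];
   }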
-static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
+static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
{
- return malloc(pInput->sizeInBytes);
+ return malloc(pInput->sizeInBytes);
}
-static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
+static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
{
- free(pInput->pVirtAddr);
- return ADDR_OK;
+ free(pInput->pVirtAddr);
+ return ADDR_OK;
}
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
- const struct amdgpu_gpu_info *amdinfo,
- uint64_t *max_alignment)
+ const struct amdgpu_gpu_info *amdinfo, uint64_t *max_alignment)
{
- ADDR_CREATE_INPUT addrCreateInput = {0};
- ADDR_CREATE_OUTPUT addrCreateOutput = {0};
- ADDR_REGISTER_VALUE regValue = {0};
- ADDR_CREATE_FLAGS createFlags = {{0}};
- ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
- ADDR_E_RETURNCODE addrRet;
-
- addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
- addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
-
- regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
- createFlags.value = 0;
-
- addrCreateInput.chipFamily = info->family_id;
- addrCreateInput.chipRevision = info->chip_external_rev;
-
- if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
- return NULL;
-
- if (addrCreateInput.chipFamily >= FAMILY_AI) {
- addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
- } else {
- regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
- regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
-
- regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
- regValue.pTileConfig = amdinfo->gb_tile_mode;
- regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
- if (addrCreateInput.chipFamily == FAMILY_SI) {
- regValue.pMacroTileConfig = NULL;
- regValue.noOfMacroEntries = 0;
- } else {
- regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
- regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
- }
-
- createFlags.useTileIndex = 1;
- createFlags.useHtileSliceAlign = 1;
-
- addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
- }
-
- addrCreateInput.callbacks.allocSysMem = allocSysMem;
- addrCreateInput.callbacks.freeSysMem = freeSysMem;
- addrCreateInput.callbacks.debugPrint = 0;
- addrCreateInput.createFlags = createFlags;
- addrCreateInput.regValue = regValue;
-
- addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
- if (addrRet != ADDR_OK)
- return NULL;
-
- if (max_alignment) {
- addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
- if (addrRet == ADDR_OK){
- *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
- }
- }
-
- struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
- if (!addrlib) {
- AddrDestroy(addrCreateOutput.hLib);
- return NULL;
- }
-
- addrlib->handle = addrCreateOutput.hLib;
- simple_mtx_init(&addrlib->dcc_retile_map_lock, mtx_plain);
- addrlib->dcc_retile_maps = _mesa_hash_table_create(NULL, dcc_retile_map_hash_key,
- dcc_retile_map_keys_equal);
- addrlib->dcc_retile_tile_indices = _mesa_hash_table_create(NULL, dcc_retile_tile_hash_key,
- dcc_retile_tile_keys_equal);
- return addrlib;
+ ADDR_CREATE_INPUT addrCreateInput = {0};
+ ADDR_CREATE_OUTPUT addrCreateOutput = {0};
+ ADDR_REGISTER_VALUE regValue = {0};
+ ADDR_CREATE_FLAGS createFlags = {{0}};
+ ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
+ ADDR_E_RETURNCODE addrRet;
+
+ addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
+ addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
+
+ regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
+ createFlags.value = 0;
+
+ addrCreateInput.chipFamily = info->family_id;
+ addrCreateInput.chipRevision = info->chip_external_rev;
+
+ if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
+ return NULL;
+
+ if (addrCreateInput.chipFamily >= FAMILY_AI) {
+ addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
+ } else {
+ regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
+ regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
+
+ regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
+ regValue.pTileConfig = amdinfo->gb_tile_mode;
+ regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
+ if (addrCreateInput.chipFamily == FAMILY_SI) {
+ regValue.pMacroTileConfig = NULL;
+ regValue.noOfMacroEntries = 0;
+ } else {
+ regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
+ regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
+ }
+
+ createFlags.useTileIndex = 1;
+ createFlags.useHtileSliceAlign = 1;
+
+ addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
+ }
+
+ addrCreateInput.callbacks.allocSysMem = allocSysMem;
+ addrCreateInput.callbacks.freeSysMem = freeSysMem;
+ addrCreateInput.callbacks.debugPrint = 0;
+ addrCreateInput.createFlags = createFlags;
+ addrCreateInput.regValue = regValue;
+
+ addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
+ if (addrRet != ADDR_OK)
+ return NULL;
+
+ if (max_alignment) {
+ addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
+ if (addrRet == ADDR_OK) {
+ *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
+ }
+ }
+
+ struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
+ if (!addrlib) {
+ AddrDestroy(addrCreateOutput.hLib);
+ return NULL;
+ }
+
+ addrlib->handle = addrCreateOutput.hLib;
+ simple_mtx_init(&addrlib->dcc_retile_map_lock, mtx_plain);
+ addrlib->dcc_retile_maps =
+ _mesa_hash_table_create(NULL, dcc_retile_map_hash_key, dcc_retile_map_keys_equal);
+ addrlib->dcc_retile_tile_indices =
+ _mesa_hash_table_create(NULL, dcc_retile_tile_hash_key, dcc_retile_tile_keys_equal);
+ return addrlib;
}
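A hypothetical caller, assuming info and amdinfo were already filled in (for example via ac_query_gpu_info()); the wrapper is created once per device and reused for all surface computations:

   uint64_t max_alignment = 0;
   struct ac_addrlib *addrlib = ac_addrlib_create(info, amdinfo, &max_alignment);
   if (!addrlib)
      return NULL; /* unknown chip family or AddrCreate() failure */

   /* ... ac_compute_surface() calls for each texture ... */

   ac_addrlib_destroy(addrlib);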
void ac_addrlib_destroy(struct ac_addrlib *addrlib)
{
- AddrDestroy(addrlib->handle);
- simple_mtx_destroy(&addrlib->dcc_retile_map_lock);
- _mesa_hash_table_destroy(addrlib->dcc_retile_maps, dcc_retile_map_free);
- _mesa_hash_table_destroy(addrlib->dcc_retile_tile_indices, dcc_retile_tile_free);
- free(addrlib);
+ AddrDestroy(addrlib->handle);
+ simple_mtx_destroy(&addrlib->dcc_retile_map_lock);
+ _mesa_hash_table_destroy(addrlib->dcc_retile_maps, dcc_retile_map_free);
+ _mesa_hash_table_destroy(addrlib->dcc_retile_tile_indices, dcc_retile_tile_free);
+ free(addrlib);
}
-static int surf_config_sanity(const struct ac_surf_config *config,
- unsigned flags)
+static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
{
- /* FMASK is allocated together with the color surface and can't be
- * allocated separately.
- */
- assert(!(flags & RADEON_SURF_FMASK));
- if (flags & RADEON_SURF_FMASK)
- return -EINVAL;
-
- /* all dimension must be at least 1 ! */
- if (!config->info.width || !config->info.height || !config->info.depth ||
- !config->info.array_size || !config->info.levels)
- return -EINVAL;
-
- switch (config->info.samples) {
- case 0:
- case 1:
- case 2:
- case 4:
- case 8:
- break;
- case 16:
- if (flags & RADEON_SURF_Z_OR_SBUFFER)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
- switch (config->info.storage_samples) {
- case 0:
- case 1:
- case 2:
- case 4:
- case 8:
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (config->is_3d && config->info.array_size > 1)
- return -EINVAL;
- if (config->is_cube && config->info.depth > 1)
- return -EINVAL;
-
- return 0;
+ /* FMASK is allocated together with the color surface and can't be
+ * allocated separately.
+ */
+ assert(!(flags & RADEON_SURF_FMASK));
+ if (flags & RADEON_SURF_FMASK)
+ return -EINVAL;
+
+   /* all dimensions must be at least 1! */
+ if (!config->info.width || !config->info.height || !config->info.depth ||
+ !config->info.array_size || !config->info.levels)
+ return -EINVAL;
+
+ switch (config->info.samples) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ case 16:
+ if (flags & RADEON_SURF_Z_OR_SBUFFER)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
+ switch (config->info.storage_samples) {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (config->is_3d && config->info.array_size > 1)
+ return -EINVAL;
+ if (config->is_cube && config->info.depth > 1)
+ return -EINVAL;
+
+ return 0;
}
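For reference, a configuration that passes every check above (hypothetical sizes; the field names are the ones this function reads):

   struct ac_surf_config config = {0};
   config.info.width = 1024;
   config.info.height = 1024;
   config.info.depth = 1;      /* is_cube would require depth == 1 */
   config.info.array_size = 1; /* is_3d would require array_size == 1 */
   config.info.levels = 11;    /* full mip chain for 1024x1024 */
   config.info.samples = 1;
   config.info.storage_samples = 1;
   assert(surf_config_sanity(&config, 0) == 0);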
-static int gfx6_compute_level(ADDR_HANDLE addrlib,
- const struct ac_surf_config *config,
- struct radeon_surf *surf, bool is_stencil,
- unsigned level, bool compressed,
- ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
- ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
- ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
- ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
- ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
+static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
+ struct radeon_surf *surf, bool is_stencil, unsigned level,
+ bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
+ ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
+ ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
+ ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
- struct legacy_surf_level *surf_level;
- ADDR_E_RETURNCODE ret;
-
- AddrSurfInfoIn->mipLevel = level;
- AddrSurfInfoIn->width = u_minify(config->info.width, level);
- AddrSurfInfoIn->height = u_minify(config->info.height, level);
-
- /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
- * because GFX9 needs linear alignment of 256 bytes.
- */
- if (config->info.levels == 1 &&
- AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
- AddrSurfInfoIn->bpp &&
- util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
- unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
-
- AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
- }
-
- /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
- * true for r32g32b32 formats. */
- if (AddrSurfInfoIn->bpp == 96) {
- assert(config->info.levels == 1);
- assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
-
- /* The least common multiple of 64 bytes and 12 bytes/pixel is
- * 192 bytes, or 16 pixels. */
- AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
- }
-
- if (config->is_3d)
- AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
- else if (config->is_cube)
- AddrSurfInfoIn->numSlices = 6;
- else
- AddrSurfInfoIn->numSlices = config->info.array_size;
-
- if (level > 0) {
- /* Set the base level pitch. This is needed for calculation
- * of non-zero levels. */
- if (is_stencil)
- AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x;
- else
- AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;
-
- /* Convert blocks to pixels for compressed formats. */
- if (compressed)
- AddrSurfInfoIn->basePitch *= surf->blk_w;
- }
-
- ret = AddrComputeSurfaceInfo(addrlib,
- AddrSurfInfoIn,
- AddrSurfInfoOut);
- if (ret != ADDR_OK) {
- return ret;
- }
-
- surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level];
- surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign);
- surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
- surf_level->nblk_x = AddrSurfInfoOut->pitch;
- surf_level->nblk_y = AddrSurfInfoOut->height;
-
- switch (AddrSurfInfoOut->tileMode) {
- case ADDR_TM_LINEAR_ALIGNED:
- surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- break;
- case ADDR_TM_1D_TILED_THIN1:
- surf_level->mode = RADEON_SURF_MODE_1D;
- break;
- case ADDR_TM_2D_TILED_THIN1:
- surf_level->mode = RADEON_SURF_MODE_2D;
- break;
- default:
- assert(0);
- }
-
- if (is_stencil)
- surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
- else
- surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;
-
- surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize;
-
- /* Clear DCC fields at the beginning. */
- surf_level->dcc_offset = 0;
-
- /* The previous level's flag tells us if we can use DCC for this level. */
- if (AddrSurfInfoIn->flags.dccCompatible &&
- (level == 0 || AddrDccOut->subLvlCompressible)) {
- bool prev_level_clearable = level == 0 ||
- AddrDccOut->dccRamSizeAligned;
-
- AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
- AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
- AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
- AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
- AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
- ret = AddrComputeDccInfo(addrlib,
- AddrDccIn,
- AddrDccOut);
-
- if (ret == ADDR_OK) {
- surf_level->dcc_offset = surf->dcc_size;
- surf->num_dcc_levels = level + 1;
- surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
- surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
-
- /* If the DCC size of a subresource (1 mip level or 1 slice)
- * is not aligned, the DCC memory layout is not contiguous for
- * that subresource, which means we can't use fast clear.
- *
- * We only do fast clears for whole mipmap levels. If we did
- * per-slice fast clears, the same restriction would apply.
- * (i.e. only compute the slice size and see if it's aligned)
- *
- * The last level can be non-contiguous and still be clearable
- * if it's interleaved with the next level that doesn't exist.
- */
- if (AddrDccOut->dccRamSizeAligned ||
- (prev_level_clearable && level == config->info.levels - 1))
- surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
- else
- surf_level->dcc_fast_clear_size = 0;
-
- /* Compute the DCC slice size because addrlib doesn't
- * provide this info. As DCC memory is linear (each
- * slice is the same size) it's easy to compute.
- */
- surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size;
-
- /* For arrays, we have to compute the DCC info again
- * with one slice size to get a correct fast clear
- * size.
- */
- if (config->info.array_size > 1) {
- AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
- AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
- AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
- AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
- AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
- ret = AddrComputeDccInfo(addrlib,
- AddrDccIn, AddrDccOut);
- if (ret == ADDR_OK) {
- /* If the DCC memory isn't properly
- * aligned, the data are interleaved
- * accross slices.
- */
- if (AddrDccOut->dccRamSizeAligned)
- surf_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
- else
- surf_level->dcc_slice_fast_clear_size = 0;
- }
-
- if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
- surf->dcc_slice_size != surf_level->dcc_slice_fast_clear_size) {
- surf->dcc_size = 0;
- surf->num_dcc_levels = 0;
- AddrDccOut->subLvlCompressible = false;
- }
- } else {
- surf_level->dcc_slice_fast_clear_size = surf_level->dcc_fast_clear_size;
- }
- }
- }
-
- /* HTILE. */
- if (!is_stencil &&
- AddrSurfInfoIn->flags.depth &&
- surf_level->mode == RADEON_SURF_MODE_2D &&
- level == 0 &&
- !(surf->flags & RADEON_SURF_NO_HTILE)) {
- AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
- AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
- AddrHtileIn->height = AddrSurfInfoOut->height;
- AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
- AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
- AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
- AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
- AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
- AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
- ret = AddrComputeHtileInfo(addrlib,
- AddrHtileIn,
- AddrHtileOut);
-
- if (ret == ADDR_OK) {
- surf->htile_size = AddrHtileOut->htileBytes;
- surf->htile_slice_size = AddrHtileOut->sliceSize;
- surf->htile_alignment = AddrHtileOut->baseAlign;
- }
- }
-
- return 0;
+ struct legacy_surf_level *surf_level;
+ ADDR_E_RETURNCODE ret;
+
+ AddrSurfInfoIn->mipLevel = level;
+ AddrSurfInfoIn->width = u_minify(config->info.width, level);
+ AddrSurfInfoIn->height = u_minify(config->info.height, level);
+
+ /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
+ * because GFX9 needs linear alignment of 256 bytes.
+ */
+ if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
+ AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
+ unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
+
+ AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
+ }
+
+ /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
+ * true for r32g32b32 formats. */
+ if (AddrSurfInfoIn->bpp == 96) {
+ assert(config->info.levels == 1);
+ assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
+
+ /* The least common multiple of 64 bytes and 12 bytes/pixel is
+ * 192 bytes, or 16 pixels. */
+ AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
+ }
+
+ if (config->is_3d)
+ AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
+ else if (config->is_cube)
+ AddrSurfInfoIn->numSlices = 6;
+ else
+ AddrSurfInfoIn->numSlices = config->info.array_size;
+
+ if (level > 0) {
+ /* Set the base level pitch. This is needed for calculation
+ * of non-zero levels. */
+ if (is_stencil)
+ AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x;
+ else
+ AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;
+
+ /* Convert blocks to pixels for compressed formats. */
+ if (compressed)
+ AddrSurfInfoIn->basePitch *= surf->blk_w;
+ }
+
+ ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
+ if (ret != ADDR_OK) {
+ return ret;
+ }
+
+ surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level];
+ surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign);
+ surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
+ surf_level->nblk_x = AddrSurfInfoOut->pitch;
+ surf_level->nblk_y = AddrSurfInfoOut->height;
+
+ switch (AddrSurfInfoOut->tileMode) {
+ case ADDR_TM_LINEAR_ALIGNED:
+ surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ break;
+ case ADDR_TM_1D_TILED_THIN1:
+ surf_level->mode = RADEON_SURF_MODE_1D;
+ break;
+ case ADDR_TM_2D_TILED_THIN1:
+ surf_level->mode = RADEON_SURF_MODE_2D;
+ break;
+ default:
+ assert(0);
+ }
+
+ if (is_stencil)
+ surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
+ else
+ surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;
+
+ surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize;
+
+ /* Clear DCC fields at the beginning. */
+ surf_level->dcc_offset = 0;
+
+ /* The previous level's flag tells us if we can use DCC for this level. */
+ if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
+ bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;
+
+ AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
+ AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
+ AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
+ AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
+
+ if (ret == ADDR_OK) {
+ surf_level->dcc_offset = surf->dcc_size;
+ surf->num_dcc_levels = level + 1;
+ surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
+ surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
+
+ /* If the DCC size of a subresource (1 mip level or 1 slice)
+ * is not aligned, the DCC memory layout is not contiguous for
+ * that subresource, which means we can't use fast clear.
+ *
+ * We only do fast clears for whole mipmap levels. If we did
+ * per-slice fast clears, the same restriction would apply.
+ * (i.e. only compute the slice size and see if it's aligned)
+ *
+ * The last level can be non-contiguous and still be clearable
+ * if it's interleaved with the next level that doesn't exist.
+ */
+ if (AddrDccOut->dccRamSizeAligned ||
+ (prev_level_clearable && level == config->info.levels - 1))
+ surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
+ else
+ surf_level->dcc_fast_clear_size = 0;
+
+ /* Compute the DCC slice size because addrlib doesn't
+ * provide this info. As DCC memory is linear (each
+ * slice is the same size) it's easy to compute.
+ */
+ surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size;
+
+ /* For arrays, we have to compute the DCC info again
+ * with one slice size to get a correct fast clear
+ * size.
+ */
+ if (config->info.array_size > 1) {
+ AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
+ AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
+ AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
+ AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
+ if (ret == ADDR_OK) {
+ /* If the DCC memory isn't properly
+ * aligned, the data are interleaved
+             * across slices.
+ */
+ if (AddrDccOut->dccRamSizeAligned)
+ surf_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
+ else
+ surf_level->dcc_slice_fast_clear_size = 0;
+ }
+
+ if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
+ surf->dcc_slice_size != surf_level->dcc_slice_fast_clear_size) {
+ surf->dcc_size = 0;
+ surf->num_dcc_levels = 0;
+ AddrDccOut->subLvlCompressible = false;
+ }
+ } else {
+ surf_level->dcc_slice_fast_clear_size = surf_level->dcc_fast_clear_size;
+ }
+ }
+ }
+
+ /* HTILE. */
+ if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&
+ level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
+ AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
+ AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
+ AddrHtileIn->height = AddrSurfInfoOut->height;
+ AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
+ AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
+ AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
+ AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
+ AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+ ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);
+
+ if (ret == ADDR_OK) {
+ surf->htile_size = AddrHtileOut->htileBytes;
+ surf->htile_slice_size = AddrHtileOut->sliceSize;
+ surf->htile_alignment = AddrHtileOut->baseAlign;
+ }
+ }
+
+ return 0;
}
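The fast-clear rule above is easier to follow with concrete (hypothetical) addrlib outputs for a 3-level miptree:

   /* level 0: dccRamSizeAligned = 1 -> fast-clearable
    * level 1: dccRamSizeAligned = 1 -> fast-clearable
    * level 2: dccRamSizeAligned = 0 -> still fast-clearable, because
    *   prev_level_clearable is true (level 1 was aligned) and level 2 is
    *   the last level: the "interleaved with the next level that doesn't
    *   exist" case described in the comment.
    */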
-static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
- const struct radeon_info *info)
+static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
{
- uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
+ uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
- if (info->chip_class >= GFX7)
- surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
- else
- surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
+ if (info->chip_class >= GFX7)
+ surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
+ else
+ surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
{
- unsigned index, tileb;
+ unsigned index, tileb;
- tileb = 8 * 8 * surf->bpe;
- tileb = MIN2(surf->u.legacy.tile_split, tileb);
+ tileb = 8 * 8 * surf->bpe;
+ tileb = MIN2(surf->u.legacy.tile_split, tileb);
- for (index = 0; tileb > 64; index++)
- tileb >>= 1;
+ for (index = 0; tileb > 64; index++)
+ tileb >>= 1;
- assert(index < 16);
- return index;
+ assert(index < 16);
+ return index;
}
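A worked example of the halving loop with hypothetical surface parameters:

   /* bpe = 4, tile_split = 1024:
    *   tileb = MIN2(1024, 8 * 8 * 4) = 256
    *   256 -> 128 (index = 1) -> 64 (index = 2); 64 is not > 64,
    * so the function returns 2.
    */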
-static bool get_display_flag(const struct ac_surf_config *config,
- const struct radeon_surf *surf)
+static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
{
- unsigned num_channels = config->info.num_channels;
- unsigned bpe = surf->bpe;
-
- if (!config->is_3d &&
- !config->is_cube &&
- !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
- surf->flags & RADEON_SURF_SCANOUT &&
- config->info.samples <= 1 &&
- surf->blk_w <= 2 && surf->blk_h == 1) {
- /* subsampled */
- if (surf->blk_w == 2 && surf->blk_h == 1)
- return true;
-
- if (/* RGBA8 or RGBA16F */
- (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
- /* R5G6B5 or R5G5B5A1 */
- (bpe == 2 && num_channels >= 3) ||
- /* C8 palette */
- (bpe == 1 && num_channels == 1))
- return true;
- }
- return false;
+ unsigned num_channels = config->info.num_channels;
+ unsigned bpe = surf->bpe;
+
+ if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
+ surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
+ surf->blk_h == 1) {
+ /* subsampled */
+ if (surf->blk_w == 2 && surf->blk_h == 1)
+ return true;
+
+ if (/* RGBA8 or RGBA16F */
+ (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
+ /* R5G6B5 or R5G5B5A1 */
+ (bpe == 2 && num_channels >= 3) ||
+ /* C8 palette */
+ (bpe == 1 && num_channels == 1))
+ return true;
+ }
+ return false;
}
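The display rules above reduce to a few concrete cases (hypothetical formats):

   /* Returns true:  bpe = 4, num_channels = 4, blk_w = blk_h = 1
    *                (e.g. B8G8R8A8 with RADEON_SURF_SCANOUT, samples <= 1).
    * Returns false: bpe = 16 (e.g. R32G32B32A32) is outside the 4..8 range;
    *                any surface with RADEON_SURF_Z_OR_SBUFFER set.
    */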
/**
* Copy surface-global settings like pipe/bank config from level 0 surface
* computation, and compute tile swizzle.
*/
-static int gfx6_surface_settings(ADDR_HANDLE addrlib,
- const struct radeon_info *info,
- const struct ac_surf_config *config,
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
- struct radeon_surf *surf)
+static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
+ const struct ac_surf_config *config,
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
{
- surf->surf_alignment = csio->baseAlign;
- surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
- gfx6_set_micro_tile_mode(surf, info);
-
- /* For 2D modes only. */
- if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
- surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
- surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
- surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
- surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
- surf->u.legacy.num_banks = csio->pTileInfo->banks;
- surf->u.legacy.macro_tile_index = csio->macroModeIndex;
- } else {
- surf->u.legacy.macro_tile_index = 0;
- }
-
- /* Compute tile swizzle. */
- /* TODO: fix tile swizzle with mipmapping for GFX6 */
- if ((info->chip_class >= GFX7 || config->info.levels == 1) &&
- config->info.surf_index &&
- surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
- !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
- !get_display_flag(config, surf)) {
- ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
- ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
-
- AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
- AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
-
- AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
- AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
- AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
- AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
- AddrBaseSwizzleIn.tileMode = csio->tileMode;
-
- int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
- &AddrBaseSwizzleOut);
- if (r != ADDR_OK)
- return r;
-
- assert(AddrBaseSwizzleOut.tileSwizzle <=
- u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
- surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
- }
- return 0;
+ surf->surf_alignment = csio->baseAlign;
+ surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
+ gfx6_set_micro_tile_mode(surf, info);
+
+ /* For 2D modes only. */
+ if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
+ surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
+ surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
+ surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
+ surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
+ surf->u.legacy.num_banks = csio->pTileInfo->banks;
+ surf->u.legacy.macro_tile_index = csio->macroModeIndex;
+ } else {
+ surf->u.legacy.macro_tile_index = 0;
+ }
+
+ /* Compute tile swizzle. */
+ /* TODO: fix tile swizzle with mipmapping for GFX6 */
+ if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
+ surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
+ !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
+ !get_display_flag(config, surf)) {
+ ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
+
+ AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+ AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
+ AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
+ AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
+ AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
+ AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
+ AddrBaseSwizzleIn.tileMode = csio->tileMode;
+
+ int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
+ if (r != ADDR_OK)
+ return r;
+
+ assert(AddrBaseSwizzleOut.tileSwizzle <=
+ u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+ surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
+ }
+ return 0;
}
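config->info.surf_index points at a per-device atomic counter, so each eligible 2D-tiled surface receives a distinct base swizzle. A hypothetical driver-side setup:

   /* In the driver's per-device (screen) state, zero-initialized once: */
   uint32_t surf_index_counter;

   /* When building the config for a new texture: */
   config.info.surf_index = &surf_index_counter;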
-static void ac_compute_cmask(const struct radeon_info *info,
- const struct ac_surf_config *config,
- struct radeon_surf *surf)
+static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
+ struct radeon_surf *surf)
{
- unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
- unsigned num_pipes = info->num_tile_pipes;
- unsigned cl_width, cl_height;
-
- if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
- (config->info.samples >= 2 && !surf->fmask_size))
- return;
-
- assert(info->chip_class <= GFX8);
-
- switch (num_pipes) {
- case 2:
- cl_width = 32;
- cl_height = 16;
- break;
- case 4:
- cl_width = 32;
- cl_height = 32;
- break;
- case 8:
- cl_width = 64;
- cl_height = 32;
- break;
- case 16: /* Hawaii */
- cl_width = 64;
- cl_height = 64;
- break;
- default:
- assert(0);
- return;
- }
-
- unsigned base_align = num_pipes * pipe_interleave_bytes;
-
- unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
- unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
- unsigned slice_elements = (width * height) / (8*8);
-
- /* Each element of CMASK is a nibble. */
- unsigned slice_bytes = slice_elements / 2;
-
- surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
- if (surf->u.legacy.cmask_slice_tile_max)
- surf->u.legacy.cmask_slice_tile_max -= 1;
-
- unsigned num_layers;
- if (config->is_3d)
- num_layers = config->info.depth;
- else if (config->is_cube)
- num_layers = 6;
- else
- num_layers = config->info.array_size;
-
- surf->cmask_alignment = MAX2(256, base_align);
- surf->cmask_slice_size = align(slice_bytes, base_align);
- surf->cmask_size = surf->cmask_slice_size * num_layers;
+ unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
+ unsigned num_pipes = info->num_tile_pipes;
+ unsigned cl_width, cl_height;
+
+ if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
+ (config->info.samples >= 2 && !surf->fmask_size))
+ return;
+
+ assert(info->chip_class <= GFX8);
+
+ switch (num_pipes) {
+ case 2:
+ cl_width = 32;
+ cl_height = 16;
+ break;
+ case 4:
+ cl_width = 32;
+ cl_height = 32;
+ break;
+ case 8:
+ cl_width = 64;
+ cl_height = 32;
+ break;
+ case 16: /* Hawaii */
+ cl_width = 64;
+ cl_height = 64;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ unsigned base_align = num_pipes * pipe_interleave_bytes;
+
+ unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
+ unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
+ unsigned slice_elements = (width * height) / (8 * 8);
+
+ /* Each element of CMASK is a nibble. */
+ unsigned slice_bytes = slice_elements / 2;
+
+ surf->u.legacy.cmask_slice_tile_max = (width * height) / (128 * 128);
+ if (surf->u.legacy.cmask_slice_tile_max)
+ surf->u.legacy.cmask_slice_tile_max -= 1;
+
+ unsigned num_layers;
+ if (config->is_3d)
+ num_layers = config->info.depth;
+ else if (config->is_cube)
+ num_layers = 6;
+ else
+ num_layers = config->info.array_size;
+
+ surf->cmask_alignment = MAX2(256, base_align);
+ surf->cmask_slice_size = align(slice_bytes, base_align);
+ surf->cmask_size = surf->cmask_slice_size * num_layers;
}
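A worked CMASK size computation with hypothetical values (num_pipes = 4, pipe_interleave_bytes = 256, a 512x512 single-layer color surface with nblk_x = nblk_y = 512):

   /* cl_width = cl_height = 32, base_align = 4 * 256 = 1024
    * width = height = align(512, 32 * 8) = 512
    * slice_elements = 512 * 512 / 64 = 4096 nibbles -> slice_bytes = 2048
    * cmask_slice_tile_max = 512 * 512 / (128 * 128) - 1 = 15
    * cmask_slice_size = align(2048, 1024) = 2048, cmask_size = 2048 * 1
    */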
/**
* The following fields of \p surf must be initialized by the caller:
* blk_w, blk_h, bpe, flags.
*/
-static int gfx6_compute_surface(ADDR_HANDLE addrlib,
- const struct radeon_info *info,
- const struct ac_surf_config *config,
- enum radeon_surf_mode mode,
- struct radeon_surf *surf)
+static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
+ const struct ac_surf_config *config, enum radeon_surf_mode mode,
+ struct radeon_surf *surf)
{
- unsigned level;
- bool compressed;
- ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
- ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
- ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
- ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
- ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
- ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
- ADDR_TILEINFO AddrTileInfoIn = {0};
- ADDR_TILEINFO AddrTileInfoOut = {0};
- int r;
-
- AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
- AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
- AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
- AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
- AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
- AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
- AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
-
- compressed = surf->blk_w == 4 && surf->blk_h == 4;
-
- /* MSAA requires 2D tiling. */
- if (config->info.samples > 1)
- mode = RADEON_SURF_MODE_2D;
-
- /* DB doesn't support linear layouts. */
- if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
- mode < RADEON_SURF_MODE_1D)
- mode = RADEON_SURF_MODE_1D;
-
- /* Set the requested tiling mode. */
- switch (mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
- break;
- case RADEON_SURF_MODE_1D:
- AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
- break;
- case RADEON_SURF_MODE_2D:
- AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
- break;
- default:
- assert(0);
- }
-
- /* The format must be set correctly for the allocation of compressed
- * textures to work. In other cases, setting the bpp is sufficient.
- */
- if (compressed) {
- switch (surf->bpe) {
- case 8:
- AddrSurfInfoIn.format = ADDR_FMT_BC1;
- break;
- case 16:
- AddrSurfInfoIn.format = ADDR_FMT_BC3;
- break;
- default:
- assert(0);
- }
- }
- else {
- AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
- }
-
- AddrDccIn.numSamples = AddrSurfInfoIn.numSamples =
- MAX2(1, config->info.samples);
- AddrSurfInfoIn.tileIndex = -1;
-
- if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
- AddrDccIn.numSamples = AddrSurfInfoIn.numFrags =
- MAX2(1, config->info.storage_samples);
- }
-
- /* Set the micro tile type. */
- if (surf->flags & RADEON_SURF_SCANOUT)
- AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
- else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
- AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
- else
- AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
-
- AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
- AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
- AddrSurfInfoIn.flags.cube = config->is_cube;
- AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
- AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
- AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
-
- /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
- * requested, because TC-compatible HTILE requires 2D tiling.
- */
- AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
- !AddrSurfInfoIn.flags.fmask &&
- config->info.samples <= 1 &&
- !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
-
- /* DCC notes:
- * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
- * with samples >= 4.
- * - Mipmapped array textures have low performance (discovered by a closed
- * driver team).
- */
- AddrSurfInfoIn.flags.dccCompatible =
- info->chip_class >= GFX8 &&
- info->has_graphics && /* disable DCC on compute-only chips */
- !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
- !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
- !compressed &&
- ((config->info.array_size == 1 && config->info.depth == 1) ||
- config->info.levels == 1);
-
- AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
- AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
-
- /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
- * for Z and stencil. This can cause a number of problems which we work
- * around here:
- *
- * - a depth part that is incompatible with mipmapped texturing
- * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
- * incorrect tiling applied to the stencil part, stencil buffer
- * memory accesses that go out of bounds) even without mipmapping
- *
- * Some piglit tests that are prone to different types of related
- * failures:
- * ./bin/ext_framebuffer_multisample-upsample 2 stencil
- * ./bin/framebuffer-blit-levels {draw,read} stencil
- * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
- * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
- * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
- */
- int stencil_tile_idx = -1;
-
- if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
- (config->info.levels > 1 || info->family == CHIP_STONEY)) {
- /* Compute stencilTileIdx that is compatible with the (depth)
- * tileIdx. This degrades the depth surface if necessary to
- * ensure that a matching stencilTileIdx exists. */
- AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
-
- /* Keep the depth mip-tail compatible with texturing. */
- AddrSurfInfoIn.flags.noStencil = 1;
- }
-
- /* Set preferred macrotile parameters. This is usually required
- * for shared resources. This is for 2D tiling only. */
- if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
- surf->u.legacy.bankw && surf->u.legacy.bankh &&
- surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
- /* If any of these parameters are incorrect, the calculation
- * will fail. */
- AddrTileInfoIn.banks = surf->u.legacy.num_banks;
- AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
- AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
- AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
- AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
- AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
- AddrSurfInfoIn.flags.opt4Space = 0;
- AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
-
- /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
- * the tile index, because we are expected to know it if
- * we know the other parameters.
- *
- * This is something that can easily be fixed in Addrlib.
- * For now, just figure it out here.
- * Note that only 2D_TILE_THIN1 is handled here.
- */
- assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
- assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
-
- if (info->chip_class == GFX6) {
- if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
- if (surf->bpe == 2)
- AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
- else
- AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
- } else {
- if (surf->bpe == 1)
- AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
- else if (surf->bpe == 2)
- AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
- else if (surf->bpe == 4)
- AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
- else
- AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
- }
- } else {
- /* GFX7 - GFX8 */
- if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
- AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
- else
- AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
-
- /* Addrlib doesn't set this if tileIndex is forced like above. */
- AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
- }
- }
-
- surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
- surf->num_dcc_levels = 0;
- surf->surf_size = 0;
- surf->dcc_size = 0;
- surf->dcc_alignment = 1;
- surf->htile_size = 0;
- surf->htile_slice_size = 0;
- surf->htile_alignment = 1;
-
- const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) &&
- !(surf->flags & RADEON_SURF_ZBUFFER);
-
- /* Calculate texture layout information. */
- if (!only_stencil) {
- for (level = 0; level < config->info.levels; level++) {
- r = gfx6_compute_level(addrlib, config, surf, false, level, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut,
- &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
- if (r)
- return r;
-
- if (level > 0)
- continue;
-
- if (!AddrSurfInfoOut.tcCompatible) {
- AddrSurfInfoIn.flags.tcCompatible = 0;
- surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
- }
-
- if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
- AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
- AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
- stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
-
- assert(stencil_tile_idx >= 0);
- }
-
- r = gfx6_surface_settings(addrlib, info, config,
- &AddrSurfInfoOut, surf);
- if (r)
- return r;
- }
- }
-
- /* Calculate texture layout information for stencil. */
- if (surf->flags & RADEON_SURF_SBUFFER) {
- AddrSurfInfoIn.tileIndex = stencil_tile_idx;
- AddrSurfInfoIn.bpp = 8;
- AddrSurfInfoIn.flags.depth = 0;
- AddrSurfInfoIn.flags.stencil = 1;
- AddrSurfInfoIn.flags.tcCompatible = 0;
- /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
- AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
-
- for (level = 0; level < config->info.levels; level++) {
- r = gfx6_compute_level(addrlib, config, surf, true, level, compressed,
- &AddrSurfInfoIn, &AddrSurfInfoOut,
- &AddrDccIn, &AddrDccOut,
- NULL, NULL);
- if (r)
- return r;
-
- /* DB uses the depth pitch for both stencil and depth. */
- if (!only_stencil) {
- if (surf->u.legacy.stencil_level[level].nblk_x !=
- surf->u.legacy.level[level].nblk_x)
- surf->u.legacy.stencil_adjusted = true;
- } else {
- surf->u.legacy.level[level].nblk_x =
- surf->u.legacy.stencil_level[level].nblk_x;
- }
-
- if (level == 0) {
- if (only_stencil) {
- r = gfx6_surface_settings(addrlib, info, config,
- &AddrSurfInfoOut, surf);
- if (r)
- return r;
- }
-
- /* For 2D modes only. */
- if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
- surf->u.legacy.stencil_tile_split =
- AddrSurfInfoOut.pTileInfo->tileSplitBytes;
- }
- }
- }
- }
-
- /* Compute FMASK. */
- if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color &&
- info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
- ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
- ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
- ADDR_TILEINFO fmask_tile_info = {};
-
- fin.size = sizeof(fin);
- fout.size = sizeof(fout);
-
- fin.tileMode = AddrSurfInfoOut.tileMode;
- fin.pitch = AddrSurfInfoOut.pitch;
- fin.height = config->info.height;
- fin.numSlices = AddrSurfInfoIn.numSlices;
- fin.numSamples = AddrSurfInfoIn.numSamples;
- fin.numFrags = AddrSurfInfoIn.numFrags;
- fin.tileIndex = -1;
- fout.pTileInfo = &fmask_tile_info;
-
- r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
- if (r)
- return r;
-
- surf->fmask_size = fout.fmaskBytes;
- surf->fmask_alignment = fout.baseAlign;
- surf->fmask_tile_swizzle = 0;
-
- surf->u.legacy.fmask.slice_tile_max =
- (fout.pitch * fout.height) / 64;
- if (surf->u.legacy.fmask.slice_tile_max)
- surf->u.legacy.fmask.slice_tile_max -= 1;
-
- surf->u.legacy.fmask.tiling_index = fout.tileIndex;
- surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
- surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
- surf->u.legacy.fmask.slice_size = fout.sliceSize;
-
- /* Compute tile swizzle for FMASK. */
- if (config->info.fmask_surf_index &&
- !(surf->flags & RADEON_SURF_SHAREABLE)) {
- ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
- ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
-
- xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
- xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
-
- /* This counter starts from 1 instead of 0. */
- xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
- xin.tileIndex = fout.tileIndex;
- xin.macroModeIndex = fout.macroModeIndex;
- xin.pTileInfo = fout.pTileInfo;
- xin.tileMode = fin.tileMode;
-
- int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
- if (r != ADDR_OK)
- return r;
-
- assert(xout.tileSwizzle <=
- u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
- surf->fmask_tile_swizzle = xout.tileSwizzle;
- }
- }
-
- /* Recalculate the whole DCC miptree size including disabled levels.
- * This is what addrlib does, but calling addrlib would be a lot more
- * complicated.
- */
- if (surf->dcc_size && config->info.levels > 1) {
- /* The smallest miplevels that are never compressed by DCC
- * still read the DCC buffer via TC if the base level uses DCC,
- * and for some reason the DCC buffer needs to be larger if
- * the miptree uses non-zero tile_swizzle. Otherwise there are
- * VM faults.
- *
- * "dcc_alignment * 4" was determined by trial and error.
- */
- surf->dcc_size = align64(surf->surf_size >> 8,
- surf->dcc_alignment * 4);
- }
-
- /* Make sure HTILE covers the whole miptree, because the shader reads
- * TC-compatible HTILE even for levels where it's disabled by DB.
- */
- if (surf->htile_size && config->info.levels > 1 &&
- surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
- /* MSAA can't occur with levels > 1, so ignore the sample count. */
- const unsigned total_pixels = surf->surf_size / surf->bpe;
- const unsigned htile_block_size = 8 * 8;
- const unsigned htile_element_size = 4;
-
- surf->htile_size = (total_pixels / htile_block_size) *
- htile_element_size;
- surf->htile_size = align(surf->htile_size, surf->htile_alignment);
- } else if (!surf->htile_size) {
- /* Unset this if HTILE is not present. */
- surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
- }
-
- surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
- surf->is_displayable = surf->is_linear ||
- surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
- surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
-
- /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
- * used at the same time. This case is not currently expected to occur
- * because we don't use rotated. Enforce this restriction on all chips
- * to facilitate testing.
- */
- if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
- assert(!"rotate micro tile mode is unsupported");
- return ADDR_ERROR;
- }
-
- ac_compute_cmask(info, config, surf);
- return 0;
+ unsigned level;
+ bool compressed;
+ ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
+ ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
+ ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
+ ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
+ ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
+ ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
+ ADDR_TILEINFO AddrTileInfoIn = {0};
+ ADDR_TILEINFO AddrTileInfoOut = {0};
+ int r;
+
+ AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
+ AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
+ AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
+ AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
+ AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
+ AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
+ AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
+
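+   /* A 4x4 block footprint identifies the BC-compressed formats handled below. */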
+ compressed = surf->blk_w == 4 && surf->blk_h == 4;
+
+ /* MSAA requires 2D tiling. */
+ if (config->info.samples > 1)
+ mode = RADEON_SURF_MODE_2D;
+
+ /* DB doesn't support linear layouts. */
+ if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
+ mode = RADEON_SURF_MODE_1D;
+
+ /* Set the requested tiling mode. */
+ switch (mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
+ break;
+ case RADEON_SURF_MODE_1D:
+ AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
+ break;
+ case RADEON_SURF_MODE_2D:
+ AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
+ break;
+ default:
+ assert(0);
+ }
+
+ /* The format must be set correctly for the allocation of compressed
+ * textures to work. In other cases, setting the bpp is sufficient.
+ */
+ if (compressed) {
+ switch (surf->bpe) {
+ case 8:
+ AddrSurfInfoIn.format = ADDR_FMT_BC1;
+ break;
+ case 16:
+ AddrSurfInfoIn.format = ADDR_FMT_BC3;
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
+ }
+
+ AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
+ AddrSurfInfoIn.tileIndex = -1;
+
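+   /* For color surfaces, the stored fragment count (numFrags) can be lower than
+    * the coverage sample count when EQAA is used.
+    */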
+ if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
+ AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
+ }
+
+ /* Set the micro tile type. */
+ if (surf->flags & RADEON_SURF_SCANOUT)
+ AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
+ else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
+ AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
+ else
+ AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
+
+ AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
+ AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
+ AddrSurfInfoIn.flags.cube = config->is_cube;
+ AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
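+   /* pow2Pad pads the dimensions of mipmapped textures to powers of two. */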
+ AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
+ AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
+
+ /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
+ * requested, because TC-compatible HTILE requires 2D tiling.
+ */
+ AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
+ !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
+ !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
+
+ /* DCC notes:
+ * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
+ * with samples >= 4.
+ * - Mipmapped array textures have low performance (discovered by a closed
+ * driver team).
+ */
+ AddrSurfInfoIn.flags.dccCompatible =
+ info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
+ !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
+ !compressed &&
+ ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);
+
+ AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
+ AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
+
+ /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
+ * for Z and stencil. This can cause a number of problems which we work
+ * around here:
+ *
+ * - a depth part that is incompatible with mipmapped texturing
+ * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
+ * incorrect tiling applied to the stencil part, stencil buffer
+ * memory accesses that go out of bounds) even without mipmapping
+ *
+ * Some piglit tests that are prone to different types of related
+ * failures:
+ * ./bin/ext_framebuffer_multisample-upsample 2 stencil
+ * ./bin/framebuffer-blit-levels {draw,read} stencil
+ * ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
+ * ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
+ * ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
+ */
+ int stencil_tile_idx = -1;
+
+ if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
+ (config->info.levels > 1 || info->family == CHIP_STONEY)) {
+ /* Compute stencilTileIdx that is compatible with the (depth)
+ * tileIdx. This degrades the depth surface if necessary to
+ * ensure that a matching stencilTileIdx exists. */
+ AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
+
+ /* Keep the depth mip-tail compatible with texturing. */
+ AddrSurfInfoIn.flags.noStencil = 1;
+ }
+
+ /* Set preferred macrotile parameters. This is usually required
+ * for shared resources. This is for 2D tiling only. */
+ if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
+ surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
+ /* If any of these parameters are incorrect, the calculation
+ * will fail. */
+ AddrTileInfoIn.banks = surf->u.legacy.num_banks;
+ AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
+ AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
+ AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
+ AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
+ AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
+ AddrSurfInfoIn.flags.opt4Space = 0;
+ AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
+
+ /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
+ * the tile index, because we are expected to know it if
+ * we know the other parameters.
+ *
+ * This is something that can easily be fixed in Addrlib.
+ * For now, just figure it out here.
+ * Note that only 2D_TILE_THIN1 is handled here.
+ */
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
+
+ if (info->chip_class == GFX6) {
+ if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
+ if (surf->bpe == 2)
+ AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
+ else
+ AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
+ } else {
+ if (surf->bpe == 1)
+ AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
+ else if (surf->bpe == 2)
+ AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
+ else if (surf->bpe == 4)
+ AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
+ else
+ AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
+ }
+ } else {
+ /* GFX7 - GFX8 */
+ if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
+ AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
+ else
+ AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
+
+ /* Addrlib doesn't set this if tileIndex is forced like above. */
+ AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
+ }
+ }
+
+ surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
+ surf->num_dcc_levels = 0;
+ surf->surf_size = 0;
+ surf->dcc_size = 0;
+ surf->dcc_alignment = 1;
+ surf->htile_size = 0;
+ surf->htile_slice_size = 0;
+ surf->htile_alignment = 1;
+
+ const bool only_stencil =
+ (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
+
+ /* Calculate texture layout information. */
+ if (!only_stencil) {
+ for (level = 0; level < config->info.levels; level++) {
+ r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
+ &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
+ &AddrHtileOut);
+ if (r)
+ return r;
+
+ if (level > 0)
+ continue;
+
+ if (!AddrSurfInfoOut.tcCompatible) {
+ AddrSurfInfoIn.flags.tcCompatible = 0;
+ surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
+ if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
+ AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
+ AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
+ stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
+
+ assert(stencil_tile_idx >= 0);
+ }
+
+ r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
+ if (r)
+ return r;
+ }
+ }
+
+ /* Calculate texture layout information for stencil. */
+ if (surf->flags & RADEON_SURF_SBUFFER) {
+ AddrSurfInfoIn.tileIndex = stencil_tile_idx;
+ AddrSurfInfoIn.bpp = 8;
+ AddrSurfInfoIn.flags.depth = 0;
+ AddrSurfInfoIn.flags.stencil = 1;
+ AddrSurfInfoIn.flags.tcCompatible = 0;
+ /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
+ AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
+
+ for (level = 0; level < config->info.levels; level++) {
+ r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
+ &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
+ if (r)
+ return r;
+
+ /* DB uses the depth pitch for both stencil and depth. */
+ if (!only_stencil) {
+ if (surf->u.legacy.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
+ surf->u.legacy.stencil_adjusted = true;
+ } else {
+ surf->u.legacy.level[level].nblk_x = surf->u.legacy.stencil_level[level].nblk_x;
+ }
+
+ if (level == 0) {
+ if (only_stencil) {
+ r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
+ if (r)
+ return r;
+ }
+
+ /* For 2D modes only. */
+ if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
+ surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
+ }
+ }
+ }
+ }
+
+ /* Compute FMASK. */
+ if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
+ !(surf->flags & RADEON_SURF_NO_FMASK)) {
+ ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
+ ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
+ ADDR_TILEINFO fmask_tile_info = {};
+
+ fin.size = sizeof(fin);
+ fout.size = sizeof(fout);
+
+ fin.tileMode = AddrSurfInfoOut.tileMode;
+ fin.pitch = AddrSurfInfoOut.pitch;
+ fin.height = config->info.height;
+ fin.numSlices = AddrSurfInfoIn.numSlices;
+ fin.numSamples = AddrSurfInfoIn.numSamples;
+ fin.numFrags = AddrSurfInfoIn.numFrags;
+ fin.tileIndex = -1;
+ fout.pTileInfo = &fmask_tile_info;
+
+ r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
+ if (r)
+ return r;
+
+ surf->fmask_size = fout.fmaskBytes;
+ surf->fmask_alignment = fout.baseAlign;
+ surf->fmask_tile_swizzle = 0;
+
+ surf->u.legacy.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
+ if (surf->u.legacy.fmask.slice_tile_max)
+ surf->u.legacy.fmask.slice_tile_max -= 1;
+
+ surf->u.legacy.fmask.tiling_index = fout.tileIndex;
+ surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
+ surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
+ surf->u.legacy.fmask.slice_size = fout.sliceSize;
+
+ /* Compute tile swizzle for FMASK. */
+ if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
+ ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
+ ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
+
+ xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
+ xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
+
+ /* This counter starts from 1 instead of 0. */
+ xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
+ xin.tileIndex = fout.tileIndex;
+ xin.macroModeIndex = fout.macroModeIndex;
+ xin.pTileInfo = fout.pTileInfo;
+ xin.tileMode = fin.tileMode;
+
+ int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
+ if (r != ADDR_OK)
+ return r;
+
+ assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+ surf->fmask_tile_swizzle = xout.tileSwizzle;
+ }
+ }
+
+ /* Recalculate the whole DCC miptree size including disabled levels.
+ * This is what addrlib does, but calling addrlib would be a lot more
+ * complicated.
+ */
+ if (surf->dcc_size && config->info.levels > 1) {
+ /* The smallest miplevels that are never compressed by DCC
+ * still read the DCC buffer via TC if the base level uses DCC,
+ * and for some reason the DCC buffer needs to be larger if
+ * the miptree uses non-zero tile_swizzle. Otherwise there are
+ * VM faults.
+ *
+ * "dcc_alignment * 4" was determined by trial and error.
+ */
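+      /* The >> 8 reflects that DCC stores one metadata byte per 256 bytes of
+       * surface data.
+       */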
+ surf->dcc_size = align64(surf->surf_size >> 8, surf->dcc_alignment * 4);
+ }
+
+ /* Make sure HTILE covers the whole miptree, because the shader reads
+ * TC-compatible HTILE even for levels where it's disabled by DB.
+ */
+ if (surf->htile_size && config->info.levels > 1 &&
+ surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
+ /* MSAA can't occur with levels > 1, so ignore the sample count. */
+ const unsigned total_pixels = surf->surf_size / surf->bpe;
+ const unsigned htile_block_size = 8 * 8;
+ const unsigned htile_element_size = 4;
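+      /* One 4-byte HTILE element covers an 8x8 pixel block, so e.g. 1024 pixels
+       * need 16 elements (64 bytes) before alignment.
+       */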
+
+ surf->htile_size = (total_pixels / htile_block_size) * htile_element_size;
+ surf->htile_size = align(surf->htile_size, surf->htile_alignment);
+ } else if (!surf->htile_size) {
+ /* Unset this if HTILE is not present. */
+ surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
+ surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
+ surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
+ surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
+
+ /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
+ * used at the same time. This case is not currently expected to occur
+    * because we don't use the rotated mode. Enforce this restriction on all
+    * chips to facilitate testing.
+ */
+ if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
+ assert(!"rotate micro tile mode is unsupported");
+ return ADDR_ERROR;
+ }
+
+ ac_compute_cmask(info, config, surf);
+ return 0;
}
/* This is only called when expecting a tiled layout. */
-static int
-gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
- struct radeon_surf *surf,
- ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
- bool is_fmask, AddrSwizzleMode *swizzle_mode)
+static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, struct radeon_surf *surf,
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
+ AddrSwizzleMode *swizzle_mode)
{
- ADDR_E_RETURNCODE ret;
- ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
- ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
-
- sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
- sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
-
- sin.flags = in->flags;
- sin.resourceType = in->resourceType;
- sin.format = in->format;
- sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
- /* TODO: We could allow some of these: */
- sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
- sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
- sin.bpp = in->bpp;
- sin.width = in->width;
- sin.height = in->height;
- sin.numSlices = in->numSlices;
- sin.numMipLevels = in->numMipLevels;
- sin.numSamples = in->numSamples;
- sin.numFrags = in->numFrags;
-
- if (is_fmask) {
- sin.flags.display = 0;
- sin.flags.color = 0;
- sin.flags.fmask = 1;
- }
-
- if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
- sin.forbiddenBlock.linear = 1;
-
- if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
- sin.preferredSwSet.sw_D = 1;
- else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
- sin.preferredSwSet.sw_S = 1;
- else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
- sin.preferredSwSet.sw_Z = 1;
- else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
- sin.preferredSwSet.sw_R = 1;
- }
-
- ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
- if (ret != ADDR_OK)
- return ret;
-
- *swizzle_mode = sout.swizzleMode;
- return 0;
+ ADDR_E_RETURNCODE ret;
+ ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
+ ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
+
+ sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
+ sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
+
+ sin.flags = in->flags;
+ sin.resourceType = in->resourceType;
+ sin.format = in->format;
+ sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
+ /* TODO: We could allow some of these: */
+ sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
+ sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
+ sin.bpp = in->bpp;
+ sin.width = in->width;
+ sin.height = in->height;
+ sin.numSlices = in->numSlices;
+ sin.numMipLevels = in->numMipLevels;
+ sin.numSamples = in->numSamples;
+ sin.numFrags = in->numFrags;
+
+ if (is_fmask) {
+ sin.flags.display = 0;
+ sin.flags.color = 0;
+ sin.flags.fmask = 1;
+ }
+
+ if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
+ sin.forbiddenBlock.linear = 1;
+
+ if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
+ sin.preferredSwSet.sw_D = 1;
+ else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
+ sin.preferredSwSet.sw_S = 1;
+ else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
+ sin.preferredSwSet.sw_Z = 1;
+ else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
+ sin.preferredSwSet.sw_R = 1;
+ }
+
+ ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ *swizzle_mode = sout.swizzleMode;
+ return 0;
}
static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
{
- if (info->chip_class >= GFX10)
- return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
+ if (info->chip_class >= GFX10)
+ return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
- return sw_mode != ADDR_SW_LINEAR;
+ return sw_mode != ADDR_SW_LINEAR;
}
ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
- const struct radeon_surf *surf)
+ const struct radeon_surf *surf)
{
- if (info->chip_class <= GFX9) {
- /* Only independent 64B blocks are supported. */
- return surf->u.gfx9.dcc.independent_64B_blocks &&
- !surf->u.gfx9.dcc.independent_128B_blocks &&
- surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
- }
-
- if (info->family == CHIP_NAVI10) {
- /* Only independent 128B blocks are supported. */
- return !surf->u.gfx9.dcc.independent_64B_blocks &&
- surf->u.gfx9.dcc.independent_128B_blocks &&
- surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
- }
-
- if (info->family == CHIP_NAVI12 ||
- info->family == CHIP_NAVI14) {
- /* Either 64B or 128B can be used, but not both.
- * If 64B is used, DCC image stores are unsupported.
- */
- return surf->u.gfx9.dcc.independent_64B_blocks !=
- surf->u.gfx9.dcc.independent_128B_blocks &&
- (!surf->u.gfx9.dcc.independent_64B_blocks ||
- surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
- (!surf->u.gfx9.dcc.independent_128B_blocks ||
- surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
- }
-
- /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
- * Since there is no reason to ever disable 128B, require it.
- * DCC image stores are always supported.
- */
- return surf->u.gfx9.dcc.independent_128B_blocks &&
- surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
+ if (info->chip_class <= GFX9) {
+ /* Only independent 64B blocks are supported. */
+ return surf->u.gfx9.dcc.independent_64B_blocks && !surf->u.gfx9.dcc.independent_128B_blocks &&
+ surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
+ }
+
+ if (info->family == CHIP_NAVI10) {
+ /* Only independent 128B blocks are supported. */
+ return !surf->u.gfx9.dcc.independent_64B_blocks && surf->u.gfx9.dcc.independent_128B_blocks &&
+ surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
+ }
+
+ if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
+ /* Either 64B or 128B can be used, but not both.
+ * If 64B is used, DCC image stores are unsupported.
+ */
+ return surf->u.gfx9.dcc.independent_64B_blocks != surf->u.gfx9.dcc.independent_128B_blocks &&
+ (!surf->u.gfx9.dcc.independent_64B_blocks ||
+ surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
+ (!surf->u.gfx9.dcc.independent_128B_blocks ||
+ surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
+ }
+
+ /* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
+ * Since there is no reason to ever disable 128B, require it.
+ * DCC image stores are always supported.
+ */
+ return surf->u.gfx9.dcc.independent_128B_blocks &&
+ surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
}
static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
- const struct ac_surf_config *config,
- const struct radeon_surf *surf,
- bool rb_aligned, bool pipe_aligned)
+ const struct ac_surf_config *config,
+ const struct radeon_surf *surf, bool rb_aligned,
+ bool pipe_aligned)
{
- if (!info->use_display_dcc_unaligned &&
- !info->use_display_dcc_with_retile_blit)
- return false;
-
- /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
- if (surf->bpe != 4)
- return false;
-
- /* Handle unaligned DCC. */
- if (info->use_display_dcc_unaligned &&
- (rb_aligned || pipe_aligned))
- return false;
-
- switch (info->chip_class) {
- case GFX9:
- /* There are more constraints, but we always set
- * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
- * which always works.
- */
- assert(surf->u.gfx9.dcc.independent_64B_blocks &&
- surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
- return true;
- case GFX10:
- case GFX10_3:
- /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
- if (info->chip_class == GFX10 &&
- surf->u.gfx9.dcc.independent_128B_blocks)
- return false;
-
- /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
- return ((config->info.width <= 2560 &&
- config->info.height <= 2560) ||
- (surf->u.gfx9.dcc.independent_64B_blocks &&
- surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
- default:
- unreachable("unhandled chip");
- return false;
- }
+ if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
+ return false;
+
+ /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
+ if (surf->bpe != 4)
+ return false;
+
+ /* Handle unaligned DCC. */
+ if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
+ return false;
+
+ switch (info->chip_class) {
+ case GFX9:
+ /* There are more constraints, but we always set
+ * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
+ * which always works.
+ */
+ assert(surf->u.gfx9.dcc.independent_64B_blocks &&
+ surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
+ return true;
+ case GFX10:
+ case GFX10_3:
+ /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
+ if (info->chip_class == GFX10 && surf->u.gfx9.dcc.independent_128B_blocks)
+ return false;
+
+ /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
+ return ((config->info.width <= 2560 && config->info.height <= 2560) ||
+ (surf->u.gfx9.dcc.independent_64B_blocks &&
+ surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
+ default:
+ unreachable("unhandled chip");
+ return false;
+ }
}
-static int gfx9_compute_miptree(struct ac_addrlib *addrlib,
- const struct radeon_info *info,
- const struct ac_surf_config *config,
- struct radeon_surf *surf, bool compressed,
- ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
+static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
+ const struct ac_surf_config *config, struct radeon_surf *surf,
+ bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
{
- ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
- ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
- ADDR_E_RETURNCODE ret;
-
- out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
- out.pMipInfo = mip_info;
-
- ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
- if (ret != ADDR_OK)
- return ret;
-
- if (in->flags.stencil) {
- surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
- surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
- out.mipChainPitch - 1;
- surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
- surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
- surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
- return 0;
- }
-
- surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
- surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
- out.mipChainPitch - 1;
-
- /* CMASK fast clear uses these even if FMASK isn't allocated.
- * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
- */
- surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3;
- surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch;
-
- surf->u.gfx9.surf_slice_size = out.sliceSize;
- surf->u.gfx9.surf_pitch = out.pitch;
- surf->u.gfx9.surf_height = out.height;
- surf->surf_size = out.surfSize;
- surf->surf_alignment = out.baseAlign;
-
- if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
- surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR) {
- /* Adjust surf_pitch to be in elements units not in pixels */
- surf->u.gfx9.surf_pitch =
- align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
- surf->u.gfx9.surf.epitch = MAX2(surf->u.gfx9.surf.epitch,
- surf->u.gfx9.surf_pitch * surf->blk_w - 1);
- /* The surface is really a surf->bpe bytes per pixel surface even if we
- * use it as a surf->bpe bytes per element one.
- * Adjust surf_slice_size and surf_size to reflect the change
- * made to surf_pitch.
- */
- surf->u.gfx9.surf_slice_size = MAX2(
- surf->u.gfx9.surf_slice_size,
- surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
- surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
- }
-
- if (in->swizzleMode == ADDR_SW_LINEAR) {
- for (unsigned i = 0; i < in->numMipLevels; i++) {
- surf->u.gfx9.offset[i] = mip_info[i].offset;
- surf->u.gfx9.pitch[i] = mip_info[i].pitch;
- }
- }
-
- if (in->flags.depth) {
- assert(in->swizzleMode != ADDR_SW_LINEAR);
-
- if (surf->flags & RADEON_SURF_NO_HTILE)
- return 0;
-
- /* HTILE */
- ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
- ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
-
- hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
- hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
-
- assert(in->flags.metaPipeUnaligned == 0);
- assert(in->flags.metaRbUnaligned == 0);
-
- hin.hTileFlags.pipeAligned = 1;
- hin.hTileFlags.rbAligned = 1;
- hin.depthFlags = in->flags;
- hin.swizzleMode = in->swizzleMode;
- hin.unalignedWidth = in->width;
- hin.unalignedHeight = in->height;
- hin.numSlices = in->numSlices;
- hin.numMipLevels = in->numMipLevels;
- hin.firstMipIdInTail = out.firstMipIdInTail;
-
- ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
- if (ret != ADDR_OK)
- return ret;
-
- surf->htile_size = hout.htileBytes;
- surf->htile_slice_size = hout.sliceSize;
- surf->htile_alignment = hout.baseAlign;
- return 0;
- }
-
- {
- /* Compute tile swizzle for the color surface.
- * All *_X and *_T modes can use the swizzle.
- */
- if (config->info.surf_index &&
- in->swizzleMode >= ADDR_SW_64KB_Z_T &&
- !out.mipChainInTail &&
- !(surf->flags & RADEON_SURF_SHAREABLE) &&
- !in->flags.display) {
- ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
- ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
-
- xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
- xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
-
- xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
- xin.flags = in->flags;
- xin.swizzleMode = in->swizzleMode;
- xin.resourceType = in->resourceType;
- xin.format = in->format;
- xin.numSamples = in->numSamples;
- xin.numFrags = in->numFrags;
-
- ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
- if (ret != ADDR_OK)
- return ret;
-
- assert(xout.pipeBankXor <=
- u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
- surf->tile_swizzle = xout.pipeBankXor;
- }
-
- /* DCC */
- if (info->has_graphics &&
- !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
- !compressed &&
- is_dcc_supported_by_CB(info, in->swizzleMode) &&
- (!in->flags.display ||
- is_dcc_supported_by_DCN(info, config, surf,
- !in->flags.metaRbUnaligned,
- !in->flags.metaPipeUnaligned))) {
- ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
- ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
- ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
-
- din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
- dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
- dout.pMipInfo = meta_mip_info;
-
- din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
- din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
- din.resourceType = in->resourceType;
- din.swizzleMode = in->swizzleMode;
- din.bpp = in->bpp;
- din.unalignedWidth = in->width;
- din.unalignedHeight = in->height;
- din.numSlices = in->numSlices;
- din.numFrags = in->numFrags;
- din.numMipLevels = in->numMipLevels;
- din.dataSurfaceSize = out.surfSize;
- din.firstMipIdInTail = out.firstMipIdInTail;
-
- ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
- if (ret != ADDR_OK)
- return ret;
-
- surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
- surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
- surf->u.gfx9.dcc_block_width = dout.compressBlkWidth;
- surf->u.gfx9.dcc_block_height = dout.compressBlkHeight;
- surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth;
- surf->dcc_size = dout.dccRamSize;
- surf->dcc_alignment = dout.dccRamBaseAlign;
- surf->num_dcc_levels = in->numMipLevels;
-
- /* Disable DCC for levels that are in the mip tail.
- *
- * There are two issues that this is intended to
- * address:
- *
- * 1. Multiple mip levels may share a cache line. This
- * can lead to corruption when switching between
- * rendering to different mip levels because the
- * RBs don't maintain coherency.
- *
- * 2. Texturing with metadata after rendering sometimes
- * fails with corruption, probably for a similar
- * reason.
- *
- * Working around these issues for all levels in the
- * mip tail may be overly conservative, but it's what
- * Vulkan does.
- *
- * Alternative solutions that also work but are worse:
- * - Disable DCC entirely.
- * - Flush TC L2 after rendering.
- */
- for (unsigned i = 0; i < in->numMipLevels; i++) {
- if (meta_mip_info[i].inMiptail) {
- /* GFX10 can only compress the first level
- * in the mip tail.
- *
- * TODO: Try to do the same thing for gfx9
- * if there are no regressions.
- */
- if (info->chip_class >= GFX10)
- surf->num_dcc_levels = i + 1;
- else
- surf->num_dcc_levels = i;
- break;
- }
- }
-
- if (!surf->num_dcc_levels)
- surf->dcc_size = 0;
-
- surf->u.gfx9.display_dcc_size = surf->dcc_size;
- surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
- surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
-
- /* Compute displayable DCC. */
- if (in->flags.display &&
- surf->num_dcc_levels &&
- info->use_display_dcc_with_retile_blit) {
- /* Compute displayable DCC info. */
- din.dccKeyFlags.pipeAligned = 0;
- din.dccKeyFlags.rbAligned = 0;
-
- assert(din.numSlices == 1);
- assert(din.numMipLevels == 1);
- assert(din.numFrags == 1);
- assert(surf->tile_swizzle == 0);
- assert(surf->u.gfx9.dcc.pipe_aligned ||
- surf->u.gfx9.dcc.rb_aligned);
-
- ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
- if (ret != ADDR_OK)
- return ret;
-
- surf->u.gfx9.display_dcc_size = dout.dccRamSize;
- surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
- surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
- assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);
-
- surf->u.gfx9.dcc_retile_use_uint16 =
- surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 &&
- surf->dcc_size <= UINT16_MAX + 1;
-
- /* Align the retile map size to get more hash table hits and
- * decrease the maximum memory footprint when all retile maps
- * are cached in the hash table.
- */
- unsigned retile_dim[2] = {in->width, in->height};
-
- for (unsigned i = 0; i < 2; i++) {
- /* Increase the alignment as the size increases.
- * Greater alignment increases retile compute work,
- * but decreases maximum memory footprint for the cache.
- *
- * With this alignment, the worst case memory footprint of
- * the cache is:
- * 1920x1080: 55 MB
- * 2560x1440: 99 MB
- * 3840x2160: 305 MB
- *
- * The worst case size in MB can be computed in Haskell as follows:
- * (sum (map get_retile_size (map get_dcc_size (deduplicate (map align_pair
- * [(i*16,j*16) | i <- [1..maxwidth`div`16], j <- [1..maxheight`div`16]]))))) `div` 1024^2
- * where
- * alignment x = if x <= 512 then 16 else if x <= 1024 then 32 else if x <= 2048 then 64 else 128
- * align x = (x + (alignment x) - 1) `div` (alignment x) * (alignment x)
- * align_pair e = (align (fst e), align (snd e))
- * deduplicate = map head . groupBy (\ a b -> ((fst a) == (fst b)) && ((snd a) == (snd b))) . sortBy compare
- * get_dcc_size e = ((fst e) * (snd e) * bpp) `div` 256
- * get_retile_size dcc_size = dcc_size * 2 * (if dcc_size <= 2^16 then 2 else 4)
- * bpp = 4; maxwidth = 3840; maxheight = 2160
- */
- if (retile_dim[i] <= 512)
- retile_dim[i] = align(retile_dim[i], 16);
- else if (retile_dim[i] <= 1024)
- retile_dim[i] = align(retile_dim[i], 32);
- else if (retile_dim[i] <= 2048)
- retile_dim[i] = align(retile_dim[i], 64);
- else
- retile_dim[i] = align(retile_dim[i], 128);
-
- /* Don't align more than the DCC pixel alignment. */
- assert(dout.metaBlkWidth >= 128 && dout.metaBlkHeight >= 128);
- }
-
- surf->u.gfx9.dcc_retile_num_elements =
- DIV_ROUND_UP(retile_dim[0], dout.compressBlkWidth) *
- DIV_ROUND_UP(retile_dim[1], dout.compressBlkHeight) * 2;
- /* Align the size to 4 (for the compute shader). */
- surf->u.gfx9.dcc_retile_num_elements =
- align(surf->u.gfx9.dcc_retile_num_elements, 4);
-
- if (!(surf->flags & RADEON_SURF_IMPORTED)) {
- /* Compute address mapping from non-displayable to displayable DCC. */
- ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
- memset(&addrin, 0, sizeof(addrin));
- addrin.size = sizeof(addrin);
- addrin.swizzleMode = din.swizzleMode;
- addrin.resourceType = din.resourceType;
- addrin.bpp = din.bpp;
- addrin.numSlices = 1;
- addrin.numMipLevels = 1;
- addrin.numFrags = 1;
- addrin.pitch = dout.pitch;
- addrin.height = dout.height;
- addrin.compressBlkWidth = dout.compressBlkWidth;
- addrin.compressBlkHeight = dout.compressBlkHeight;
- addrin.compressBlkDepth = dout.compressBlkDepth;
- addrin.metaBlkWidth = dout.metaBlkWidth;
- addrin.metaBlkHeight = dout.metaBlkHeight;
- addrin.metaBlkDepth = dout.metaBlkDepth;
- addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
-
- surf->u.gfx9.dcc_retile_map =
- ac_compute_dcc_retile_map(addrlib, info,
- retile_dim[0], retile_dim[1],
- surf->u.gfx9.dcc.rb_aligned,
- surf->u.gfx9.dcc.pipe_aligned,
- surf->u.gfx9.dcc_retile_use_uint16,
- surf->u.gfx9.dcc_retile_num_elements,
- &addrin);
- if (!surf->u.gfx9.dcc_retile_map)
- return ADDR_OUTOFMEMORY;
- }
- }
- }
-
- /* FMASK */
- if (in->numSamples > 1 && info->has_graphics &&
- !(surf->flags & RADEON_SURF_NO_FMASK)) {
- ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
- ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
-
- fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
- fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
-
- ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, in,
- true, &fin.swizzleMode);
- if (ret != ADDR_OK)
- return ret;
-
- fin.unalignedWidth = in->width;
- fin.unalignedHeight = in->height;
- fin.numSlices = in->numSlices;
- fin.numSamples = in->numSamples;
- fin.numFrags = in->numFrags;
-
- ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
- if (ret != ADDR_OK)
- return ret;
-
- surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
- surf->u.gfx9.fmask.epitch = fout.pitch - 1;
- surf->fmask_size = fout.fmaskBytes;
- surf->fmask_alignment = fout.baseAlign;
-
- /* Compute tile swizzle for the FMASK surface. */
- if (config->info.fmask_surf_index &&
- fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
- !(surf->flags & RADEON_SURF_SHAREABLE)) {
- ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
- ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
-
- xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
- xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
-
- /* This counter starts from 1 instead of 0. */
- xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
- xin.flags = in->flags;
- xin.swizzleMode = fin.swizzleMode;
- xin.resourceType = in->resourceType;
- xin.format = in->format;
- xin.numSamples = in->numSamples;
- xin.numFrags = in->numFrags;
-
- ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
- if (ret != ADDR_OK)
- return ret;
-
- assert(xout.pipeBankXor <=
- u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
- surf->fmask_tile_swizzle = xout.pipeBankXor;
- }
- }
-
- /* CMASK -- on GFX10 only for FMASK */
- if (in->swizzleMode != ADDR_SW_LINEAR &&
- in->resourceType == ADDR_RSRC_TEX_2D &&
- ((info->chip_class <= GFX9 &&
- in->numSamples == 1 &&
- in->flags.metaPipeUnaligned == 0 &&
- in->flags.metaRbUnaligned == 0) ||
- (surf->fmask_size && in->numSamples >= 2))) {
- ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
- ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
-
- cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
- cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
-
- assert(in->flags.metaPipeUnaligned == 0);
- assert(in->flags.metaRbUnaligned == 0);
-
- cin.cMaskFlags.pipeAligned = 1;
- cin.cMaskFlags.rbAligned = 1;
- cin.resourceType = in->resourceType;
- cin.unalignedWidth = in->width;
- cin.unalignedHeight = in->height;
- cin.numSlices = in->numSlices;
-
- if (in->numSamples > 1)
- cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
- else
- cin.swizzleMode = in->swizzleMode;
-
- ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
- if (ret != ADDR_OK)
- return ret;
-
- surf->cmask_size = cout.cmaskBytes;
- surf->cmask_alignment = cout.baseAlign;
- }
- }
-
- return 0;
+ ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
+ ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
+ ADDR_E_RETURNCODE ret;
+
+ out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
+ out.pMipInfo = mip_info;
+
+ ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
+ if (ret != ADDR_OK)
+ return ret;
+
+ if (in->flags.stencil) {
+ surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
+ surf->u.gfx9.stencil.epitch =
+ out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
+ surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
+ surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
+ surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
+ return 0;
+ }
+
+ surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
+ surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
+
+ /* CMASK fast clear uses these even if FMASK isn't allocated.
+ * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
+ */
+ surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3;
+ surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch;
+
+ surf->u.gfx9.surf_slice_size = out.sliceSize;
+ surf->u.gfx9.surf_pitch = out.pitch;
+ surf->u.gfx9.surf_height = out.height;
+ surf->surf_size = out.surfSize;
+ surf->surf_alignment = out.baseAlign;
+
+ if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
+ surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR) {
+      /* Adjust surf_pitch to be in element units rather than in pixels. */
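+      /* align(..., 256 / bpe) keeps the byte pitch aligned to 256 bytes. */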
+ surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
+ surf->u.gfx9.surf.epitch =
+ MAX2(surf->u.gfx9.surf.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
+ /* The surface is really a surf->bpe bytes per pixel surface even if we
+ * use it as a surf->bpe bytes per element one.
+ * Adjust surf_slice_size and surf_size to reflect the change
+ * made to surf_pitch.
+ */
+ surf->u.gfx9.surf_slice_size =
+ MAX2(surf->u.gfx9.surf_slice_size,
+ surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
+ surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
+ }
+
+ if (in->swizzleMode == ADDR_SW_LINEAR) {
+ for (unsigned i = 0; i < in->numMipLevels; i++) {
+ surf->u.gfx9.offset[i] = mip_info[i].offset;
+ surf->u.gfx9.pitch[i] = mip_info[i].pitch;
+ }
+ }
+
+ if (in->flags.depth) {
+ assert(in->swizzleMode != ADDR_SW_LINEAR);
+
+ if (surf->flags & RADEON_SURF_NO_HTILE)
+ return 0;
+
+ /* HTILE */
+ ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
+ ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
+
+ hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
+ hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
+
+ assert(in->flags.metaPipeUnaligned == 0);
+ assert(in->flags.metaRbUnaligned == 0);
+
+ hin.hTileFlags.pipeAligned = 1;
+ hin.hTileFlags.rbAligned = 1;
+ hin.depthFlags = in->flags;
+ hin.swizzleMode = in->swizzleMode;
+ hin.unalignedWidth = in->width;
+ hin.unalignedHeight = in->height;
+ hin.numSlices = in->numSlices;
+ hin.numMipLevels = in->numMipLevels;
+ hin.firstMipIdInTail = out.firstMipIdInTail;
+
+ ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ surf->htile_size = hout.htileBytes;
+ surf->htile_slice_size = hout.sliceSize;
+ surf->htile_alignment = hout.baseAlign;
+ return 0;
+ }
+
+ {
+ /* Compute tile swizzle for the color surface.
+ * All *_X and *_T modes can use the swizzle.
+ */
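+      /* The *_T and *_X swizzle modes sort after ADDR_SW_64KB_Z_T in the enum,
+       * hence the >= comparison below.
+       */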
+ if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&
+ !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {
+ ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
+ ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
+
+ xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
+ xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
+
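+         /* p_atomic_inc_return counts from 1; subtract 1 for a 0-based index. */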
+ xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
+ xin.flags = in->flags;
+ xin.swizzleMode = in->swizzleMode;
+ xin.resourceType = in->resourceType;
+ xin.format = in->format;
+ xin.numSamples = in->numSamples;
+ xin.numFrags = in->numFrags;
+
+ ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
+ surf->tile_swizzle = xout.pipeBankXor;
+ }
+
+ /* DCC */
+ if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&
+ is_dcc_supported_by_CB(info, in->swizzleMode) &&
+ (!in->flags.display ||
+ is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,
+ !in->flags.metaPipeUnaligned))) {
+ ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
+ ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+ ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
+
+ din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
+ dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+ dout.pMipInfo = meta_mip_info;
+
+ din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+ din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
+ din.resourceType = in->resourceType;
+ din.swizzleMode = in->swizzleMode;
+ din.bpp = in->bpp;
+ din.unalignedWidth = in->width;
+ din.unalignedHeight = in->height;
+ din.numSlices = in->numSlices;
+ din.numFrags = in->numFrags;
+ din.numMipLevels = in->numMipLevels;
+ din.dataSurfaceSize = out.surfSize;
+ din.firstMipIdInTail = out.firstMipIdInTail;
+
+ ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
+ surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
+ surf->u.gfx9.dcc_block_width = dout.compressBlkWidth;
+ surf->u.gfx9.dcc_block_height = dout.compressBlkHeight;
+ surf->u.gfx9.dcc_block_depth = dout.compressBlkDepth;
+ surf->dcc_size = dout.dccRamSize;
+ surf->dcc_alignment = dout.dccRamBaseAlign;
+ surf->num_dcc_levels = in->numMipLevels;
+
+ /* Disable DCC for levels that are in the mip tail.
+ *
+ * There are two issues that this is intended to
+ * address:
+ *
+ * 1. Multiple mip levels may share a cache line. This
+ * can lead to corruption when switching between
+ * rendering to different mip levels because the
+ * RBs don't maintain coherency.
+ *
+ * 2. Texturing with metadata after rendering sometimes
+ * fails with corruption, probably for a similar
+ * reason.
+ *
+ * Working around these issues for all levels in the
+ * mip tail may be overly conservative, but it's what
+ * Vulkan does.
+ *
+ * Alternative solutions that also work but are worse:
+ * - Disable DCC entirely.
+ * - Flush TC L2 after rendering.
+ */
+ for (unsigned i = 0; i < in->numMipLevels; i++) {
+ if (meta_mip_info[i].inMiptail) {
+ /* GFX10 can only compress the first level
+ * in the mip tail.
+ *
+ * TODO: Try to do the same thing for gfx9
+ * if there are no regressions.
+ */
+ if (info->chip_class >= GFX10)
+ surf->num_dcc_levels = i + 1;
+ else
+ surf->num_dcc_levels = i;
+ break;
+ }
+ }
+
+ if (!surf->num_dcc_levels)
+ surf->dcc_size = 0;
+
+ surf->u.gfx9.display_dcc_size = surf->dcc_size;
+ surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
+ surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
+
+ /* Compute displayable DCC. */
+ if (in->flags.display && surf->num_dcc_levels && info->use_display_dcc_with_retile_blit) {
+ /* Compute displayable DCC info. */
+ din.dccKeyFlags.pipeAligned = 0;
+ din.dccKeyFlags.rbAligned = 0;
+
+ assert(din.numSlices == 1);
+ assert(din.numMipLevels == 1);
+ assert(din.numFrags == 1);
+ assert(surf->tile_swizzle == 0);
+ assert(surf->u.gfx9.dcc.pipe_aligned || surf->u.gfx9.dcc.rb_aligned);
+
+ ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ surf->u.gfx9.display_dcc_size = dout.dccRamSize;
+ surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
+ surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
+ assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);
+
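+            /* 16-bit retile map entries suffice when all DCC byte offsets fit in
+             * 16 bits.
+             */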
+ surf->u.gfx9.dcc_retile_use_uint16 =
+ surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 && surf->dcc_size <= UINT16_MAX + 1;
+
+ /* Align the retile map size to get more hash table hits and
+ * decrease the maximum memory footprint when all retile maps
+ * are cached in the hash table.
+ */
+ unsigned retile_dim[2] = {in->width, in->height};
+
+ for (unsigned i = 0; i < 2; i++) {
+ /* Increase the alignment as the size increases.
+ * Greater alignment increases retile compute work,
+ * but decreases maximum memory footprint for the cache.
+ *
+ * With this alignment, the worst case memory footprint of
+ * the cache is:
+ * 1920x1080: 55 MB
+ * 2560x1440: 99 MB
+ * 3840x2160: 305 MB
+ *
+ * The worst case size in MB can be computed in Haskell as follows:
+                * (sum (map get_retile_size (map get_dcc_size (deduplicate (map align_pair
+                * [(i*16,j*16) | i <- [1..maxwidth`div`16], j <- [1..maxheight`div`16]])))))
+                * `div` 1024^2
+                * where
+                *    alignment x = if x <= 512 then 16 else if x <= 1024 then 32
+                *                  else if x <= 2048 then 64 else 128
+                *    align x = (x + (alignment x) - 1) `div` (alignment x) * (alignment x)
+                *    align_pair e = (align (fst e), align (snd e))
+                *    deduplicate = map head . groupBy (\ a b -> ((fst a) == (fst b)) &&
+                *                  ((snd a) == (snd b))) . sortBy compare
+                *    get_dcc_size e = ((fst e) * (snd e) * bpp) `div` 256
+                *    get_retile_size dcc_size = dcc_size * 2 * (if dcc_size <= 2^16 then 2 else 4)
+                *    bpp = 4; maxwidth = 3840; maxheight = 2160
+ */
+ if (retile_dim[i] <= 512)
+ retile_dim[i] = align(retile_dim[i], 16);
+ else if (retile_dim[i] <= 1024)
+ retile_dim[i] = align(retile_dim[i], 32);
+ else if (retile_dim[i] <= 2048)
+ retile_dim[i] = align(retile_dim[i], 64);
+ else
+ retile_dim[i] = align(retile_dim[i], 128);
+
+ /* Don't align more than the DCC pixel alignment. */
+ assert(dout.metaBlkWidth >= 128 && dout.metaBlkHeight >= 128);
+ }
+
+ surf->u.gfx9.dcc_retile_num_elements =
+ DIV_ROUND_UP(retile_dim[0], dout.compressBlkWidth) *
+ DIV_ROUND_UP(retile_dim[1], dout.compressBlkHeight) * 2;
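+         /* The factor of 2 is for (source, destination) element pairs. */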
+ /* Align the size to 4 (for the compute shader). */
+ surf->u.gfx9.dcc_retile_num_elements = align(surf->u.gfx9.dcc_retile_num_elements, 4);
+
+ if (!(surf->flags & RADEON_SURF_IMPORTED)) {
+ /* Compute address mapping from non-displayable to displayable DCC. */
+ ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
+ memset(&addrin, 0, sizeof(addrin));
+ addrin.size = sizeof(addrin);
+ addrin.swizzleMode = din.swizzleMode;
+ addrin.resourceType = din.resourceType;
+ addrin.bpp = din.bpp;
+ addrin.numSlices = 1;
+ addrin.numMipLevels = 1;
+ addrin.numFrags = 1;
+ addrin.pitch = dout.pitch;
+ addrin.height = dout.height;
+ addrin.compressBlkWidth = dout.compressBlkWidth;
+ addrin.compressBlkHeight = dout.compressBlkHeight;
+ addrin.compressBlkDepth = dout.compressBlkDepth;
+ addrin.metaBlkWidth = dout.metaBlkWidth;
+ addrin.metaBlkHeight = dout.metaBlkHeight;
+ addrin.metaBlkDepth = dout.metaBlkDepth;
+ addrin.dccRamSliceSize = 0; /* Don't care for non-layered images. */
+
+ surf->u.gfx9.dcc_retile_map = ac_compute_dcc_retile_map(
+ addrlib, info, retile_dim[0], retile_dim[1], surf->u.gfx9.dcc.rb_aligned,
+ surf->u.gfx9.dcc.pipe_aligned, surf->u.gfx9.dcc_retile_use_uint16,
+ surf->u.gfx9.dcc_retile_num_elements, &addrin);
+ if (!surf->u.gfx9.dcc_retile_map)
+ return ADDR_OUTOFMEMORY;
+ }
+ }
+ }
+
+ /* FMASK */
+ if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
+ ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
+ ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
+
+ fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
+ fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
+
+ ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, in, true, &fin.swizzleMode);
+ if (ret != ADDR_OK)
+ return ret;
+
+ fin.unalignedWidth = in->width;
+ fin.unalignedHeight = in->height;
+ fin.numSlices = in->numSlices;
+ fin.numSamples = in->numSamples;
+ fin.numFrags = in->numFrags;
+
+ ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
+ surf->u.gfx9.fmask.epitch = fout.pitch - 1;
+ surf->fmask_size = fout.fmaskBytes;
+ surf->fmask_alignment = fout.baseAlign;
+
+ /* Compute tile swizzle for the FMASK surface. */
+ if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
+ !(surf->flags & RADEON_SURF_SHAREABLE)) {
+ ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
+ ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
+
+ xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
+ xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
+
+ /* This counter starts from 1 instead of 0. */
+ xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
+ xin.flags = in->flags;
+ xin.swizzleMode = fin.swizzleMode;
+ xin.resourceType = in->resourceType;
+ xin.format = in->format;
+ xin.numSamples = in->numSamples;
+ xin.numFrags = in->numFrags;
+
+ ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
+ surf->fmask_tile_swizzle = xout.pipeBankXor;
+ }
+ }
+
+ /* CMASK -- on GFX10 only for FMASK */
+ if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
+ ((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
+ in->flags.metaRbUnaligned == 0) ||
+ (surf->fmask_size && in->numSamples >= 2))) {
+ ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
+ ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
+
+ cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
+ cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
+
+ assert(in->flags.metaPipeUnaligned == 0);
+ assert(in->flags.metaRbUnaligned == 0);
+
+ cin.cMaskFlags.pipeAligned = 1;
+ cin.cMaskFlags.rbAligned = 1;
+ cin.resourceType = in->resourceType;
+ cin.unalignedWidth = in->width;
+ cin.unalignedHeight = in->height;
+ cin.numSlices = in->numSlices;
+
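+         /* With MSAA, CMASK is tied to FMASK and must use its swizzle mode. */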
+ if (in->numSamples > 1)
+ cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
+ else
+ cin.swizzleMode = in->swizzleMode;
+
+ ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
+ if (ret != ADDR_OK)
+ return ret;
+
+ surf->cmask_size = cout.cmaskBytes;
+ surf->cmask_alignment = cout.baseAlign;
+ }
+ }
+
+ return 0;
}
-static int gfx9_compute_surface(struct ac_addrlib *addrlib,
- const struct radeon_info *info,
- const struct ac_surf_config *config,
- enum radeon_surf_mode mode,
- struct radeon_surf *surf)
+static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
+ const struct ac_surf_config *config, enum radeon_surf_mode mode,
+ struct radeon_surf *surf)
{
- bool compressed;
- ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
- int r;
-
- AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
-
- compressed = surf->blk_w == 4 && surf->blk_h == 4;
-
- /* The format must be set correctly for the allocation of compressed
- * textures to work. In other cases, setting the bpp is sufficient. */
- if (compressed) {
- switch (surf->bpe) {
- case 8:
- AddrSurfInfoIn.format = ADDR_FMT_BC1;
- break;
- case 16:
- AddrSurfInfoIn.format = ADDR_FMT_BC3;
- break;
- default:
- assert(0);
- }
- } else {
- switch (surf->bpe) {
- case 1:
- assert(!(surf->flags & RADEON_SURF_ZBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_8;
- break;
- case 2:
- assert(surf->flags & RADEON_SURF_ZBUFFER ||
- !(surf->flags & RADEON_SURF_SBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_16;
- break;
- case 4:
- assert(surf->flags & RADEON_SURF_ZBUFFER ||
- !(surf->flags & RADEON_SURF_SBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_32;
- break;
- case 8:
- assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_32_32;
- break;
- case 12:
- assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
- break;
- case 16:
- assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
- AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
- break;
- default:
- assert(0);
- }
- AddrSurfInfoIn.bpp = surf->bpe * 8;
- }
-
- bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
- AddrSurfInfoIn.flags.color = is_color_surface &&
- !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
- AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
- AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
- /* flags.texture currently refers to TC-compatible HTILE */
- AddrSurfInfoIn.flags.texture = is_color_surface ||
- surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
- AddrSurfInfoIn.flags.opt4space = 1;
-
- AddrSurfInfoIn.numMipLevels = config->info.levels;
- AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
- AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
-
- if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
- AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
-
- /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
- * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
- * must sample 1D textures as 2D. */
- if (config->is_3d)
- AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
- else if (info->chip_class != GFX9 && config->is_1d)
- AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
- else
- AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
-
- AddrSurfInfoIn.width = config->info.width;
- AddrSurfInfoIn.height = config->info.height;
-
- if (config->is_3d)
- AddrSurfInfoIn.numSlices = config->info.depth;
- else if (config->is_cube)
- AddrSurfInfoIn.numSlices = 6;
- else
- AddrSurfInfoIn.numSlices = config->info.array_size;
-
- /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
- AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
- AddrSurfInfoIn.flags.metaRbUnaligned = 0;
-
- /* Optimal values for the L2 cache. */
- if (info->chip_class == GFX9) {
- surf->u.gfx9.dcc.independent_64B_blocks = 1;
- surf->u.gfx9.dcc.independent_128B_blocks = 0;
- surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- } else if (info->chip_class >= GFX10) {
- surf->u.gfx9.dcc.independent_64B_blocks = 0;
- surf->u.gfx9.dcc.independent_128B_blocks = 1;
- surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
- }
-
- if (AddrSurfInfoIn.flags.display) {
- /* The display hardware can only read DCC with RB_ALIGNED=0 and
- * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
- *
- * The CB block requires RB_ALIGNED=1 except 1 RB chips.
- * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
- * after rendering, so PIPE_ALIGNED=1 is recommended.
- */
- if (info->use_display_dcc_unaligned) {
- AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
- AddrSurfInfoIn.flags.metaRbUnaligned = 1;
- }
-
- /* Adjust DCC settings to meet DCN requirements. */
- if (info->use_display_dcc_unaligned ||
- info->use_display_dcc_with_retile_blit) {
- /* Only Navi12/14 support independent 64B blocks in L2,
- * but without DCC image stores.
- */
- if (info->family == CHIP_NAVI12 ||
- info->family == CHIP_NAVI14) {
- surf->u.gfx9.dcc.independent_64B_blocks = 1;
- surf->u.gfx9.dcc.independent_128B_blocks = 0;
- surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- }
-
- if (info->chip_class >= GFX10_3) {
- surf->u.gfx9.dcc.independent_64B_blocks = 1;
- surf->u.gfx9.dcc.independent_128B_blocks = 1;
- surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- }
- }
- }
-
- switch (mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- assert(config->info.samples <= 1);
- assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
- AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
- break;
-
- case RADEON_SURF_MODE_1D:
- case RADEON_SURF_MODE_2D:
- if (surf->flags & RADEON_SURF_IMPORTED ||
- (info->chip_class >= GFX10 &&
- surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
- AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
- break;
- }
-
- r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
- false, &AddrSurfInfoIn.swizzleMode);
- if (r)
- return r;
- break;
-
- default:
- assert(0);
- }
-
- surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
- surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
-
- surf->num_dcc_levels = 0;
- surf->surf_size = 0;
- surf->fmask_size = 0;
- surf->dcc_size = 0;
- surf->htile_size = 0;
- surf->htile_slice_size = 0;
- surf->u.gfx9.surf_offset = 0;
- surf->u.gfx9.stencil_offset = 0;
- surf->cmask_size = 0;
- surf->u.gfx9.dcc_retile_use_uint16 = false;
- surf->u.gfx9.dcc_retile_num_elements = 0;
- surf->u.gfx9.dcc_retile_map = NULL;
-
- /* Calculate texture layout information. */
- r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
- &AddrSurfInfoIn);
- if (r)
- return r;
-
- /* Calculate texture layout information for stencil. */
- if (surf->flags & RADEON_SURF_SBUFFER) {
- AddrSurfInfoIn.flags.stencil = 1;
- AddrSurfInfoIn.bpp = 8;
- AddrSurfInfoIn.format = ADDR_FMT_8;
-
- if (!AddrSurfInfoIn.flags.depth) {
- r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
- false, &AddrSurfInfoIn.swizzleMode);
- if (r)
- return r;
- } else
- AddrSurfInfoIn.flags.depth = 0;
-
- r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
- &AddrSurfInfoIn);
- if (r)
- return r;
- }
-
- surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
-
- /* Query whether the surface is displayable. */
- /* This is only useful for surfaces that are allocated without SCANOUT. */
- bool displayable = false;
- if (!config->is_3d && !config->is_cube) {
- r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.surf.swizzle_mode,
- surf->bpe * 8, &displayable);
- if (r)
- return r;
-
- /* Display needs unaligned DCC. */
- if (surf->num_dcc_levels &&
- (!is_dcc_supported_by_DCN(info, config, surf,
- surf->u.gfx9.dcc.rb_aligned,
- surf->u.gfx9.dcc.pipe_aligned) ||
- /* Don't set is_displayable if displayable DCC is missing. */
- (info->use_display_dcc_with_retile_blit &&
- !surf->u.gfx9.dcc_retile_num_elements)))
- displayable = false;
- }
- surf->is_displayable = displayable;
-
- /* Validate that we allocated a displayable surface if requested. */
- assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
-
- /* Validate that DCC is set up correctly. */
- if (surf->num_dcc_levels) {
- assert(is_dcc_supported_by_L2(info, surf));
- if (AddrSurfInfoIn.flags.color)
- assert(is_dcc_supported_by_CB(info, surf->u.gfx9.surf.swizzle_mode));
- if (AddrSurfInfoIn.flags.display) {
- assert(is_dcc_supported_by_DCN(info, config, surf,
- surf->u.gfx9.dcc.rb_aligned,
- surf->u.gfx9.dcc.pipe_aligned));
- }
- }
-
- if (info->has_graphics &&
- !compressed &&
- !config->is_3d &&
- config->info.levels == 1 &&
- AddrSurfInfoIn.flags.color &&
- !surf->is_linear &&
- surf->surf_alignment >= 64 * 1024 && /* 64KB tiling */
- !(surf->flags & (RADEON_SURF_DISABLE_DCC |
- RADEON_SURF_FORCE_SWIZZLE_MODE |
- RADEON_SURF_FORCE_MICRO_TILE_MODE))) {
- /* Validate that DCC is enabled if DCN can do it. */
- if ((info->use_display_dcc_unaligned ||
- info->use_display_dcc_with_retile_blit) &&
- AddrSurfInfoIn.flags.display &&
- surf->bpe == 4) {
- assert(surf->num_dcc_levels);
- }
-
- /* Validate that non-scanout DCC is always enabled. */
- if (!AddrSurfInfoIn.flags.display)
- assert(surf->num_dcc_levels);
- }
-
- if (!surf->htile_size) {
- /* Unset this if HTILE is not present. */
- surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
- }
-
- switch (surf->u.gfx9.surf.swizzle_mode) {
- /* S = standard. */
- case ADDR_SW_256B_S:
- case ADDR_SW_4KB_S:
- case ADDR_SW_64KB_S:
- case ADDR_SW_64KB_S_T:
- case ADDR_SW_4KB_S_X:
- case ADDR_SW_64KB_S_X:
- surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
- break;
-
- /* D = display. */
- case ADDR_SW_LINEAR:
- case ADDR_SW_256B_D:
- case ADDR_SW_4KB_D:
- case ADDR_SW_64KB_D:
- case ADDR_SW_64KB_D_T:
- case ADDR_SW_4KB_D_X:
- case ADDR_SW_64KB_D_X:
- surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
- break;
-
- /* R = rotated (gfx9), render target (gfx10). */
- case ADDR_SW_256B_R:
- case ADDR_SW_4KB_R:
- case ADDR_SW_64KB_R:
- case ADDR_SW_64KB_R_T:
- case ADDR_SW_4KB_R_X:
- case ADDR_SW_64KB_R_X:
- case ADDR_SW_VAR_R_X:
- /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
- * used at the same time. We currently do not use rotated
- * in gfx9.
- */
- assert(info->chip_class >= GFX10 ||
- !"rotate micro tile mode is unsupported");
- surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
- break;
-
- /* Z = depth. */
- case ADDR_SW_4KB_Z:
- case ADDR_SW_64KB_Z:
- case ADDR_SW_64KB_Z_T:
- case ADDR_SW_4KB_Z_X:
- case ADDR_SW_64KB_Z_X:
- case ADDR_SW_VAR_Z_X:
- surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
- break;
-
- default:
- assert(0);
- }
-
- return 0;
+ bool compressed;
+ ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
+ int r;
+
+ AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
+
+ compressed = surf->blk_w == 4 && surf->blk_h == 4;
+
+ /* The format must be set correctly for the allocation of compressed
+ * textures to work. In other cases, setting the bpp is sufficient. */
+ if (compressed) {
+ switch (surf->bpe) {
+ case 8:
+ AddrSurfInfoIn.format = ADDR_FMT_BC1;
+ break;
+ case 16:
+ AddrSurfInfoIn.format = ADDR_FMT_BC3;
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (surf->bpe) {
+ case 1:
+ assert(!(surf->flags & RADEON_SURF_ZBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_8;
+ break;
+ case 2:
+ assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_16;
+ break;
+ case 4:
+ assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_32;
+ break;
+ case 8:
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_32_32;
+ break;
+ case 12:
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
+ break;
+ case 16:
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
+ break;
+ default:
+ assert(0);
+ }
+ AddrSurfInfoIn.bpp = surf->bpe * 8;
+ }
+
+ bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
+ AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
+ AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
+ AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
+ /* flags.texture currently refers to TC-compatible HTILE */
+ AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
+ AddrSurfInfoIn.flags.opt4space = 1;
+
+ AddrSurfInfoIn.numMipLevels = config->info.levels;
+ AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
+ AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
+
+ if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
+ AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
+
+   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
+    * as 2D to avoid having shader variants for 1D vs 2D; consequently, all
+    * shaders must sample 1D textures as 2D. */
+ if (config->is_3d)
+ AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
+ else if (info->chip_class != GFX9 && config->is_1d)
+ AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
+ else
+ AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
+
+ AddrSurfInfoIn.width = config->info.width;
+ AddrSurfInfoIn.height = config->info.height;
+
+ if (config->is_3d)
+ AddrSurfInfoIn.numSlices = config->info.depth;
+ else if (config->is_cube)
+ AddrSurfInfoIn.numSlices = 6;
+ else
+ AddrSurfInfoIn.numSlices = config->info.array_size;
+
+ /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
+ AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
+ AddrSurfInfoIn.flags.metaRbUnaligned = 0;
+
+ /* Optimal values for the L2 cache. */
+ if (info->chip_class == GFX9) {
+ surf->u.gfx9.dcc.independent_64B_blocks = 1;
+ surf->u.gfx9.dcc.independent_128B_blocks = 0;
+ surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ } else if (info->chip_class >= GFX10) {
+ surf->u.gfx9.dcc.independent_64B_blocks = 0;
+ surf->u.gfx9.dcc.independent_128B_blocks = 1;
+ surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
+ }
+
+ if (AddrSurfInfoIn.flags.display) {
+ /* The display hardware can only read DCC with RB_ALIGNED=0 and
+ * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
+ *
+       * The CB block requires RB_ALIGNED=1 except on chips with a single RB.
+ * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
+ * after rendering, so PIPE_ALIGNED=1 is recommended.
+ */
+ if (info->use_display_dcc_unaligned) {
+ AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
+ AddrSurfInfoIn.flags.metaRbUnaligned = 1;
+ }
+
+ /* Adjust DCC settings to meet DCN requirements. */
+ if (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) {
+ /* Only Navi12/14 support independent 64B blocks in L2,
+ * but without DCC image stores.
+ */
+ if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
+ surf->u.gfx9.dcc.independent_64B_blocks = 1;
+ surf->u.gfx9.dcc.independent_128B_blocks = 0;
+ surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ }
+
+ if (info->chip_class >= GFX10_3) {
+ surf->u.gfx9.dcc.independent_64B_blocks = 1;
+ surf->u.gfx9.dcc.independent_128B_blocks = 1;
+ surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ }
+ }
+ }
+
+ switch (mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ assert(config->info.samples <= 1);
+ assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
+ AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
+ break;
+
+ case RADEON_SURF_MODE_1D:
+ case RADEON_SURF_MODE_2D:
+ if (surf->flags & RADEON_SURF_IMPORTED ||
+ (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
+ AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
+ break;
+ }
+
+ r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn, false,
+ &AddrSurfInfoIn.swizzleMode);
+ if (r)
+ return r;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
+ surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
+
+ surf->num_dcc_levels = 0;
+ surf->surf_size = 0;
+ surf->fmask_size = 0;
+ surf->dcc_size = 0;
+ surf->htile_size = 0;
+ surf->htile_slice_size = 0;
+ surf->u.gfx9.surf_offset = 0;
+ surf->u.gfx9.stencil_offset = 0;
+ surf->cmask_size = 0;
+ surf->u.gfx9.dcc_retile_use_uint16 = false;
+ surf->u.gfx9.dcc_retile_num_elements = 0;
+ surf->u.gfx9.dcc_retile_map = NULL;
+
+ /* Calculate texture layout information. */
+ r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
+ if (r)
+ return r;
+
+ /* Calculate texture layout information for stencil. */
+ if (surf->flags & RADEON_SURF_SBUFFER) {
+ AddrSurfInfoIn.flags.stencil = 1;
+ AddrSurfInfoIn.bpp = 8;
+ AddrSurfInfoIn.format = ADDR_FMT_8;
+
+ if (!AddrSurfInfoIn.flags.depth) {
+ r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn, false,
+ &AddrSurfInfoIn.swizzleMode);
+ if (r)
+ return r;
+ } else
+ AddrSurfInfoIn.flags.depth = 0;
+
+ r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
+ if (r)
+ return r;
+ }
+
+ surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
+
+ /* Query whether the surface is displayable. */
+ /* This is only useful for surfaces that are allocated without SCANOUT. */
+ bool displayable = false;
+ if (!config->is_3d && !config->is_cube) {
+ r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.surf.swizzle_mode,
+ surf->bpe * 8, &displayable);
+ if (r)
+ return r;
+
+ /* Display needs unaligned DCC. */
+ if (surf->num_dcc_levels &&
+ (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.dcc.rb_aligned,
+ surf->u.gfx9.dcc.pipe_aligned) ||
+ /* Don't set is_displayable if displayable DCC is missing. */
+ (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.dcc_retile_num_elements)))
+ displayable = false;
+ }
+ surf->is_displayable = displayable;
+
+ /* Validate that we allocated a displayable surface if requested. */
+ assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
+
+ /* Validate that DCC is set up correctly. */
+ if (surf->num_dcc_levels) {
+ assert(is_dcc_supported_by_L2(info, surf));
+ if (AddrSurfInfoIn.flags.color)
+ assert(is_dcc_supported_by_CB(info, surf->u.gfx9.surf.swizzle_mode));
+ if (AddrSurfInfoIn.flags.display) {
+ assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.dcc.rb_aligned,
+ surf->u.gfx9.dcc.pipe_aligned));
+ }
+ }
+
+ if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
+ AddrSurfInfoIn.flags.color && !surf->is_linear &&
+ surf->surf_alignment >= 64 * 1024 && /* 64KB tiling */
+ !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
+ RADEON_SURF_FORCE_MICRO_TILE_MODE))) {
+ /* Validate that DCC is enabled if DCN can do it. */
+ if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
+ AddrSurfInfoIn.flags.display && surf->bpe == 4) {
+ assert(surf->num_dcc_levels);
+ }
+
+ /* Validate that non-scanout DCC is always enabled. */
+ if (!AddrSurfInfoIn.flags.display)
+ assert(surf->num_dcc_levels);
+ }
+
+ if (!surf->htile_size) {
+ /* Unset this if HTILE is not present. */
+ surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
+ }
+
+ switch (surf->u.gfx9.surf.swizzle_mode) {
+ /* S = standard. */
+ case ADDR_SW_256B_S:
+ case ADDR_SW_4KB_S:
+ case ADDR_SW_64KB_S:
+ case ADDR_SW_64KB_S_T:
+ case ADDR_SW_4KB_S_X:
+ case ADDR_SW_64KB_S_X:
+ surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
+ break;
+
+ /* D = display. */
+ case ADDR_SW_LINEAR:
+ case ADDR_SW_256B_D:
+ case ADDR_SW_4KB_D:
+ case ADDR_SW_64KB_D:
+ case ADDR_SW_64KB_D_T:
+ case ADDR_SW_4KB_D_X:
+ case ADDR_SW_64KB_D_X:
+ surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
+ break;
+
+ /* R = rotated (gfx9), render target (gfx10). */
+ case ADDR_SW_256B_R:
+ case ADDR_SW_4KB_R:
+ case ADDR_SW_64KB_R:
+ case ADDR_SW_64KB_R_T:
+ case ADDR_SW_4KB_R_X:
+ case ADDR_SW_64KB_R_X:
+ case ADDR_SW_VAR_R_X:
+ /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
+       * used at the same time. We currently do not use the rotated mode
+       * on gfx9.
+ */
+ assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
+ surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
+ break;
+
+ /* Z = depth. */
+ case ADDR_SW_4KB_Z:
+ case ADDR_SW_64KB_Z:
+ case ADDR_SW_64KB_Z_T:
+ case ADDR_SW_4KB_Z_X:
+ case ADDR_SW_64KB_Z_X:
+ case ADDR_SW_VAR_Z_X:
+ surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
+ break;
+
+ default:
+ assert(0);
+ }
+
+ return 0;
}
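
For color surfaces, gfx9_compute_surface separates coverage samples (numSamples) from stored color samples (numFrags), which is what enables EQAA layouts. A hypothetical 8x-coverage/4x-storage configuration would reach addrlib as numSamples = 8 and numFrags = 4:

   /* Illustrative only; assumes the ac_surf_info declaration below. */
   struct ac_surf_info info = {
      .width = 1920, .height = 1080, .depth = 1,
      .samples = 8,         /* FMASK coverage samples */
      .storage_samples = 4, /* allocated color samples */
      .levels = 1, .array_size = 1,
   };
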
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
- const struct ac_surf_config *config,
- enum radeon_surf_mode mode,
- struct radeon_surf *surf)
+ const struct ac_surf_config *config, enum radeon_surf_mode mode,
+ struct radeon_surf *surf)
{
- int r;
-
- r = surf_config_sanity(config, surf->flags);
- if (r)
- return r;
-
- if (info->chip_class >= GFX9)
- r = gfx9_compute_surface(addrlib, info, config, mode, surf);
- else
- r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
-
- if (r)
- return r;
-
- /* Determine the memory layout of multiple allocations in one buffer. */
- surf->total_size = surf->surf_size;
- surf->alignment = surf->surf_alignment;
-
- if (surf->htile_size) {
- surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
- surf->total_size = surf->htile_offset + surf->htile_size;
- surf->alignment = MAX2(surf->alignment, surf->htile_alignment);
- }
-
- if (surf->fmask_size) {
- assert(config->info.samples >= 2);
- surf->fmask_offset = align64(surf->total_size, surf->fmask_alignment);
- surf->total_size = surf->fmask_offset + surf->fmask_size;
- surf->alignment = MAX2(surf->alignment, surf->fmask_alignment);
- }
-
- /* Single-sample CMASK is in a separate buffer. */
- if (surf->cmask_size && config->info.samples >= 2) {
- surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
- surf->total_size = surf->cmask_offset + surf->cmask_size;
- surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
- }
-
- if (surf->is_displayable)
- surf->flags |= RADEON_SURF_SCANOUT;
-
- if (surf->dcc_size &&
- /* dcc_size is computed on GFX9+ only if it's displayable. */
- (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
- /* It's better when displayable DCC is immediately after
- * the image due to hw-specific reasons.
- */
- if (info->chip_class >= GFX9 &&
- surf->u.gfx9.dcc_retile_num_elements) {
- /* Add space for the displayable DCC buffer. */
- surf->display_dcc_offset =
- align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
- surf->total_size = surf->display_dcc_offset +
- surf->u.gfx9.display_dcc_size;
-
- /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
- surf->dcc_retile_map_offset =
- align64(surf->total_size, info->tcc_cache_line_size);
-
- if (surf->u.gfx9.dcc_retile_use_uint16) {
- surf->total_size = surf->dcc_retile_map_offset +
- surf->u.gfx9.dcc_retile_num_elements * 2;
- } else {
- surf->total_size = surf->dcc_retile_map_offset +
- surf->u.gfx9.dcc_retile_num_elements * 4;
- }
- }
-
- surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
- surf->total_size = surf->dcc_offset + surf->dcc_size;
- surf->alignment = MAX2(surf->alignment, surf->dcc_alignment);
- }
-
- return 0;
+ int r;
+
+ r = surf_config_sanity(config, surf->flags);
+ if (r)
+ return r;
+
+ if (info->chip_class >= GFX9)
+ r = gfx9_compute_surface(addrlib, info, config, mode, surf);
+ else
+ r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
+
+ if (r)
+ return r;
+
+ /* Determine the memory layout of multiple allocations in one buffer. */
+ surf->total_size = surf->surf_size;
+ surf->alignment = surf->surf_alignment;
+
+ if (surf->htile_size) {
+ surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
+ surf->total_size = surf->htile_offset + surf->htile_size;
+ surf->alignment = MAX2(surf->alignment, surf->htile_alignment);
+ }
+
+ if (surf->fmask_size) {
+ assert(config->info.samples >= 2);
+ surf->fmask_offset = align64(surf->total_size, surf->fmask_alignment);
+ surf->total_size = surf->fmask_offset + surf->fmask_size;
+ surf->alignment = MAX2(surf->alignment, surf->fmask_alignment);
+ }
+
+ /* Single-sample CMASK is in a separate buffer. */
+ if (surf->cmask_size && config->info.samples >= 2) {
+ surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
+ surf->total_size = surf->cmask_offset + surf->cmask_size;
+ surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
+ }
+
+ if (surf->is_displayable)
+ surf->flags |= RADEON_SURF_SCANOUT;
+
+ if (surf->dcc_size &&
+ /* dcc_size is computed on GFX9+ only if it's displayable. */
+ (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
+ /* It's better when displayable DCC is immediately after
+ * the image due to hw-specific reasons.
+ */
+ if (info->chip_class >= GFX9 && surf->u.gfx9.dcc_retile_num_elements) {
+ /* Add space for the displayable DCC buffer. */
+ surf->display_dcc_offset = align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
+ surf->total_size = surf->display_dcc_offset + surf->u.gfx9.display_dcc_size;
+
+ /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
+ surf->dcc_retile_map_offset = align64(surf->total_size, info->tcc_cache_line_size);
+
+ if (surf->u.gfx9.dcc_retile_use_uint16) {
+ surf->total_size =
+ surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 2;
+ } else {
+ surf->total_size =
+ surf->dcc_retile_map_offset + surf->u.gfx9.dcc_retile_num_elements * 4;
+ }
+ }
+
+ surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
+ surf->total_size = surf->dcc_offset + surf->dcc_size;
+ surf->alignment = MAX2(surf->alignment, surf->dcc_alignment);
+ }
+
+ return 0;
}
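
Every optional sub-buffer above (HTILE, FMASK, single-sample CMASK, displayable DCC, DCC) is appended with the same three steps: align the running size, advance it by the buffer size, and fold the buffer's alignment into the total. Restated with a hypothetical helper (align64 and MAX2 come from the util headers this file already includes):

   static uint64_t append_buffer(uint64_t *total_size, uint32_t *alignment,
                                 uint64_t size, uint32_t align)
   {
      uint64_t offset = align64(*total_size, align);
      *total_size = offset + size;
      *alignment = MAX2(*alignment, align);
      return offset;
   }

The DCC retile map is the only exception: it aligns to the TCC cache line size and scales its byte size by 2 or 4 per element without raising the overall alignment.
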
/* This is meant to be used for disabling DCC. */
static unsigned eg_tile_split(unsigned tile_split)
{
switch (tile_split) {
- case 0: tile_split = 64; break;
- case 1: tile_split = 128; break;
- case 2: tile_split = 256; break;
- case 3: tile_split = 512; break;
+ case 0:
+ tile_split = 64;
+ break;
+ case 1:
+ tile_split = 128;
+ break;
+ case 2:
+ tile_split = 256;
+ break;
+ case 3:
+ tile_split = 512;
+ break;
default:
- case 4: tile_split = 1024; break;
- case 5: tile_split = 2048; break;
- case 6: tile_split = 4096; break;
+ case 4:
+ tile_split = 1024;
+ break;
+ case 5:
+ tile_split = 2048;
+ break;
+ case 6:
+ tile_split = 4096;
+ break;
}
return tile_split;
}
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
switch (eg_tile_split) {
- case 64: return 0;
- case 128: return 1;
- case 256: return 2;
- case 512: return 3;
+ case 64:
+ return 0;
+ case 128:
+ return 1;
+ case 256:
+ return 2;
+ case 512:
+ return 3;
default:
- case 1024: return 4;
- case 2048: return 5;
- case 4096: return 6;
+ case 1024:
+ return 4;
+ case 2048:
+ return 5;
+ case 4096:
+ return 6;
}
}
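
The two helpers invert each other over the seven encodable values, and both route out-of-range inputs through their default labels, so an unknown encoding collapses to a 1024-byte split (index 4). An illustrative self-check, assuming <assert.h>:

   static void check_eg_tile_split_roundtrip(void)
   {
      for (unsigned i = 0; i <= 6; i++)
         assert(eg_tile_split_rev(eg_tile_split(i)) == i);
   }
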
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
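
This shift/mask pair feeds the generic AMDGPU_TILING_SET/GET accessors used in the two functions below; those come from the kernel's amdgpu_drm.h and look roughly like this (paraphrased, not verbatim):

   #define AMDGPU_TILING_SET(field, value) \
      (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
   #define AMDGPU_TILING_GET(value, field) \
      (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
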
/* This should be called before ac_compute_surface. */
-void ac_surface_set_bo_metadata(const struct radeon_info *info,
- struct radeon_surf *surf, uint64_t tiling_flags,
- enum radeon_surf_mode *mode)
+void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ uint64_t tiling_flags, enum radeon_surf_mode *mode)
{
bool scanout;
if (info->chip_class >= GFX9) {
surf->u.gfx9.surf.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
- surf->u.gfx9.dcc.independent_64B_blocks = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
- surf->u.gfx9.dcc.independent_128B_blocks = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
- surf->u.gfx9.dcc.max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
+ surf->u.gfx9.dcc.independent_64B_blocks =
+ AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
+ surf->u.gfx9.dcc.independent_128B_blocks =
+ AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
+ surf->u.gfx9.dcc.max_compressed_block_size =
+ AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
surf->u.gfx9.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
- *mode = surf->u.gfx9.surf.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
+ *mode =
+ surf->u.gfx9.surf.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
} else {
surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
*mode = RADEON_SURF_MODE_2D;
else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
*mode = RADEON_SURF_MODE_1D;
surf->flags &= ~RADEON_SURF_SCANOUT;
}
-void ac_surface_get_bo_metadata(const struct radeon_info *info,
- struct radeon_surf *surf, uint64_t *tiling_flags)
+void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ uint64_t *tiling_flags)
{
*tiling_flags = 0;
uint64_t dcc_offset = 0;
if (surf->dcc_offset) {
- dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset
- : surf->dcc_offset;
+ dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->dcc_offset;
assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
}
*tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.surf.swizzle_mode);
*tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
*tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.display_dcc_pitch_max);
- *tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.dcc.independent_64B_blocks);
- *tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.dcc.independent_128B_blocks);
- *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, surf->u.gfx9.dcc.max_compressed_block_size);
+ *tiling_flags |=
+ AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.dcc.independent_64B_blocks);
+ *tiling_flags |=
+ AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.dcc.independent_128B_blocks);
+ *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,
+ surf->u.gfx9.dcc.max_compressed_block_size);
*tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
} else {
if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
*tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
*tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
if (surf->u.legacy.tile_split)
- *tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
+ *tiling_flags |=
+ AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
*tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
- *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks)-1);
+ *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);
if (surf->flags & RADEON_SURF_SCANOUT)
*tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
}
/* This should be called after ac_compute_surface. */
-bool ac_surface_set_umd_metadata(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_storage_samples,
- unsigned num_mipmap_levels,
- unsigned size_metadata,
- uint32_t metadata[64])
+bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_storage_samples, unsigned num_mipmap_levels,
+ unsigned size_metadata, uint32_t metadata[64])
{
uint32_t *desc = &metadata[2];
uint64_t offset;
return true;
}
-void ac_surface_get_umd_metadata(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_mipmap_levels,
- uint32_t desc[8],
+void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_mipmap_levels, uint32_t desc[8],
unsigned *size_metadata, uint32_t metadata[64])
{
/* Clear the base address and set the relative DCC offset. */
}
}
-void ac_surface_override_offset_stride(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_mipmap_levels,
- uint64_t offset, unsigned pitch)
+void ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)
{
if (info->chip_class >= GFX9) {
if (pitch) {
surf->u.gfx9.surf_pitch = pitch;
if (num_mipmap_levels == 1)
surf->u.gfx9.surf.epitch = pitch - 1;
- surf->u.gfx9.surf_slice_size =
- (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
+ surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
}
surf->u.gfx9.surf_offset = offset;
if (surf->u.gfx9.stencil_offset)
if (pitch) {
surf->u.legacy.level[0].nblk_x = pitch;
surf->u.legacy.level[0].slice_size_dw =
- ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
+ ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
}
if (offset) {
#ifndef AC_SURFACE_H
#define AC_SURFACE_H
-#include <stdint.h>
-#include <stdbool.h>
-
#include "amd_family.h"
+#include <stdbool.h>
+#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif
struct amdgpu_gpu_info;
struct radeon_info;
-#define RADEON_SURF_MAX_LEVELS 15
+#define RADEON_SURF_MAX_LEVELS 15
-enum radeon_surf_mode {
- RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
- RADEON_SURF_MODE_1D = 2,
- RADEON_SURF_MODE_2D = 3,
+enum radeon_surf_mode
+{
+ RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
+ RADEON_SURF_MODE_1D = 2,
+ RADEON_SURF_MODE_2D = 3,
};
/* This describes D/S/Z/R swizzle modes.
* Defined in the GB_TILE_MODEn.MICRO_TILE_MODE_NEW order.
*/
-enum radeon_micro_mode {
- RADEON_MICRO_MODE_DISPLAY = 0,
- RADEON_MICRO_MODE_STANDARD = 1,
- RADEON_MICRO_MODE_DEPTH = 2,
- RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
+enum radeon_micro_mode
+{
+ RADEON_MICRO_MODE_DISPLAY = 0,
+ RADEON_MICRO_MODE_STANDARD = 1,
+ RADEON_MICRO_MODE_DEPTH = 2,
+ RADEON_MICRO_MODE_RENDER = 3, /* gfx9 and older: rotated */
};
/* the first 16 bits are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_SCANOUT (1 << 16)
-#define RADEON_SURF_ZBUFFER (1 << 17)
-#define RADEON_SURF_SBUFFER (1 << 18)
-#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
+#define RADEON_SURF_SCANOUT (1 << 16)
+#define RADEON_SURF_ZBUFFER (1 << 17)
+#define RADEON_SURF_SBUFFER (1 << 18)
+#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
-#define RADEON_SURF_FMASK (1 << 21)
-#define RADEON_SURF_DISABLE_DCC (1 << 22)
-#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
-#define RADEON_SURF_IMPORTED (1 << 24)
-#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1 << 25)
-#define RADEON_SURF_SHAREABLE (1 << 26)
-#define RADEON_SURF_NO_RENDER_TARGET (1 << 27)
+#define RADEON_SURF_FMASK (1 << 21)
+#define RADEON_SURF_DISABLE_DCC (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
+#define RADEON_SURF_IMPORTED (1 << 24)
+#define RADEON_SURF_CONTIGUOUS_DCC_LAYERS (1 << 25)
+#define RADEON_SURF_SHAREABLE (1 << 26)
+#define RADEON_SURF_NO_RENDER_TARGET (1 << 27)
/* Force a swizzle mode (gfx9+) or tile mode (gfx6-8).
* If this is not set, optimize for space. */
-#define RADEON_SURF_FORCE_SWIZZLE_MODE (1 << 28)
-#define RADEON_SURF_NO_FMASK (1 << 29)
-#define RADEON_SURF_NO_HTILE (1 << 30)
-#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1u << 31)
+#define RADEON_SURF_FORCE_SWIZZLE_MODE (1 << 28)
+#define RADEON_SURF_NO_FMASK (1 << 29)
+#define RADEON_SURF_NO_HTILE (1 << 30)
+#define RADEON_SURF_FORCE_MICRO_TILE_MODE (1u << 31)
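
RADEON_SURF_Z_OR_SBUFFER exists so that either depth plane can be matched with a single test. A hypothetical setup, illustrative only:

   struct radeon_surf surf = {0};
   surf.flags = RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER;
   assert(surf.flags & RADEON_SURF_Z_OR_SBUFFER); /* matches Z, S, or both */
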
struct legacy_surf_level {
- uint64_t offset;
- uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
- uint32_t dcc_offset; /* relative offset within DCC mip tree */
- uint32_t dcc_fast_clear_size;
- uint32_t dcc_slice_fast_clear_size;
- unsigned nblk_x:15;
- unsigned nblk_y:15;
- enum radeon_surf_mode mode:2;
+ uint64_t offset;
+ uint32_t slice_size_dw; /* in dwords; max = 4GB / 4. */
+ uint32_t dcc_offset; /* relative offset within DCC mip tree */
+ uint32_t dcc_fast_clear_size;
+ uint32_t dcc_slice_fast_clear_size;
+ unsigned nblk_x : 15;
+ unsigned nblk_y : 15;
+ enum radeon_surf_mode mode : 2;
};
struct legacy_surf_fmask {
- unsigned slice_tile_max; /* max 4M */
- uint8_t tiling_index; /* max 31 */
- uint8_t bankh; /* max 8 */
- uint16_t pitch_in_pixels;
- uint64_t slice_size;
+ unsigned slice_tile_max; /* max 4M */
+ uint8_t tiling_index; /* max 31 */
+ uint8_t bankh; /* max 8 */
+ uint16_t pitch_in_pixels;
+ uint64_t slice_size;
};
struct legacy_surf_layout {
- unsigned bankw:4; /* max 8 */
- unsigned bankh:4; /* max 8 */
- unsigned mtilea:4; /* max 8 */
- unsigned tile_split:13; /* max 4K */
- unsigned stencil_tile_split:13; /* max 4K */
- unsigned pipe_config:5; /* max 17 */
- unsigned num_banks:5; /* max 16 */
- unsigned macro_tile_index:4; /* max 15 */
-
- /* Whether the depth miptree or stencil miptree as used by the DB are
- * adjusted from their TC compatible form to ensure depth/stencil
- * compatibility. If either is true, the corresponding plane cannot be
- * sampled from.
- */
- unsigned depth_adjusted:1;
- unsigned stencil_adjusted:1;
-
- struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
- struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
- uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
- uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
- struct legacy_surf_fmask fmask;
- unsigned cmask_slice_tile_max;
+ unsigned bankw : 4; /* max 8 */
+ unsigned bankh : 4; /* max 8 */
+ unsigned mtilea : 4; /* max 8 */
+ unsigned tile_split : 13; /* max 4K */
+ unsigned stencil_tile_split : 13; /* max 4K */
+ unsigned pipe_config : 5; /* max 17 */
+ unsigned num_banks : 5; /* max 16 */
+ unsigned macro_tile_index : 4; /* max 15 */
+
+ /* Whether the depth miptree or stencil miptree as used by the DB are
+ * adjusted from their TC compatible form to ensure depth/stencil
+ * compatibility. If either is true, the corresponding plane cannot be
+ * sampled from.
+ */
+ unsigned depth_adjusted : 1;
+ unsigned stencil_adjusted : 1;
+
+ struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
+ struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
+ uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
+ uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
+ struct legacy_surf_fmask fmask;
+ unsigned cmask_slice_tile_max;
};
/* Same as addrlib - AddrResourceType. */
-enum gfx9_resource_type {
- RADEON_RESOURCE_1D = 0,
- RADEON_RESOURCE_2D,
- RADEON_RESOURCE_3D,
+enum gfx9_resource_type
+{
+ RADEON_RESOURCE_1D = 0,
+ RADEON_RESOURCE_2D,
+ RADEON_RESOURCE_3D,
};
struct gfx9_surf_flags {
- uint16_t swizzle_mode; /* tile mode */
- uint16_t epitch; /* (pitch - 1) or (height - 1) */
+ uint16_t swizzle_mode; /* tile mode */
+ uint16_t epitch; /* (pitch - 1) or (height - 1) */
};
struct gfx9_surf_meta_flags {
- unsigned rb_aligned:1; /* optimal for RBs */
- unsigned pipe_aligned:1; /* optimal for TC */
- unsigned independent_64B_blocks:1;
- unsigned independent_128B_blocks:1;
- unsigned max_compressed_block_size:2;
+ unsigned rb_aligned : 1; /* optimal for RBs */
+ unsigned pipe_aligned : 1; /* optimal for TC */
+ unsigned independent_64B_blocks : 1;
+ unsigned independent_128B_blocks : 1;
+ unsigned max_compressed_block_size : 2;
};
struct gfx9_surf_layout {
- struct gfx9_surf_flags surf; /* color or depth surface */
- struct gfx9_surf_flags fmask; /* not added to surf_size */
- struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
-
- struct gfx9_surf_meta_flags dcc; /* metadata of color */
-
- enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
- uint16_t surf_pitch; /* in blocks */
- uint16_t surf_height;
-
- uint64_t surf_offset; /* 0 unless imported with an offset */
- /* The size of the 2D plane containing all mipmap levels. */
- uint64_t surf_slice_size;
- /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
- uint32_t offset[RADEON_SURF_MAX_LEVELS];
- /* Mipmap level pitch in elements. Only valid for LINEAR. */
- uint16_t pitch[RADEON_SURF_MAX_LEVELS];
-
- uint64_t stencil_offset; /* separate stencil */
-
- uint8_t dcc_block_width;
- uint8_t dcc_block_height;
- uint8_t dcc_block_depth;
-
- /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
- * The 3D engine doesn't support that layout except for chips with 1 RB.
- * All other chips must set rb_aligned=1.
- * A compute shader needs to convert from aligned DCC to unaligned.
- */
- uint32_t display_dcc_size;
- uint32_t display_dcc_alignment;
- uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
- bool dcc_retile_use_uint16; /* if all values fit into uint16_t */
- uint32_t dcc_retile_num_elements;
- void *dcc_retile_map;
+ struct gfx9_surf_flags surf; /* color or depth surface */
+ struct gfx9_surf_flags fmask; /* not added to surf_size */
+ struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
+
+ struct gfx9_surf_meta_flags dcc; /* metadata of color */
+
+ enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
+ uint16_t surf_pitch; /* in blocks */
+ uint16_t surf_height;
+
+ uint64_t surf_offset; /* 0 unless imported with an offset */
+ /* The size of the 2D plane containing all mipmap levels. */
+ uint64_t surf_slice_size;
+ /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
+ uint32_t offset[RADEON_SURF_MAX_LEVELS];
+ /* Mipmap level pitch in elements. Only valid for LINEAR. */
+ uint16_t pitch[RADEON_SURF_MAX_LEVELS];
+
+ uint64_t stencil_offset; /* separate stencil */
+
+ uint8_t dcc_block_width;
+ uint8_t dcc_block_height;
+ uint8_t dcc_block_depth;
+
+ /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
+ * The 3D engine doesn't support that layout except for chips with 1 RB.
+ * All other chips must set rb_aligned=1.
+ * A compute shader needs to convert from aligned DCC to unaligned.
+ */
+ uint32_t display_dcc_size;
+ uint32_t display_dcc_alignment;
+ uint16_t display_dcc_pitch_max; /* (mip chain pitch - 1) */
+ bool dcc_retile_use_uint16; /* if all values fit into uint16_t */
+ uint32_t dcc_retile_num_elements;
+ void *dcc_retile_map;
};
struct radeon_surf {
- /* Format properties. */
- unsigned blk_w:4;
- unsigned blk_h:4;
- unsigned bpe:5;
- /* Number of mipmap levels where DCC is enabled starting from level 0.
- * Non-zero levels may be disabled due to alignment constraints, but not
- * the first level.
- */
- unsigned num_dcc_levels:4;
- unsigned is_linear:1;
- unsigned has_stencil:1;
- /* This might be true even if micro_tile_mode isn't displayable or rotated. */
- unsigned is_displayable:1;
- /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
- unsigned micro_tile_mode:3;
- uint32_t flags;
-
- /* These are return values. Some of them can be set by the caller, but
- * they will be treated as hints (e.g. bankw, bankh) and might be
- * changed by the calculator.
- */
-
- /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
- * The value is the same for all mipmap levels. Supported tile modes:
- * - GFX6: Only macro tiling.
- * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
- * tail.
- *
- * Only these surfaces are allowed to set it:
- * - color (if it doesn't have to be displayable)
- * - DCC (same tile swizzle as color)
- * - FMASK
- * - CMASK if it's TC-compatible or if the gen is GFX9
- * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
- */
- uint8_t tile_swizzle;
- uint8_t fmask_tile_swizzle;
-
- uint64_t surf_size;
- uint64_t fmask_size;
- uint32_t surf_alignment;
- uint32_t fmask_alignment;
-
- /* DCC and HTILE are very small. */
- uint32_t dcc_size;
- uint32_t dcc_slice_size;
- uint32_t dcc_alignment;
-
- uint32_t htile_size;
- uint32_t htile_slice_size;
- uint32_t htile_alignment;
-
- uint32_t cmask_size;
- uint32_t cmask_slice_size;
- uint32_t cmask_alignment;
-
- /* All buffers combined. */
- uint64_t htile_offset;
- uint64_t fmask_offset;
- uint64_t cmask_offset;
- uint64_t dcc_offset;
- uint64_t display_dcc_offset;
- uint64_t dcc_retile_map_offset;
- uint64_t total_size;
- uint32_t alignment;
-
- union {
- /* Return values for GFX8 and older.
- *
- * Some of them can be set by the caller if certain parameters are
- * desirable. The allocator will try to obey them.
- */
- struct legacy_surf_layout legacy;
-
- /* GFX9+ return values. */
- struct gfx9_surf_layout gfx9;
- } u;
+ /* Format properties. */
+ unsigned blk_w : 4;
+ unsigned blk_h : 4;
+ unsigned bpe : 5;
+ /* Number of mipmap levels where DCC is enabled starting from level 0.
+ * Non-zero levels may be disabled due to alignment constraints, but not
+ * the first level.
+ */
+ unsigned num_dcc_levels : 4;
+ unsigned is_linear : 1;
+ unsigned has_stencil : 1;
+ /* This might be true even if micro_tile_mode isn't displayable or rotated. */
+ unsigned is_displayable : 1;
+ /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
+ unsigned micro_tile_mode : 3;
+ uint32_t flags;
+
+ /* These are return values. Some of them can be set by the caller, but
+ * they will be treated as hints (e.g. bankw, bankh) and might be
+ * changed by the calculator.
+ */
+
+ /* Tile swizzle can be OR'd with low bits of the BASE_256B address.
+ * The value is the same for all mipmap levels. Supported tile modes:
+ * - GFX6: Only macro tiling.
+ * - GFX9: Only *_X and *_T swizzle modes. Level 0 must not be in the mip
+ * tail.
+ *
+ * Only these surfaces are allowed to set it:
+ * - color (if it doesn't have to be displayable)
+ * - DCC (same tile swizzle as color)
+ * - FMASK
+ * - CMASK if it's TC-compatible or if the gen is GFX9
+ * - depth/stencil if HTILE is not TC-compatible and if the gen is not GFX9
+ */
+ uint8_t tile_swizzle;
+ uint8_t fmask_tile_swizzle;
+
+ uint64_t surf_size;
+ uint64_t fmask_size;
+ uint32_t surf_alignment;
+ uint32_t fmask_alignment;
+
+ /* DCC and HTILE are very small. */
+ uint32_t dcc_size;
+ uint32_t dcc_slice_size;
+ uint32_t dcc_alignment;
+
+ uint32_t htile_size;
+ uint32_t htile_slice_size;
+ uint32_t htile_alignment;
+
+ uint32_t cmask_size;
+ uint32_t cmask_slice_size;
+ uint32_t cmask_alignment;
+
+ /* All buffers combined. */
+ uint64_t htile_offset;
+ uint64_t fmask_offset;
+ uint64_t cmask_offset;
+ uint64_t dcc_offset;
+ uint64_t display_dcc_offset;
+ uint64_t dcc_retile_map_offset;
+ uint64_t total_size;
+ uint32_t alignment;
+
+ union {
+ /* Return values for GFX8 and older.
+ *
+ * Some of them can be set by the caller if certain parameters are
+ * desirable. The allocator will try to obey them.
+ */
+ struct legacy_surf_layout legacy;
+
+ /* GFX9+ return values. */
+ struct gfx9_surf_layout gfx9;
+ } u;
};
struct ac_surf_info {
- uint32_t width;
- uint32_t height;
- uint32_t depth;
- uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */
- uint8_t storage_samples; /* For color: allocated samples */
- uint8_t levels;
- uint8_t num_channels; /* heuristic for displayability */
- uint16_t array_size;
- uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
- uint32_t *fmask_surf_index;
+ uint32_t width;
+ uint32_t height;
+ uint32_t depth;
+ uint8_t samples; /* For Z/S: samples; For color: FMASK coverage samples */
+ uint8_t storage_samples; /* For color: allocated samples */
+ uint8_t levels;
+ uint8_t num_channels; /* heuristic for displayability */
+ uint16_t array_size;
+ uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
+ uint32_t *fmask_surf_index;
};
struct ac_surf_config {
- struct ac_surf_info info;
- unsigned is_1d : 1;
- unsigned is_3d : 1;
- unsigned is_cube : 1;
+ struct ac_surf_info info;
+ unsigned is_1d : 1;
+ unsigned is_3d : 1;
+ unsigned is_cube : 1;
};
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
- const struct amdgpu_gpu_info *amdinfo,
- uint64_t *max_alignment);
+ const struct amdgpu_gpu_info *amdinfo,
+ uint64_t *max_alignment);
void ac_addrlib_destroy(struct ac_addrlib *addrlib);
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
- const struct ac_surf_config * config,
- enum radeon_surf_mode mode,
- struct radeon_surf *surf);
+ const struct ac_surf_config *config, enum radeon_surf_mode mode,
+ struct radeon_surf *surf);
void ac_surface_zero_dcc_fields(struct radeon_surf *surf);
-void ac_surface_set_bo_metadata(const struct radeon_info *info,
- struct radeon_surf *surf, uint64_t tiling_flags,
- enum radeon_surf_mode *mode);
-void ac_surface_get_bo_metadata(const struct radeon_info *info,
- struct radeon_surf *surf, uint64_t *tiling_flags);
-
-bool ac_surface_set_umd_metadata(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_storage_samples,
- unsigned num_mipmap_levels,
- unsigned size_metadata,
- uint32_t metadata[64]);
-void ac_surface_get_umd_metadata(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_mipmap_levels,
- uint32_t desc[8],
+void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ uint64_t tiling_flags, enum radeon_surf_mode *mode);
+void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ uint64_t *tiling_flags);
+
+bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_storage_samples, unsigned num_mipmap_levels,
+ unsigned size_metadata, uint32_t metadata[64]);
+void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_mipmap_levels, uint32_t desc[8],
unsigned *size_metadata, uint32_t metadata[64]);
-void ac_surface_override_offset_stride(const struct radeon_info *info,
- struct radeon_surf *surf,
- unsigned num_mipmap_levels,
- uint64_t offset, unsigned pitch);
+void ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
+ unsigned num_mipmap_levels, uint64_t offset, unsigned pitch);
#ifdef __cplusplus
}
#ifndef AMD_FAMILY_H
#define AMD_FAMILY_H
-enum radeon_family {
- CHIP_UNKNOWN = 0,
- CHIP_R300, /* R3xx-based cores. (GFX2) */
- CHIP_R350,
- CHIP_RV350,
- CHIP_RV370,
- CHIP_RV380,
- CHIP_RS400,
- CHIP_RC410,
- CHIP_RS480,
- CHIP_R420, /* R4xx-based cores. (GFX2) */
- CHIP_R423,
- CHIP_R430,
- CHIP_R480,
- CHIP_R481,
- CHIP_RV410,
- CHIP_RS600,
- CHIP_RS690,
- CHIP_RS740,
- CHIP_RV515, /* R5xx-based cores. (GFX2) */
- CHIP_R520,
- CHIP_RV530,
- CHIP_R580,
- CHIP_RV560,
- CHIP_RV570,
- CHIP_R600, /* GFX3 (R6xx) */
- CHIP_RV610,
- CHIP_RV630,
- CHIP_RV670,
- CHIP_RV620,
- CHIP_RV635,
- CHIP_RS780,
- CHIP_RS880,
- CHIP_RV770, /* GFX3 (R7xx) */
- CHIP_RV730,
- CHIP_RV710,
- CHIP_RV740,
- CHIP_CEDAR, /* GFX4 (Evergreen) */
- CHIP_REDWOOD,
- CHIP_JUNIPER,
- CHIP_CYPRESS,
- CHIP_HEMLOCK,
- CHIP_PALM,
- CHIP_SUMO,
- CHIP_SUMO2,
- CHIP_BARTS,
- CHIP_TURKS,
- CHIP_CAICOS,
- CHIP_CAYMAN, /* GFX5 (Northern Islands) */
- CHIP_ARUBA,
- CHIP_TAHITI, /* GFX6 (Southern Islands) */
- CHIP_PITCAIRN,
- CHIP_VERDE,
- CHIP_OLAND,
- CHIP_HAINAN,
- CHIP_BONAIRE, /* GFX7 (Sea Islands) */
- CHIP_KAVERI,
- CHIP_KABINI,
- CHIP_HAWAII,
- CHIP_TONGA, /* GFX8 (Volcanic Islands & Polaris) */
- CHIP_ICELAND,
- CHIP_CARRIZO,
- CHIP_FIJI,
- CHIP_STONEY,
- CHIP_POLARIS10,
- CHIP_POLARIS11,
- CHIP_POLARIS12,
- CHIP_VEGAM,
- CHIP_VEGA10, /* GFX9 (Vega) */
- CHIP_VEGA12,
- CHIP_VEGA20,
- CHIP_RAVEN,
- CHIP_RAVEN2,
- CHIP_RENOIR,
- CHIP_ARCTURUS,
- CHIP_NAVI10,
- CHIP_NAVI12,
- CHIP_NAVI14,
- CHIP_SIENNA_CICHLID,
- CHIP_NAVY_FLOUNDER,
- CHIP_LAST,
+enum radeon_family
+{
+ CHIP_UNKNOWN = 0,
+ CHIP_R300, /* R3xx-based cores. (GFX2) */
+ CHIP_R350,
+ CHIP_RV350,
+ CHIP_RV370,
+ CHIP_RV380,
+ CHIP_RS400,
+ CHIP_RC410,
+ CHIP_RS480,
+ CHIP_R420, /* R4xx-based cores. (GFX2) */
+ CHIP_R423,
+ CHIP_R430,
+ CHIP_R480,
+ CHIP_R481,
+ CHIP_RV410,
+ CHIP_RS600,
+ CHIP_RS690,
+ CHIP_RS740,
+ CHIP_RV515, /* R5xx-based cores. (GFX2) */
+ CHIP_R520,
+ CHIP_RV530,
+ CHIP_R580,
+ CHIP_RV560,
+ CHIP_RV570,
+ CHIP_R600, /* GFX3 (R6xx) */
+ CHIP_RV610,
+ CHIP_RV630,
+ CHIP_RV670,
+ CHIP_RV620,
+ CHIP_RV635,
+ CHIP_RS780,
+ CHIP_RS880,
+ CHIP_RV770, /* GFX3 (R7xx) */
+ CHIP_RV730,
+ CHIP_RV710,
+ CHIP_RV740,
+ CHIP_CEDAR, /* GFX4 (Evergreen) */
+ CHIP_REDWOOD,
+ CHIP_JUNIPER,
+ CHIP_CYPRESS,
+ CHIP_HEMLOCK,
+ CHIP_PALM,
+ CHIP_SUMO,
+ CHIP_SUMO2,
+ CHIP_BARTS,
+ CHIP_TURKS,
+ CHIP_CAICOS,
+ CHIP_CAYMAN, /* GFX5 (Northern Islands) */
+ CHIP_ARUBA,
+ CHIP_TAHITI, /* GFX6 (Southern Islands) */
+ CHIP_PITCAIRN,
+ CHIP_VERDE,
+ CHIP_OLAND,
+ CHIP_HAINAN,
+ CHIP_BONAIRE, /* GFX7 (Sea Islands) */
+ CHIP_KAVERI,
+ CHIP_KABINI,
+ CHIP_HAWAII,
+ CHIP_TONGA, /* GFX8 (Volcanic Islands & Polaris) */
+ CHIP_ICELAND,
+ CHIP_CARRIZO,
+ CHIP_FIJI,
+ CHIP_STONEY,
+ CHIP_POLARIS10,
+ CHIP_POLARIS11,
+ CHIP_POLARIS12,
+ CHIP_VEGAM,
+ CHIP_VEGA10, /* GFX9 (Vega) */
+ CHIP_VEGA12,
+ CHIP_VEGA20,
+ CHIP_RAVEN,
+ CHIP_RAVEN2,
+ CHIP_RENOIR,
+ CHIP_ARCTURUS,
+ CHIP_NAVI10,
+ CHIP_NAVI12,
+ CHIP_NAVI14,
+ CHIP_SIENNA_CICHLID,
+ CHIP_NAVY_FLOUNDER,
+ CHIP_LAST,
};
-enum chip_class {
- CLASS_UNKNOWN = 0,
- R300,
- R400,
- R500,
- R600,
- R700,
- EVERGREEN,
- CAYMAN,
- GFX6,
- GFX7,
- GFX8,
- GFX9,
- GFX10,
- GFX10_3,
+enum chip_class
+{
+ CLASS_UNKNOWN = 0,
+ R300,
+ R400,
+ R500,
+ R600,
+ R700,
+ EVERGREEN,
+ CAYMAN,
+ GFX6,
+ GFX7,
+ GFX8,
+ GFX9,
+ GFX10,
+ GFX10_3,
};
-enum ring_type {
- RING_GFX = 0,
- RING_COMPUTE,
- RING_DMA,
- RING_UVD,
- RING_VCE,
- RING_UVD_ENC,
- RING_VCN_DEC,
- RING_VCN_ENC,
- RING_VCN_JPEG,
- NUM_RING_TYPES,
+enum ring_type
+{
+ RING_GFX = 0,
+ RING_COMPUTE,
+ RING_DMA,
+ RING_UVD,
+ RING_VCE,
+ RING_UVD_ENC,
+ RING_VCN_DEC,
+ RING_VCN_ENC,
+ RING_VCN_JPEG,
+ NUM_RING_TYPES,
};
#endif
//---------------------------------------------------------------------------//
// Sets val bits for specified mask in specified dst packed instance.
-#define AMD_HSA_BITS_SET(dst, mask, val) \
- dst &= (~(1 << mask ## _SHIFT) & ~mask); \
- dst |= (((val) << mask ## _SHIFT) & mask)
+#define AMD_HSA_BITS_SET(dst, mask, val) \
+ dst &= (~(1 << mask##_SHIFT) & ~mask); \
+ dst |= (((val) << mask##_SHIFT) & mask)
// Gets bits for specified mask from specified src packed instance.
-#define AMD_HSA_BITS_GET(src, mask) \
- ((src & mask) >> mask ## _SHIFT)
+#define AMD_HSA_BITS_GET(src, mask) ((src & mask) >> mask##_SHIFT)
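
Both macros derive the shift amount from the mask's name by token pasting, so every mask must be accompanied by a matching *_SHIFT constant, as the enum below demonstrates. A worked example with a hypothetical two-bit field:

   #define EXAMPLE_FIELD_SHIFT 4
   #define EXAMPLE_FIELD (0x3 << EXAMPLE_FIELD_SHIFT)

   uint32_t packed = 0;
   AMD_HSA_BITS_SET(packed, EXAMPLE_FIELD, 2);
   /* packed == 0x20; AMD_HSA_BITS_GET(packed, EXAMPLE_FIELD) == 2 */
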
/* Every amd_*_code_t has the following properties, which are composed of
* a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
* implementation defined in the C standard and so cannot be used to
* specify an ABI)
*/
-enum amd_code_property_mask_t {
-
- /* Enable the setup of the SGPR user data registers
- * (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
- * for initial register state.
- *
-    * The total number of SGPR user data registers requested must not
- * exceed 16. Any requests beyond 16 will be ignored.
- *
- * Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
- * SGPR user data registers enabled up to 16).
- */
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
-
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
-
- AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
- AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
- AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
-
- /* Control wave ID base counter for GDS ordered-append. Used to set
- * COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
- * ORDERED_APPEND_MODE also needs to be settable)
- */
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS = ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
-
- /* The interleave (swizzle) element size in bytes required by the
- * code for private memory. This must be 2, 4, 8 or 16. This value
- * is provided to the finalizer when it is invoked and is recorded
- * here. The hardware will interleave the memory requests of each
- * lane of a wavefront by this element size to ensure each
- * work-item gets a distinct memory memory location. Therefore, the
- * finalizer ensures that all load and store operations done to
- * private memory do not exceed this size. For example, if the
- * element size is 4 (32-bits or dword) and a 64-bit value must be
- * loaded, the finalizer will generate two 32-bit loads. This
- * ensures that the interleaving will get the work-item
- * specific dword for both halves of the 64-bit value. If it just
- * did a 64-bit load then it would get one dword which belonged to
- * its own work-item, but the second dword would belong to the
- * adjacent lane work-item since the interleaving is in dwords.
- *
- * The value used must match the value that the runtime configures
- * the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
- * is generally DWORD.
- *
- * USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
- */
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
-
- /* Are global memory addresses 64 bits. Must match
- * amd_kernel_code_t.hsail_machine_model ==
- * HSA_MACHINE_LARGE. Must also match
- * SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
- * SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
- */
- AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
- AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
-
- /* Indicate if the generated ISA is using a dynamically sized call
- * stack. This can happen if calls are implemented using a call
- * stack and recursion, alloca or calls to indirect functions are
- * present. In these cases the Finalizer cannot compute the total
- * private segment size at compile time. In this case the
- * workitem_private_segment_byte_size only specifies the statically
- * know private segment size, and additional space must be added
- * for the call stack.
- */
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK = ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
-
- /* Indicate if code generated has support for debugging. */
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
-
- AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
- AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
- AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
-
- AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
- AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
- AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED2_SHIFT
+enum amd_code_property_mask_t
+{
+
+ /* Enable the setup of the SGPR user data registers
+ * (AMD_CODE_PROPERTY_ENABLE_SGPR_*), see documentation of amd_kernel_code_t
+ * for initial register state.
+ *
+ * The total number of SGPR user data registers requested must not
+ * exceed 16. Any requests beyond 16 will be ignored.
+ *
+ * Used to set COMPUTE_PGM_RSRC2.USER_SGPR (set to total count of
+ * SGPR user data registers enabled up to 16).
+ */
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT = 0,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT = 2,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT = 3,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT = 4,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT = 5,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT = 6,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT = 7,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT = 8,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
+
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT = 9,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
+
+ AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
+ AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+ AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
+
+ /* Control wave ID base counter for GDS ordered-append. Used to set
+ * COMPUTE_DISPATCH_INITIATOR.ORDERED_APPEND_ENBL. (Not sure if
+ * ORDERED_APPEND_MODE also needs to be settable)
+ */
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT = 16,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS =
+ ((1 << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
+
+ /* The interleave (swizzle) element size in bytes required by the
+ * code for private memory. This must be 2, 4, 8 or 16. This value
+ * is provided to the finalizer when it is invoked and is recorded
+ * here. The hardware will interleave the memory requests of each
+ * lane of a wavefront by this element size to ensure each
+ * work-item gets a distinct memory location. Therefore, the
+ * finalizer ensures that all load and store operations done to
+ * private memory do not exceed this size. For example, if the
+ * element size is 4 (32-bits or dword) and a 64-bit value must be
+ * loaded, the finalizer will generate two 32-bit loads. This
+ * ensures that the interleaving will get the work-item
+ * specific dword for both halves of the 64-bit value. If it just
+ * did a 64-bit load then it would get one dword which belonged to
+ * its own work-item, but the second dword would belong to the
+ * adjacent lane work-item since the interleaving is in dwords.
+ *
+ * The value used must match the value that the runtime configures
+ * the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
+ * is generally DWORD.
+ *
+ * USE VALUES FROM THE AMD_ELEMENT_BYTE_SIZE_T ENUM.
+ */
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 17,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE =
+ ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
+
+ /* Are global memory addresses 64 bits. Must match
+ * amd_kernel_code_t.hsail_machine_model ==
+ * HSA_MACHINE_LARGE. Must also match
+ * SH_MEM_CONFIG.PTR32 (GFX6 (SI)/GFX7 (CI)),
+ * SH_MEM_CONFIG.ADDRESS_MODE (GFX8 (VI)+).
+ */
+ AMD_CODE_PROPERTY_IS_PTR64_SHIFT = 19,
+ AMD_CODE_PROPERTY_IS_PTR64_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_PTR64 = ((1 << AMD_CODE_PROPERTY_IS_PTR64_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
+
+ /* Indicate if the generated ISA is using a dynamically sized call
+ * stack. This can happen if calls are implemented using a call
+ * stack and recursion, alloca or calls to indirect functions are
+ * present. In these cases the Finalizer cannot compute the total
+ * private segment size at compile time. In this case the
+ * workitem_private_segment_byte_size only specifies the statically
+ * known private segment size, and additional space must be added
+ * for the call stack.
+ */
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT = 20,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK =
+ ((1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
+
+ /* Indicate if code generated has support for debugging. */
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 21,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
+
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 22,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
+ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
+
+ AMD_CODE_PROPERTY_RESERVED2_SHIFT = 23,
+ AMD_CODE_PROPERTY_RESERVED2_WIDTH = 9,
+ AMD_CODE_PROPERTY_RESERVED2 = ((1 << AMD_CODE_PROPERTY_RESERVED2_WIDTH) - 1)
+ << AMD_CODE_PROPERTY_RESERVED2_SHIFT
};
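A quick sanity sketch (C11 _Static_assert, purely illustrative) of how each
WIDTH/SHIFT pair expands into its mask: the 2-bit PRIVATE_ELEMENT_SIZE field at
shift 17 yields ((1 << 2) - 1) << 17 == 0x60000.

_Static_assert(AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE == (0x3 << 17),
               "mask is WIDTH one-bits shifted left by SHIFT");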
/* AMD Kernel Code Object (amd_kernel_code_t). GPU CP uses the AMD Kernel
 * Code Object to set up the hardware to execute the kernel dispatch.
 */
typedef struct amd_kernel_code_s {
- uint32_t amd_kernel_code_version_major;
- uint32_t amd_kernel_code_version_minor;
- uint16_t amd_machine_kind;
- uint16_t amd_machine_version_major;
- uint16_t amd_machine_version_minor;
- uint16_t amd_machine_version_stepping;
-
- /* Byte offset (possibly negative) from start of amd_kernel_code_t
- * object to kernel's entry point instruction. The actual code for
- * the kernel is required to be 256 byte aligned to match hardware
- * requirements (SQ cache line is 16). The code must be position
- * independent code (PIC) for AMD devices to give runtime the
- * option of copying code to discrete GPU memory or APU L2
- * cache. The Finalizer should endeavour to allocate all kernel
- * machine code in contiguous memory pages so that a device
- * pre-fetcher will tend to only pre-fetch Kernel Code objects,
- * improving cache performance.
- */
- int64_t kernel_code_entry_byte_offset;
-
- /* Range of bytes to consider prefetching expressed as an offset
- * and size. The offset is from the start (possibly negative) of
- * amd_kernel_code_t object. Set both to 0 if no prefetch
- * information is available.
- */
- int64_t kernel_code_prefetch_byte_offset;
- uint64_t kernel_code_prefetch_byte_size;
-
- /* Number of bytes of scratch backing memory required for full
- * occupancy of target chip. This takes into account the number of
- * bytes of scratch per work-item, the wavefront size, the maximum
- * number of wavefronts per CU, and the number of CUs. This is an
- * upper limit on scratch. If the grid being dispatched is small it
- * may only need less than this. If the kernel uses no scratch, or
- * the Finalizer has not computed this value, it must be 0.
- */
- uint64_t max_scratch_backing_memory_byte_size;
-
- /* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
- * COMPUTE_PGM_RSRC2 registers.
- */
- uint64_t compute_pgm_resource_registers;
-
- /* Code properties. See amd_code_property_mask_t for a full list of
- * properties.
- */
- uint32_t code_properties;
-
- /* The amount of memory required for the combined private, spill
- * and arg segments for a work-item in bytes. If
- * is_dynamic_callstack is 1 then additional space must be added to
- * this value for the call stack.
- */
- uint32_t workitem_private_segment_byte_size;
-
- /* The amount of group segment memory required by a work-group in
- * bytes. This does not include any dynamically allocated group
- * segment memory that may be added when the kernel is
- * dispatched.
- */
- uint32_t workgroup_group_segment_byte_size;
-
- /* Number of byte of GDS required by kernel dispatch. Must be 0 if
- * not using GDS.
- */
- uint32_t gds_segment_byte_size;
-
- /* The size in bytes of the kernarg segment that holds the values
- * of the arguments to the kernel. This could be used by CP to
- * prefetch the kernarg segment pointed to by the dispatch packet.
- */
- uint64_t kernarg_segment_byte_size;
-
- /* Number of fbarrier's used in the kernel and all functions it
- * calls. If the implementation uses group memory to allocate the
- * fbarriers then that amount must already be included in the
- * workgroup_group_segment_byte_size total.
- */
- uint32_t workgroup_fbarrier_count;
-
- /* Number of scalar registers used by a wavefront. This includes
- * the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
- * and XNACK (for GFX8 (VI)). It does not include the 16 SGPR added if a
- * trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
- */
- uint16_t wavefront_sgpr_count;
-
- /* Number of vector registers used by each work-item. Used to set
- * COMPUTE_PGM_RSRC1.VGPRS.
- */
- uint16_t workitem_vgpr_count;
-
- /* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
- * first fixed VGPR number reserved.
- */
- uint16_t reserved_vgpr_first;
-
- /* The number of consecutive VGPRs reserved by the client. If
- * is_debug_supported then this count includes VGPRs reserved
- * for debugger use.
- */
- uint16_t reserved_vgpr_count;
-
- /* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
- * first fixed SGPR number reserved.
- */
- uint16_t reserved_sgpr_first;
-
- /* The number of consecutive SGPRs reserved by the client. If
- * is_debug_supported then this count includes SGPRs reserved
- * for debugger use.
- */
- uint16_t reserved_sgpr_count;
-
- /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
- * fixed SGPR number used to hold the wave scratch offset for the
- * entire kernel execution, or uint16_t(-1) if the register is not
- * used or not known.
- */
- uint16_t debug_wavefront_private_segment_offset_sgpr;
-
- /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
- * fixed SGPR number of the first of 4 SGPRs used to hold the
- * scratch V# used for the entire kernel execution, or uint16_t(-1)
- * if the registers are not used or not known.
- */
- uint16_t debug_private_segment_buffer_sgpr;
-
- /* The maximum byte alignment of variables used by the kernel in
- * the specified memory segment. Expressed as a power of two. Must
- * be at least HSA_POWERTWO_16.
- */
- uint8_t kernarg_segment_alignment;
- uint8_t group_segment_alignment;
- uint8_t private_segment_alignment;
-
- /* Wavefront size expressed as a power of two. Must be a power of 2
- * in range 1..64 inclusive. Used to support runtime query that
- * obtains wavefront size, which may be used by application to
- * allocated dynamic group memory and set the dispatch work-group
- * size.
- */
- uint8_t wavefront_size;
-
- int32_t call_convention;
- uint8_t reserved3[12];
- uint64_t runtime_loader_kernel_symbol;
- uint64_t control_directives[16];
+ uint32_t amd_kernel_code_version_major;
+ uint32_t amd_kernel_code_version_minor;
+ uint16_t amd_machine_kind;
+ uint16_t amd_machine_version_major;
+ uint16_t amd_machine_version_minor;
+ uint16_t amd_machine_version_stepping;
+
+ /* Byte offset (possibly negative) from start of amd_kernel_code_t
+ * object to kernel's entry point instruction. The actual code for
+ * the kernel is required to be 256 byte aligned to match hardware
+ * requirements (SQ cache line is 16). The code must be position
+ * independent code (PIC) for AMD devices to give the runtime the
+ * option of copying code to discrete GPU memory or APU L2
+ * cache. The Finalizer should endeavour to allocate all kernel
+ * machine code in contiguous memory pages so that a device
+ * pre-fetcher will tend to only pre-fetch Kernel Code objects,
+ * improving cache performance.
+ */
+ int64_t kernel_code_entry_byte_offset;
+
+ /* Range of bytes to consider prefetching expressed as an offset
+ * and size. The offset is from the start (possibly negative) of
+ * amd_kernel_code_t object. Set both to 0 if no prefetch
+ * information is available.
+ */
+ int64_t kernel_code_prefetch_byte_offset;
+ uint64_t kernel_code_prefetch_byte_size;
+
+ /* Number of bytes of scratch backing memory required for full
+ * occupancy of target chip. This takes into account the number of
+ * bytes of scratch per work-item, the wavefront size, the maximum
+ * number of wavefronts per CU, and the number of CUs. This is an
+ * upper limit on scratch. If the grid being dispatched is small it
+ * may only need less than this. If the kernel uses no scratch, or
+ * the Finalizer has not computed this value, it must be 0.
+ */
+ uint64_t max_scratch_backing_memory_byte_size;
+
+ /* Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
+ * COMPUTE_PGM_RSRC2 registers.
+ */
+ uint64_t compute_pgm_resource_registers;
+
+ /* Code properties. See amd_code_property_mask_t for a full list of
+ * properties.
+ */
+ uint32_t code_properties;
+
+ /* The amount of memory required for the combined private, spill
+ * and arg segments for a work-item in bytes. If
+ * is_dynamic_callstack is 1 then additional space must be added to
+ * this value for the call stack.
+ */
+ uint32_t workitem_private_segment_byte_size;
+
+ /* The amount of group segment memory required by a work-group in
+ * bytes. This does not include any dynamically allocated group
+ * segment memory that may be added when the kernel is
+ * dispatched.
+ */
+ uint32_t workgroup_group_segment_byte_size;
+
+ /* Number of bytes of GDS required by kernel dispatch. Must be 0 if
+ * not using GDS.
+ */
+ uint32_t gds_segment_byte_size;
+
+ /* The size in bytes of the kernarg segment that holds the values
+ * of the arguments to the kernel. This could be used by CP to
+ * prefetch the kernarg segment pointed to by the dispatch packet.
+ */
+ uint64_t kernarg_segment_byte_size;
+
+ /* Number of fbarriers used in the kernel and all functions it
+ * calls. If the implementation uses group memory to allocate the
+ * fbarriers then that amount must already be included in the
+ * workgroup_group_segment_byte_size total.
+ */
+ uint32_t workgroup_fbarrier_count;
+
+ /* Number of scalar registers used by a wavefront. This includes
+ * the special SGPRs for VCC, Flat Scratch Base, Flat Scratch Size
+ * and XNACK (for GFX8 (VI)). It does not include the 16 SGPRs added if a
+ * trap handler is enabled. Used to set COMPUTE_PGM_RSRC1.SGPRS.
+ */
+ uint16_t wavefront_sgpr_count;
+
+ /* Number of vector registers used by each work-item. Used to set
+ * COMPUTE_PGM_RSRC1.VGPRS.
+ */
+ uint16_t workitem_vgpr_count;
+
+ /* If reserved_vgpr_count is 0 then must be 0. Otherwise, this is the
+ * first fixed VGPR number reserved.
+ */
+ uint16_t reserved_vgpr_first;
+
+ /* The number of consecutive VGPRs reserved by the client. If
+ * is_debug_supported then this count includes VGPRs reserved
+ * for debugger use.
+ */
+ uint16_t reserved_vgpr_count;
+
+ /* If reserved_sgpr_count is 0 then must be 0. Otherwise, this is the
+ * first fixed SGPR number reserved.
+ */
+ uint16_t reserved_sgpr_first;
+
+ /* The number of consecutive SGPRs reserved by the client. If
+ * is_debug_supported then this count includes SGPRs reserved
+ * for debugger use.
+ */
+ uint16_t reserved_sgpr_count;
+
+ /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ * fixed SGPR number used to hold the wave scratch offset for the
+ * entire kernel execution, or uint16_t(-1) if the register is not
+ * used or not known.
+ */
+ uint16_t debug_wavefront_private_segment_offset_sgpr;
+
+ /* If is_debug_supported is 0 then must be 0. Otherwise, this is the
+ * fixed SGPR number of the first of 4 SGPRs used to hold the
+ * scratch V# used for the entire kernel execution, or uint16_t(-1)
+ * if the registers are not used or not known.
+ */
+ uint16_t debug_private_segment_buffer_sgpr;
+
+ /* The maximum byte alignment of variables used by the kernel in
+ * the specified memory segment. Expressed as a power of two. Must
+ * be at least HSA_POWERTWO_16.
+ */
+ uint8_t kernarg_segment_alignment;
+ uint8_t group_segment_alignment;
+ uint8_t private_segment_alignment;
+
+ /* Wavefront size expressed as a power of two. Must be a power of 2
+ * in range 1..64 inclusive. Used to support a runtime query that
+ * obtains the wavefront size, which may be used by an application to
+ * allocate dynamic group memory and set the dispatch work-group
+ * size.
+ */
+ uint8_t wavefront_size;
+
+ int32_t call_convention;
+ uint8_t reserved3[12];
+ uint64_t runtime_loader_kernel_symbol;
+ uint64_t control_directives[16];
} amd_kernel_code_t;
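A sketch of the entry-offset contract described above; the helper is
hypothetical and not part of this header:

/* kernel_code_entry_byte_offset is relative to the start of the header
 * itself and may be negative, per the field comment above. */
static const void *kernel_entry_point(const amd_kernel_code_t *header)
{
   return (const char *)header + header->kernel_code_entry_byte_offset;
}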
#endif // AMDKERNELCODET_H
#ifndef GFX10_FORMAT_TABLE_H
#define GFX10_FORMAT_TABLE_H
-#include <stdbool.h>
#include "pipe/p_format.h"
+#include <stdbool.h>
+
struct gfx10_format {
- unsigned img_format:9;
+ unsigned img_format : 9;
- /* Various formats are only supported with workarounds for vertex fetch,
- * and some 32_32_32 formats are supported natively, but only for buffers
- * (possibly with some image support, actually, but no filtering). */
- bool buffers_only:1;
+ /* Various formats are only supported with workarounds for vertex fetch,
+ * and some 32_32_32 formats are supported natively, but only for buffers
+ * (possibly with some image support, actually, but no filtering). */
+ bool buffers_only : 1;
};
extern const struct gfx10_format gfx10_format_table[PIPE_FORMAT_COUNT];
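A small sketch of how the table is meant to be consumed; the helper name is
hypothetical:

static bool gfx10_format_is_buffer_only(enum pipe_format format)
{
   /* The table is indexed directly by pipe format. */
   return gfx10_format_table[format].buffers_only;
}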
#include "amdgfxregs.h"
/* si values */
-#define SI_CONFIG_REG_OFFSET 0x00008000
-#define SI_CONFIG_REG_END 0x0000B000
-#define SI_SH_REG_OFFSET 0x0000B000
-#define SI_SH_REG_END 0x0000C000
-#define SI_CONTEXT_REG_OFFSET 0x00028000
-#define SI_CONTEXT_REG_END 0x00030000
-#define CIK_UCONFIG_REG_OFFSET 0x00030000
-#define CIK_UCONFIG_REG_END 0x00040000
-#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
-#define SI_UCONFIG_PERF_REG_END 0x00038000
+#define SI_CONFIG_REG_OFFSET 0x00008000
+#define SI_CONFIG_REG_END 0x0000B000
+#define SI_SH_REG_OFFSET 0x0000B000
+#define SI_SH_REG_END 0x0000C000
+#define SI_CONTEXT_REG_OFFSET 0x00028000
+#define SI_CONTEXT_REG_END 0x00030000
+#define CIK_UCONFIG_REG_OFFSET 0x00030000
+#define CIK_UCONFIG_REG_END 0x00040000
+#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
+#define SI_UCONFIG_PERF_REG_END 0x00038000
/* For register shadowing: */
-#define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET)
-#define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
-#define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
-#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)
+#define SI_SH_REG_SPACE_SIZE (SI_SH_REG_END - SI_SH_REG_OFFSET)
+#define SI_CONTEXT_REG_SPACE_SIZE (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
+#define SI_UCONFIG_REG_SPACE_SIZE (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
+#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)
-#define SI_SHADOWED_SH_REG_OFFSET 0
-#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
-#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
-#define SI_SHADOWED_REG_BUFFER_SIZE (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + \
- SI_UCONFIG_REG_SPACE_SIZE)
+#define SI_SHADOWED_SH_REG_OFFSET 0
+#define SI_SHADOWED_CONTEXT_REG_OFFSET SI_SH_REG_SPACE_SIZE
+#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE)
+#define SI_SHADOWED_REG_BUFFER_SIZE \
+ (SI_SH_REG_SPACE_SIZE + SI_CONTEXT_REG_SPACE_SIZE + SI_UCONFIG_REG_SPACE_SIZE)
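A sketch of the layout these offsets imply, assuming shadowed registers are
stored contiguously in SH, context, uconfig order; the helper is hypothetical:

/* reg must lie in [SI_CONTEXT_REG_OFFSET, SI_CONTEXT_REG_END). */
static unsigned si_context_reg_shadow_offset(unsigned reg)
{
   return SI_SHADOWED_CONTEXT_REG_OFFSET + (reg - SI_CONTEXT_REG_OFFSET);
}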
#define EVENT_TYPE_CACHE_FLUSH 0x6
-#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
+#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
-#define EVENT_TYPE_ZPASS_DONE 0x15
-#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
-#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
-#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
-#define EVENT_TYPE(x) ((x) << 0)
-#define EVENT_INDEX(x) ((x) << 8)
- /* 0 - any non-TS event
- * 1 - ZPASS_DONE
- * 2 - SAMPLE_PIPELINESTAT
- * 3 - SAMPLE_STREAMOUTSTAT*
- * 4 - *S_PARTIAL_FLUSH
- * 5 - TS events
- */
+#define EVENT_TYPE_ZPASS_DONE 0x15
+#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16
+#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f
+#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
+#define EVENT_TYPE(x) ((x) << 0)
+#define EVENT_INDEX(x) ((x) << 8)
+/* 0 - any non-TS event
+ * 1 - ZPASS_DONE
+ * 2 - SAMPLE_PIPELINESTAT
+ * 3 - SAMPLE_STREAMOUTSTAT*
+ * 4 - *S_PARTIAL_FLUSH
+ * 5 - TS events
+ */
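For instance, combining the two fields for a PS partial flush, which the list
above assigns event index 4 (illustrative only; the constant name is made up):

static const unsigned ps_partial_flush_dw =
   EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); /* 0x410 */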
/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9) */
-#define EVENT_TCL1_VOL_ACTION_ENA (1 << 12)
-#define EVENT_TC_VOL_ACTION_ENA (1 << 13)
-#define EVENT_TC_WB_ACTION_ENA (1 << 15)
-#define EVENT_TCL1_ACTION_ENA (1 << 16)
-#define EVENT_TC_ACTION_ENA (1 << 17)
-#define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */
-#define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */
-#define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */
-
+#define EVENT_TCL1_VOL_ACTION_ENA (1 << 12)
+#define EVENT_TC_VOL_ACTION_ENA (1 << 13)
+#define EVENT_TC_WB_ACTION_ENA (1 << 15)
+#define EVENT_TCL1_ACTION_ENA (1 << 16)
+#define EVENT_TC_ACTION_ENA (1 << 17)
+#define EVENT_TC_NC_ACTION_ENA (1 << 19) /* GFX9+ */
+#define EVENT_TC_WC_ACTION_ENA (1 << 20) /* GFX9+ */
+#define EVENT_TC_MD_ACTION_ENA (1 << 21) /* GFX9+ */
-#define PREDICATION_OP_CLEAR 0x0
-#define PREDICATION_OP_ZPASS 0x1
+#define PREDICATION_OP_CLEAR 0x0
+#define PREDICATION_OP_ZPASS 0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
-#define PREDICATION_OP_BOOL64 0x3
+#define PREDICATION_OP_BOOL64 0x3
#define PRED_OP(x) ((x) << 16)
#define PREDICATION_CONTINUE (1 << 31)
-#define PREDICATION_HINT_WAIT (0 << 12)
+#define PREDICATION_HINT_WAIT (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
-#define PREDICATION_DRAW_VISIBLE (1 << 8)
+#define PREDICATION_DRAW_VISIBLE (1 << 8)
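A hedged sketch of a SET_PREDICATION control word built from these fields:
draw when the ZPASS result is visible, waiting for the result to land first
(the constant name is hypothetical):

static const unsigned zpass_pred_control =
   PRED_OP(PREDICATION_OP_ZPASS) | PREDICATION_HINT_WAIT | PREDICATION_DRAW_VISIBLE;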
-#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
+#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
/* All registers defined in this packet section don't exist and the only
* purpose of these definitions is to define packet encoding that
* the IB parser understands, and also to have an accurate documentation.
*/
-#define PKT3_NOP 0x10
-#define PKT3_SET_BASE 0x11
-#define PKT3_CLEAR_STATE 0x12
-#define PKT3_INDEX_BUFFER_SIZE 0x13
-#define PKT3_DISPATCH_DIRECT 0x15
-#define PKT3_DISPATCH_INDIRECT 0x16
-#define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */
-#define PKT3_SET_PREDICATION 0x20
-#define PKT3_COND_EXEC 0x22
-#define PKT3_PRED_EXEC 0x23
-#define PKT3_DRAW_INDIRECT 0x24
-#define PKT3_DRAW_INDEX_INDIRECT 0x25
-#define PKT3_INDEX_BASE 0x26
-#define PKT3_DRAW_INDEX_2 0x27
-#define PKT3_CONTEXT_CONTROL 0x28
-#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x) & 0x1) << 0)
-#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
-#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x) & 0x1) << 15)
-#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x) & 0x1) << 16)
-#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x) & 0x1) << 24)
-#define CC0_LOAD_CE_RAM(x) (((unsigned)(x) & 0x1) << 28)
-#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x) & 0x1) << 31)
-#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x) & 0x1) << 0)
-#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x) & 0x1) << 1)
-#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x) & 0x1) << 15)
-#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x) & 0x1) << 16)
-#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x) & 0x1) << 24)
-#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x) & 0x1) << 31)
-#define PKT3_INDEX_TYPE 0x2A /* not on GFX9 */
-#define PKT3_DRAW_INDIRECT_MULTI 0x2C
-#define R_2C3_DRAW_INDEX_LOC 0x2C3
-#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x) & 0x1) << 30)
-#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x) & 0x1) << 31)
-#define PKT3_DRAW_INDEX_AUTO 0x2D
-#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
-#define PKT3_NUM_INSTANCES 0x2F
-#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
-#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
-#define PKT3_INDIRECT_BUFFER_CONST 0x33
-#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
-#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
-#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x) & 0x3) << 1)
-#define STRMOUT_OFFSET_FROM_PACKET 0
-#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
-#define STRMOUT_OFFSET_FROM_MEM 2
-#define STRMOUT_OFFSET_NONE 3
-#define STRMOUT_DATA_TYPE(x) (((unsigned)(x) & 0x1) << 7)
-#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
-#define PKT3_DRAW_INDEX_OFFSET_2 0x35
-#define PKT3_WRITE_DATA 0x37
-#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
-#define PKT3_MEM_SEMAPHORE 0x39
-#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
-#define PKT3_WAIT_REG_MEM 0x3C
-#define WAIT_REG_MEM_EQUAL 3
-#define WAIT_REG_MEM_NOT_EQUAL 4
-#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
-#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
-#define WAIT_REG_MEM_PFP (1 << 8)
-#define PKT3_MEM_WRITE 0x3D /* not on CIK */
-#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
+#define PKT3_NOP 0x10
+#define PKT3_SET_BASE 0x11
+#define PKT3_CLEAR_STATE 0x12
+#define PKT3_INDEX_BUFFER_SIZE 0x13
+#define PKT3_DISPATCH_DIRECT 0x15
+#define PKT3_DISPATCH_INDIRECT 0x16
+#define PKT3_OCCLUSION_QUERY 0x1F /* new for CIK */
+#define PKT3_SET_PREDICATION 0x20
+#define PKT3_COND_EXEC 0x22
+#define PKT3_PRED_EXEC 0x23
+#define PKT3_DRAW_INDIRECT 0x24
+#define PKT3_DRAW_INDEX_INDIRECT 0x25
+#define PKT3_INDEX_BASE 0x26
+#define PKT3_DRAW_INDEX_2 0x27
+#define PKT3_CONTEXT_CONTROL 0x28
+#define CC0_LOAD_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
+#define CC0_LOAD_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
+#define CC0_LOAD_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
+#define CC0_LOAD_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
+#define CC0_LOAD_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
+#define CC0_LOAD_CE_RAM(x) (((unsigned)(x)&0x1) << 28)
+#define CC0_UPDATE_LOAD_ENABLES(x) (((unsigned)(x)&0x1) << 31)
+#define CC1_SHADOW_GLOBAL_CONFIG(x) (((unsigned)(x)&0x1) << 0)
+#define CC1_SHADOW_PER_CONTEXT_STATE(x) (((unsigned)(x)&0x1) << 1)
+#define CC1_SHADOW_GLOBAL_UCONFIG(x) (((unsigned)(x)&0x1) << 15)
+#define CC1_SHADOW_GFX_SH_REGS(x) (((unsigned)(x)&0x1) << 16)
+#define CC1_SHADOW_CS_SH_REGS(x) (((unsigned)(x)&0x1) << 24)
+#define CC1_UPDATE_SHADOW_ENABLES(x) (((unsigned)(x)&0x1) << 31)
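A sketch of CONTEXT_CONTROL payload dwords built from these fields, under the
assumption suggested by the names that the UPDATE_* bit must be set for the
other enables to take effect (constant names hypothetical):

static const unsigned cc0_load_enables =
   CC0_UPDATE_LOAD_ENABLES(1) | CC0_LOAD_GFX_SH_REGS(1) | CC0_LOAD_CS_SH_REGS(1);
static const unsigned cc1_shadow_enables =
   CC1_UPDATE_SHADOW_ENABLES(1) | CC1_SHADOW_GFX_SH_REGS(1) | CC1_SHADOW_CS_SH_REGS(1);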
+#define PKT3_INDEX_TYPE 0x2A /* not on GFX9 */
+#define PKT3_DRAW_INDIRECT_MULTI 0x2C
+#define R_2C3_DRAW_INDEX_LOC 0x2C3
+#define S_2C3_COUNT_INDIRECT_ENABLE(x) (((unsigned)(x)&0x1) << 30)
+#define S_2C3_DRAW_INDEX_ENABLE(x) (((unsigned)(x)&0x1) << 31)
+#define PKT3_DRAW_INDEX_AUTO 0x2D
+#define PKT3_DRAW_INDEX_IMMD 0x2E /* not on CIK */
+#define PKT3_NUM_INSTANCES 0x2F
+#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
+#define PKT3_INDIRECT_BUFFER_SI 0x32 /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CONST 0x33
+#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
+#define STRMOUT_STORE_BUFFER_FILLED_SIZE 1
+#define STRMOUT_OFFSET_SOURCE(x) (((unsigned)(x)&0x3) << 1)
+#define STRMOUT_OFFSET_FROM_PACKET 0
+#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
+#define STRMOUT_OFFSET_FROM_MEM 2
+#define STRMOUT_OFFSET_NONE 3
+#define STRMOUT_DATA_TYPE(x) (((unsigned)(x)&0x1) << 7)
+#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x)&0x3) << 8)
+#define PKT3_DRAW_INDEX_OFFSET_2 0x35
+#define PKT3_WRITE_DATA 0x37
+#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
+#define PKT3_MEM_SEMAPHORE 0x39
+#define PKT3_MPEG_INDEX 0x3A /* not on CIK */
+#define PKT3_WAIT_REG_MEM 0x3C
+#define WAIT_REG_MEM_EQUAL 3
+#define WAIT_REG_MEM_NOT_EQUAL 4
+#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
+#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x)&0x3) << 4)
+#define WAIT_REG_MEM_PFP (1 << 8)
+#define PKT3_MEM_WRITE 0x3D /* not on CIK */
+#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
-#define PKT3_COPY_DATA 0x40
-#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
-#define COPY_DATA_REG 0
-#define COPY_DATA_SRC_MEM 1 /* only valid as source */
-#define COPY_DATA_TC_L2 2
-#define COPY_DATA_GDS 3
-#define COPY_DATA_PERF 4
-#define COPY_DATA_IMM 5
-#define COPY_DATA_TIMESTAMP 9
-#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
-#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */
-#define COPY_DATA_TC_L2 2
-#define COPY_DATA_GDS 3
-#define COPY_DATA_PERF 4
-#define COPY_DATA_DST_MEM 5
-#define COPY_DATA_COUNT_SEL (1 << 16)
-#define COPY_DATA_WR_CONFIRM (1 << 20)
-#define COPY_DATA_ENGINE_PFP (1 << 30)
-#define PKT3_PFP_SYNC_ME 0x42
+#define PKT3_COPY_DATA 0x40
+#define COPY_DATA_SRC_SEL(x) ((x)&0xf)
+#define COPY_DATA_REG 0
+#define COPY_DATA_SRC_MEM 1 /* only valid as source */
+#define COPY_DATA_TC_L2 2
+#define COPY_DATA_GDS 3
+#define COPY_DATA_PERF 4
+#define COPY_DATA_IMM 5
+#define COPY_DATA_TIMESTAMP 9
+#define COPY_DATA_DST_SEL(x) (((unsigned)(x)&0xf) << 8)
+#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM, deprecated */
+#define COPY_DATA_TC_L2 2
+#define COPY_DATA_GDS 3
+#define COPY_DATA_PERF 4
+#define COPY_DATA_DST_MEM 5
+#define COPY_DATA_COUNT_SEL (1 << 16)
+#define COPY_DATA_WR_CONFIRM (1 << 20)
+#define COPY_DATA_ENGINE_PFP (1 << 30)
+#define PKT3_PFP_SYNC_ME 0x42
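A sketch of a COPY_DATA control dword: copy the GPU timestamp to memory with a
confirmed write. Treating COUNT_SEL as the 64-bit count select is an
assumption from its single-bit form; the constant name is hypothetical.

static const unsigned copy_timestamp_control =
   COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
   COPY_DATA_COUNT_SEL /* 64-bit */ | COPY_DATA_WR_CONFIRM;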
#define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE 0x44 /* not on CIK */
#define PKT3_COND_WRITE 0x45
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */
-#define EOP_DST_SEL(x) ((x) << 16)
-#define EOP_DST_SEL_MEM 0
-#define EOP_DST_SEL_TC_L2 1
-#define EOP_INT_SEL(x) ((x) << 24)
-#define EOP_INT_SEL_NONE 0
-#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
-#define EOP_DATA_SEL(x) ((x) << 29)
-#define EOP_DATA_SEL_DISCARD 0
-#define EOP_DATA_SEL_VALUE_32BIT 1
-#define EOP_DATA_SEL_VALUE_64BIT 2
-#define EOP_DATA_SEL_TIMESTAMP 3
-#define EOP_DATA_SEL_GDS 5
-#define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16))
+#define EOP_DST_SEL(x) ((x) << 16)
+#define EOP_DST_SEL_MEM 0
+#define EOP_DST_SEL_TC_L2 1
+#define EOP_INT_SEL(x) ((x) << 24)
+#define EOP_INT_SEL_NONE 0
+#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
+#define EOP_DATA_SEL(x) ((x) << 29)
+#define EOP_DATA_SEL_DISCARD 0
+#define EOP_DATA_SEL_VALUE_32BIT 1
+#define EOP_DATA_SEL_VALUE_64BIT 2
+#define EOP_DATA_SEL_TIMESTAMP 3
+#define EOP_DATA_SEL_GDS 5
+#define EOP_DATA_GDS(dw_offset, num_dwords) ((dw_offset) | ((unsigned)(num_dwords) << 16))
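A sketch of the selector bits for an end-of-pipe fence that writes a 64-bit
value to memory with no interrupt (constant name hypothetical):

static const unsigned eop_fence_sel =
   EOP_DST_SEL(EOP_DST_SEL_MEM) | EOP_INT_SEL(EOP_INT_SEL_NONE) |
   EOP_DATA_SEL(EOP_DATA_SEL_VALUE_64BIT);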
/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
* are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
* DST_SEL=MC. Only CIK chips are affected.
*/
/* fix CP DMA before uncommenting: */
/*#define PKT3_EVENT_WRITE_EOS 0x48*/ /* not on GFX9 */
-#define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
-#define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */
-#define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
-#define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */
-#define PKT3_REWIND 0x59 /* VI+ [any ring] or CIK [compute ring only] */
-#define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */
-#define PKT3_LOAD_SH_REG 0x5F
-#define PKT3_LOAD_CONTEXT_REG 0x61
-#define PKT3_SET_CONFIG_REG 0x68
-#define PKT3_SET_CONTEXT_REG 0x69
-#define PKT3_SET_SH_REG 0x76
-#define PKT3_SET_SH_REG_OFFSET 0x77
-#define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */
-#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */
-#define PKT3_LOAD_CONST_RAM 0x80
-#define PKT3_WRITE_CONST_RAM 0x81
-#define PKT3_DUMP_CONST_RAM 0x83
-#define PKT3_INCREMENT_CE_COUNTER 0x84
-#define PKT3_INCREMENT_DE_COUNTER 0x85
-#define PKT3_WAIT_ON_CE_COUNTER 0x86
-#define PKT3_SET_SH_REG_INDEX 0x9B
-#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
+#define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
+#define PKT3_CONTEXT_REG_RMW 0x51 /* older firmware versions on older chips don't have this */
+#define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
+#define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */
+#define PKT3_REWIND 0x59 /* VI+ [any ring] or CIK [compute ring only] */
+#define PKT3_LOAD_UCONFIG_REG 0x5E /* GFX7+ */
+#define PKT3_LOAD_SH_REG 0x5F
+#define PKT3_LOAD_CONTEXT_REG 0x61
+#define PKT3_SET_CONFIG_REG 0x68
+#define PKT3_SET_CONTEXT_REG 0x69
+#define PKT3_SET_SH_REG 0x76
+#define PKT3_SET_SH_REG_OFFSET 0x77
+#define PKT3_SET_UCONFIG_REG 0x79 /* new for CIK */
+#define PKT3_SET_UCONFIG_REG_INDEX 0x7A /* new for GFX9, CP ucode version >= 26 */
+#define PKT3_LOAD_CONST_RAM 0x80
+#define PKT3_WRITE_CONST_RAM 0x81
+#define PKT3_DUMP_CONST_RAM 0x83
+#define PKT3_INCREMENT_CE_COUNTER 0x84
+#define PKT3_INCREMENT_DE_COUNTER 0x85
+#define PKT3_WAIT_ON_CE_COUNTER 0x86
+#define PKT3_SET_SH_REG_INDEX 0x9B
+#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
-#define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30)
-#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
-#define PKT_TYPE_C 0x3FFFFFFF
-#define PKT_COUNT_S(x) (((unsigned)(x) & 0x3FFF) << 16)
-#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
-#define PKT_COUNT_C 0xC000FFFF
-#define PKT0_BASE_INDEX_S(x) (((unsigned)(x) & 0xFFFF) << 0)
-#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
-#define PKT0_BASE_INDEX_C 0xFFFF0000
-#define PKT3_IT_OPCODE_S(x) (((unsigned)(x) & 0xFF) << 8)
-#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
-#define PKT3_IT_OPCODE_C 0xFFFF00FF
-#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
-#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1)
-#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
-#define PKT3(op, count, predicate) (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
+#define PKT_TYPE_S(x) (((unsigned)(x)&0x3) << 30)
+#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
+#define PKT_TYPE_C 0x3FFFFFFF
+#define PKT_COUNT_S(x) (((unsigned)(x)&0x3FFF) << 16)
+#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
+#define PKT_COUNT_C 0xC000FFFF
+#define PKT0_BASE_INDEX_S(x) (((unsigned)(x)&0xFFFF) << 0)
+#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
+#define PKT0_BASE_INDEX_C 0xFFFF0000
+#define PKT3_IT_OPCODE_S(x) (((unsigned)(x)&0xFF) << 8)
+#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
+#define PKT3_IT_OPCODE_C 0xFFFF00FF
+#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
+#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1)
+#define PKT0(index, count) (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
+#define PKT3(op, count, predicate) \
+ (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
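As a field-packing illustration (the count value is shown only to exercise the
macro, not as a statement about SET_CONTEXT_REG's payload rules):

/* Type 3 in bits 31:30, count in 29:16, opcode 0x69 in 15:8, so this
 * packs to the familiar 0xC0016900 header. */
static const unsigned set_context_reg_header = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);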
-#define PKT2_NOP_PAD PKT_TYPE_S(2)
-#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
+#define PKT2_NOP_PAD PKT_TYPE_S(2)
+#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
-#define PKT3_CP_DMA 0x41
+#define PKT3_CP_DMA 0x41
/* 1. header
* 2. SRC_ADDR_LO [31:0] or DATA [31:0]
* 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
* 6. COMMAND [29:22] | BYTE_COUNT [20:0]
*/
-#define PKT3_DMA_DATA 0x50 /* new for CIK */
+#define PKT3_DMA_DATA 0x50 /* new for CIK */
/* 1. header
* 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
* 2. SRC_ADDR_LO [31:0] or DATA [31:0]
*/
/* SI async DMA packets */
-#define SI_DMA_PACKET(cmd, sub_cmd, n) ((((unsigned)(cmd) & 0xF) << 28) | \
- (((unsigned)(sub_cmd) & 0xFF) << 20) |\
- (((unsigned)(n) & 0xFFFFF) << 0))
+#define SI_DMA_PACKET(cmd, sub_cmd, n) \
+ ((((unsigned)(cmd)&0xF) << 28) | (((unsigned)(sub_cmd)&0xFF) << 20) | \
+ (((unsigned)(n)&0xFFFFF) << 0))
/* SI async DMA Packet types */
-#define SI_DMA_PACKET_WRITE 0x2
-#define SI_DMA_PACKET_COPY 0x3
-#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
+#define SI_DMA_PACKET_WRITE 0x2
+#define SI_DMA_PACKET_COPY 0x3
+#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
/* The documentation says 0xffff8 is the maximum size in dwords, which is
* 0x3fffe0 in bytes. */
-#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
-#define SI_DMA_COPY_DWORD_ALIGNED 0x00
-#define SI_DMA_COPY_BYTE_ALIGNED 0x40
-#define SI_DMA_COPY_TILED 0x8
-#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
-#define SI_DMA_PACKET_SEMAPHORE 0x5
-#define SI_DMA_PACKET_FENCE 0x6
-#define SI_DMA_PACKET_TRAP 0x7
-#define SI_DMA_PACKET_SRBM_WRITE 0x9
-#define SI_DMA_PACKET_CONSTANT_FILL 0xd
-#define SI_DMA_PACKET_NOP 0xf
+#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0
+#define SI_DMA_COPY_DWORD_ALIGNED 0x00
+#define SI_DMA_COPY_BYTE_ALIGNED 0x40
+#define SI_DMA_COPY_TILED 0x8
+#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
+#define SI_DMA_PACKET_SEMAPHORE 0x5
+#define SI_DMA_PACKET_FENCE 0x6
+#define SI_DMA_PACKET_TRAP 0x7
+#define SI_DMA_PACKET_SRBM_WRITE 0x9
+#define SI_DMA_PACKET_CONSTANT_FILL 0xd
+#define SI_DMA_PACKET_NOP 0xf
/* CIK async DMA packets */
-#define CIK_SDMA_PACKET(op, sub_op, n) ((((unsigned)(n) & 0xFFFF) << 16) | \
- (((unsigned)(sub_op) & 0xFF) << 8) | \
- (((unsigned)(op) & 0xFF) << 0))
+#define CIK_SDMA_PACKET(op, sub_op, n) \
+ ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) | \
+ (((unsigned)(op)&0xFF) << 0))
/* CIK async DMA packet types */
-#define CIK_SDMA_OPCODE_NOP 0x0
-#define CIK_SDMA_OPCODE_COPY 0x1
-#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0
-#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1
-#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3
-#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
-#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5
-#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
-#define CIK_SDMA_OPCODE_WRITE 0x2
-#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
-#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
-#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
-#define CIK_SDMA_PACKET_FENCE 0x5
-#define CIK_SDMA_PACKET_TRAP 0x6
-#define CIK_SDMA_PACKET_SEMAPHORE 0x7
-#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb
-#define CIK_SDMA_OPCODE_TIMESTAMP 0xd
-#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0
-#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1
-#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2
-#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
+#define CIK_SDMA_OPCODE_NOP 0x0
+#define CIK_SDMA_OPCODE_COPY 0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR 0x0
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED 0x1
+#define CIK_SDMA_COPY_SUB_OPCODE_SOA 0x3
+#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
+#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW 0x5
+#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW 0x6
+#define CIK_SDMA_OPCODE_WRITE 0x2
+#define SDMA_WRITE_SUB_OPCODE_LINEAR 0x0
+#define SDMA_WRTIE_SUB_OPCODE_TILED 0x1
+#define CIK_SDMA_OPCODE_INDIRECT_BUFFER 0x4
+#define CIK_SDMA_PACKET_FENCE 0x5
+#define CIK_SDMA_PACKET_TRAP 0x6
+#define CIK_SDMA_PACKET_SEMAPHORE 0x7
+#define CIK_SDMA_PACKET_CONSTANT_FILL 0xb
+#define CIK_SDMA_OPCODE_TIMESTAMP 0xd
+#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP 0x0
+#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP 0x1
+#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP 0x2
+#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
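A sketch of the header dword for a plain linear copy, built from the opcode and
sub-opcode above (constant name hypothetical):

static const unsigned sdma_linear_copy_hdr =
   CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_LINEAR, 0);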
/* There is apparently an undocumented HW limitation that
   prevents the HW from copying the last 255 bytes of a (1 << 22) - 1 byte copy */
-#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB*/
-#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
+#define CIK_SDMA_COPY_MAX_SIZE 0x3fff00 /* almost 4 MB */
+#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
-enum amd_cmp_class_flags {
- S_NAN = 1 << 0, // Signaling NaN
- Q_NAN = 1 << 1, // Quiet NaN
- N_INFINITY = 1 << 2, // Negative infinity
- N_NORMAL = 1 << 3, // Negative normal
- N_SUBNORMAL = 1 << 4, // Negative subnormal
- N_ZERO = 1 << 5, // Negative zero
- P_ZERO = 1 << 6, // Positive zero
- P_SUBNORMAL = 1 << 7, // Positive subnormal
- P_NORMAL = 1 << 8, // Positive normal
- P_INFINITY = 1 << 9 // Positive infinity
+enum amd_cmp_class_flags
+{
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
};
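For example, a class mask matching any NaN, as one might feed to a
V_CMP_CLASS-style floating-point classify test (the name is hypothetical):

static const unsigned any_nan_mask = S_NAN | Q_NAN;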
#endif /* _SID_H */