UPDATE_COUNTER(gui, GUI_ACTIVE);
gui_busy = GUI_ACTIVE(value);
- if (rscreen->chip_class == CIK || rscreen->chip_class == VI) {
- /* SRBM_STATUS2 */
- rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value);
-
- UPDATE_COUNTER(sdma, SDMA_BUSY);
- sdma_busy = SDMA_BUSY(value);
- }
-
- if (rscreen->chip_class >= VI) {
- /* CP_STAT */
- rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value);
-
- UPDATE_COUNTER(pfp, PFP_BUSY);
- UPDATE_COUNTER(meq, MEQ_BUSY);
- UPDATE_COUNTER(me, ME_BUSY);
- UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY);
- UPDATE_COUNTER(cp_dma, DMA_BUSY);
- UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY);
- }
-
value = gui_busy || sdma_busy;
UPDATE_COUNTER(gpu, IDENTITY);
}
event_flags;
unsigned sel = EOP_DATA_SEL(data_sel);
- /* Wait for write confirmation before writing data, but don't send
- * an interrupt. */
- if (ctx->chip_class >= SI && data_sel != EOP_DATA_SEL_DISCARD)
- sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
-
- if (ctx->chip_class >= GFX9) {
- /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
- * counters) must immediately precede every timestamp event to
- * prevent a GPU hang on GFX9.
- *
- * Occlusion queries don't need to do it here, because they
- * always do ZPASS_DONE before the timestamp.
- */
- if (ctx->chip_class == GFX9 &&
- query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
- query_type != PIPE_QUERY_OCCLUSION_PREDICATE) {
- struct r600_resource *scratch = ctx->eop_bug_scratch;
-
- assert(16 * ctx->screen->info.num_render_backends <=
- scratch->b.b.width0);
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, scratch->gpu_address);
- radeon_emit(cs, scratch->gpu_address >> 32);
-
- radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
- RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
- }
-
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- radeon_emit(cs, 0); /* unused */
- } else {
- if (ctx->chip_class == CIK ||
- ctx->chip_class == VI) {
- struct r600_resource *scratch = ctx->eop_bug_scratch;
- uint64_t va = scratch->gpu_address;
-
- /* Two EOP events are required to make all engines go idle
- * (and optional cache flushes executed) before the timestamp
- * is written.
- */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, 0); /* immediate data */
- radeon_emit(cs, 0); /* unused */
-
- radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
- RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, new_fence); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, new_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
if (buf)
r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE,
{
unsigned dwords = 6;
- if (screen->chip_class == CIK ||
- screen->chip_class == VI)
- dwords *= 2;
-
if (!screen->info.has_virtual_memory)
dwords += 2;
{
struct radeon_winsys_cs *cs = rctx->dma.cs;
- /* NOP waits for idle on Evergreen and later. */
- if (rctx->chip_class >= CIK)
- radeon_emit(cs, 0x00000000); /* NOP */
- else if (rctx->chip_class >= EVERGREEN)
+ if (rctx->chip_class >= EVERGREEN)
radeon_emit(cs, 0xf0000000); /* NOP */
else {
/* TODO: R600-R700 should use the FENCE packet.
r600_query_init(rctx);
cayman_init_msaa(&rctx->b);
- if (rctx->chip_class == CIK ||
- rctx->chip_class == VI ||
- rctx->chip_class == GFX9) {
- rctx->eop_bug_scratch = (struct r600_resource*)
- pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
- 16 * rscreen->info.num_render_backends);
- if (!rctx->eop_bug_scratch)
- return false;
- }
-
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
0, PIPE_USAGE_DEFAULT, 0, true);
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
- unsigned i,j;
-
- /* Release DCC stats. */
- for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
- assert(!rctx->dcc_stats[i].query_active);
-
- for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++)
- if (rctx->dcc_stats[i].ps_stats[j])
- rctx->b.destroy_query(&rctx->b,
- rctx->dcc_stats[i].ps_stats[j]);
-
- r600_texture_reference(&rctx->dcc_stats[i].tex, NULL);
- }
-
if (rctx->query_result_shader)
rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader);
{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
- { "nodcc", DBG_NO_DCC, "Disable DCC." },
- { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
- { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
- { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
- { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
{ "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
- { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" },
- { "nodpbb", DBG_NO_DPBB, "Disable DPBB." },
- { "nodfsm", DBG_NO_DFSM, "Disable DFSM." },
DEBUG_NAMED_VALUE_END /* must be last */
};
case CHIP_CAICOS: return "AMD CAICOS";
case CHIP_CAYMAN: return "AMD CAYMAN";
case CHIP_ARUBA: return "AMD ARUBA";
- case CHIP_TAHITI: return "AMD TAHITI";
- case CHIP_PITCAIRN: return "AMD PITCAIRN";
- case CHIP_VERDE: return "AMD CAPE VERDE";
- case CHIP_OLAND: return "AMD OLAND";
- case CHIP_HAINAN: return "AMD HAINAN";
- case CHIP_BONAIRE: return "AMD BONAIRE";
- case CHIP_KAVERI: return "AMD KAVERI";
- case CHIP_KABINI: return "AMD KABINI";
- case CHIP_HAWAII: return "AMD HAWAII";
- case CHIP_MULLINS: return "AMD MULLINS";
- case CHIP_TONGA: return "AMD TONGA";
- case CHIP_ICELAND: return "AMD ICELAND";
- case CHIP_CARRIZO: return "AMD CARRIZO";
- case CHIP_FIJI: return "AMD FIJI";
- case CHIP_POLARIS10: return "AMD POLARIS10";
- case CHIP_POLARIS11: return "AMD POLARIS11";
- case CHIP_POLARIS12: return "AMD POLARIS12";
- case CHIP_STONEY: return "AMD STONEY";
- case CHIP_VEGA10: return "AMD VEGA10";
- case CHIP_RAVEN: return "AMD RAVEN";
default: return "AMD unknown";
}
}
&mesa_timestamp)) {
char *timestamp_str;
int res = -1;
- if (rscreen->chip_class < SI) {
- res = asprintf(&timestamp_str, "%u",mesa_timestamp);
- }
-#if HAVE_LLVM
- else {
- uint32_t llvm_timestamp;
- if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
- &llvm_timestamp)) {
- res = asprintf(&timestamp_str, "%u_%u",
- mesa_timestamp, llvm_timestamp);
- }
- }
-#endif
+
+ res = asprintf(&timestamp_str, "%u",mesa_timestamp);
if (res != -1) {
/* These flags affect shader compilation. */
uint64_t shader_debug_flags =
rscreen->debug_flags &
(DBG_FS_CORRECT_DERIVS_AFTER_KILL |
- DBG_SI_SCHED |
DBG_UNSAFE_MATH);
rscreen->disk_shader_cache =
case CHIP_ARUBA:
return "cayman";
- case CHIP_TAHITI: return "tahiti";
- case CHIP_PITCAIRN: return "pitcairn";
- case CHIP_VERDE: return "verde";
- case CHIP_OLAND: return "oland";
- case CHIP_HAINAN: return "hainan";
- case CHIP_BONAIRE: return "bonaire";
- case CHIP_KABINI: return "kabini";
- case CHIP_KAVERI: return "kaveri";
- case CHIP_HAWAII: return "hawaii";
- case CHIP_MULLINS:
- return "mullins";
- case CHIP_TONGA: return "tonga";
- case CHIP_ICELAND: return "iceland";
- case CHIP_CARRIZO: return "carrizo";
- case CHIP_FIJI:
- return "fiji";
- case CHIP_STONEY:
- return "stoney";
- case CHIP_POLARIS10:
- return "polaris10";
- case CHIP_POLARIS11:
- case CHIP_POLARIS12: /* same as polaris11 */
- return "polaris11";
- case CHIP_VEGA10:
- case CHIP_RAVEN:
- return "gfx900";
default:
return "";
}
static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
enum pipe_shader_ir ir_type)
{
- if (ir_type != PIPE_SHADER_IR_TGSI)
- return 256;
-
- /* Only 16 waves per thread-group on gfx9. */
- if (screen->chip_class >= GFX9)
- return 1024;
-
- /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
- * round number.
- */
- if (screen->chip_class >= SI)
- return 2048;
-
return 256;
}
if (ret) {
uint32_t *address_bits = ret;
address_bits[0] = 32;
- if (rscreen->chip_class >= SI)
- address_bits[0] = 64;
}
return 1 * sizeof(uint32_t);
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
if (ret) {
uint64_t *max_variable_threads_per_block = ret;
- if (rscreen->chip_class >= SI &&
- ir_type == PIPE_SHADER_IR_TGSI)
- *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
- else
- *max_variable_threads_per_block = 0;
+ *max_variable_threads_per_block = 0;
}
return sizeof(uint64_t);
}
rscreen->family = rscreen->info.family;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
- rscreen->has_rbplus = false;
- rscreen->rbplus_allowed = false;
r600_disk_cache_create(rscreen);
#define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
-#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
#define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
#define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
#define DBG_INFO (1ull << 40)
#define DBG_NO_WC (1ull << 41)
#define DBG_CHECK_VM (1ull << 42)
-#define DBG_NO_DCC (1ull << 43)
-#define DBG_NO_DCC_CLEAR (1ull << 44)
-#define DBG_NO_RB_PLUS (1ull << 45)
-#define DBG_SI_SCHED (1ull << 46)
-#define DBG_MONOLITHIC_SHADERS (1ull << 47)
/* gap */
#define DBG_UNSAFE_MATH (1ull << 49)
-#define DBG_NO_DCC_FB (1ull << 50)
#define DBG_TEST_VMFAULT_CP (1ull << 51)
#define DBG_TEST_VMFAULT_SDMA (1ull << 52)
#define DBG_TEST_VMFAULT_SHADER (1ull << 53)
-#define DBG_NO_DPBB (1ull << 54)
-#define DBG_NO_DFSM (1ull << 55)
#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS 16
struct r600_fmask_info fmask;
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
- uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
uint64_t htile_offset;
- bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
bool non_disp_tiling; /* R600-Cayman only */
- /* Whether the texture is a displayable back buffer and needs DCC
- * decompression, which is expensive. Therefore, it's enabled only
- * if statistics suggest that it will pay off and it's allocated
- * separately. It can't be bound as a sampler by apps. Limited to
- * target == 2D and last_level == 0. If enabled, dcc_offset contains
- * the absolute GPUVM address, not the relative one.
- */
- struct r600_resource *dcc_separate_buffer;
- /* When DCC is temporarily disabled, the separate buffer is here. */
- struct r600_resource *last_dcc_separate_buffer;
- /* We need to track DCC dirtiness, because st/dri usually calls
- * flush_resource twice per frame (not a bug) and we don't wanna
- * decompress DCC twice. Also, the dirty tracking must be done even
- * if DCC isn't used, because it's required by the DCC usage analysis
- * for a possible future enablement.
- */
- bool separate_dcc_dirty;
- /* Statistics gathering for the DCC enablement heuristic. */
- bool dcc_gather_statistics;
- /* Estimate of how much this color buffer is written to in units of
- * full-screen draws: ps_invocations / (width * height)
- * Shader kills, late Z, and blending with trivial discards make it
- * inaccurate (we need to count CB updates, not PS invocations).
- */
- unsigned ps_draw_ratio;
- /* The number of clears since the last DCC usage analysis. */
- unsigned num_slow_clears;
-
/* Counter that should be non-zero if the texture is bound to a
* framebuffer. Implemented in radeonsi only.
*/
bool export_16bpc;
bool color_is_int8;
bool color_is_int10;
- bool dcc_incompatible;
/* Color registers. */
unsigned cb_color_info;
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
- unsigned cb_color_attrib2; /* GFX9 and later */
- unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
unsigned cb_color_mask; /* R600 only */
- unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */
- unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
- unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
- unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
uint64_t db_htile_data_base;
unsigned db_depth_info; /* R600 only, then SI and later */
unsigned db_z_info; /* EG and later */
- unsigned db_z_info2; /* GFX9+ */
unsigned db_depth_view;
unsigned db_depth_size;
unsigned db_depth_slice; /* EG and later */
unsigned db_stencil_info; /* EG and later */
- unsigned db_stencil_info2; /* GFX9+ */
unsigned db_prefetch_limit; /* R600 only */
unsigned db_htile_surface;
unsigned db_preload_control; /* EG and later */
uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
- bool has_rbplus; /* if RB+ registers exist */
- bool rbplus_allowed; /* if RB+ is allowed */
struct disk_cache *disk_shader_cache;
unsigned num_L2_writebacks;
unsigned num_resident_handles;
uint64_t num_alloc_tex_transfer_bytes;
- unsigned last_tex_ps_draw_ratio; /* for query */
/* Render condition. */
struct r600_atom render_cond_atom;
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
- /* Statistics gathering for the DCC enablement heuristic. It can't be
- * in r600_texture because r600_texture can be shared by multiple
- * contexts. This is for back buffers only. We shouldn't get too many
- * of those.
- *
- * X11 DRI3 rotates among a finite set of back buffers. They should
- * all fit in this array. If they don't, separate DCC might never be
- * enabled by DCC stat gathering.
- */
- struct {
- struct r600_texture *tex;
- /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
- struct pipe_query *ps_stats[3];
- /* If all slots are used and another slot is needed,
- * the least recently used slot is evicted based on this. */
- int64_t last_use_timestamp;
- bool query_active;
- } dcc_stats[5];
-
struct pipe_debug_callback debug;
struct pipe_device_reset_callback device_reset_callback;
struct u_log_context *log;
unsigned first_layer, unsigned last_layer,
unsigned first_sample, unsigned last_sample);
- void (*decompress_dcc)(struct pipe_context *ctx,
- struct r600_texture *rtex);
-
/* Reallocate the buffer and update all resource bindings where
* the buffer is bound, including all resource descriptors. */
void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);
struct r600_atom *fb_state,
unsigned *buffers, ubyte *dirty_cbufs,
const union pipe_color_union *color);
-bool r600_texture_disable_dcc(struct r600_common_context *rctx,
- struct r600_texture *rtex);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
- case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
case R600_QUERY_NUM_MAPPED_BUFFERS:
query->begin_result = 0;
break;
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
- case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
- query->end_result = rctx->last_tex_ps_draw_ratio;
- break;
case R600_QUERY_NUM_SHADER_CACHE_HITS:
query->end_result =
p_atomic_read(&rctx->screen->num_shader_cache_hits);
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
- if (ctx->chip_class >= SI) {
- /* Write the timestamp from the CP not waiting for
- * outstanding draws (top-of-pipe).
- */
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- } else {
- /* Write the timestamp after the last draw is done.
- * (bottom-of-pipe)
- */
- r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
- 0, EOP_DATA_SEL_TIMESTAMP,
- NULL, va, 0, query->b.type);
- }
+ /* Write the timestamp after the last draw is done.
+ * (bottom-of-pipe)
+ */
+ r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DATA_SEL_TIMESTAMP,
+ NULL, va, 0, query->b.type);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
- if (ctx->chip_class >= GFX9) {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- } else {
- radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, op | ((va >> 32) & 0xFF));
- }
+ radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, op | ((va >> 32) & 0xFF));
r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
RADEON_PRIO_QUERY);
}
/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
if (query) {
- bool needs_workaround = false;
+ for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+ atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
- /* There is a firmware regression in VI which causes successive
- * SET_PREDICATION packets to give the wrong answer for
- * non-inverted stream overflow predication.
- */
- if (rctx->chip_class >= VI && !condition &&
- (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
- (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
- (rquery->buffer.previous ||
- rquery->buffer.results_end > rquery->result_size)))) {
- needs_workaround = true;
- }
-
- if (needs_workaround && !rquery->workaround_buf) {
- bool old_force_off = rctx->render_cond_force_off;
- rctx->render_cond_force_off = true;
-
- u_suballocator_alloc(
- rctx->allocator_zeroed_memory, 8, 8,
- &rquery->workaround_offset,
- (struct pipe_resource **)&rquery->workaround_buf);
-
- /* Reset to NULL to avoid a redundant SET_PREDICATION
- * from launching the compute grid.
- */
- rctx->render_cond = NULL;
-
- ctx->get_query_result_resource(
- ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
- &rquery->workaround_buf->b.b, rquery->workaround_offset);
-
- /* Settings this in the render cond atom is too late,
- * so set it here. */
- rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
- R600_CONTEXT_FLUSH_FOR_RENDER_COND;
-
- rctx->render_cond_force_off = old_force_off;
- }
-
- if (needs_workaround) {
- atom->num_dw = 5;
- } else {
- for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
- atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
-
- if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
- atom->num_dw *= R600_MAX_STREAMS;
- }
+ if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+ atom->num_dw *= R600_MAX_STREAMS;
}
rctx->render_cond = query;
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
- X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
* which use it as a fallback path to detect the GPU type.
{
if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
return ARRAY_SIZE(r600_driver_query_list);
- else if (rscreen->info.drm_major == 3) {
- if (rscreen->chip_class >= VI)
- return ARRAY_SIZE(r600_driver_query_list);
- else
- return ARRAY_SIZE(r600_driver_query_list) - 7;
- }
else
return ARRAY_SIZE(r600_driver_query_list) - 25;
}
R600_QUERY_GPU_SCRATCH_RAM_BUSY,
R600_QUERY_NUM_COMPILATIONS,
R600_QUERY_NUM_SHADERS_CREATED,
- R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO,
R600_QUERY_NUM_SHADER_CACHE_HITS,
R600_QUERY_GPIN_ASIC_ID,
R600_QUERY_GPIN_NUM_SIMD,
begin->num_dw = 12; /* flush_vgt_streamout */
- if (rctx->chip_class >= SI) {
- begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
- } else {
- begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
+ begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */
- if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
- begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
- }
+ if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
+ begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
begin->num_dw +=
num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL;
}
- if (rctx->chip_class >= CIK) {
- radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
- } else {
- radeon_set_config_reg(cs, reg_strmout_cntl, 0);
- }
+ radeon_set_config_reg(cs, reg_strmout_cntl, 0);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
t[i]->stride_in_dw = stride_in_dw[i];
- if (rctx->chip_class >= SI) {
- /* SI binds streamout buffers as shader resources.
- * VGT only counts primitives and tells the shader
- * through SGPRs what to do. */
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
- radeon_emit(cs, (t[i]->b.buffer_offset +
- t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
- } else {
- uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
+ uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address;
- update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
+ update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
- radeon_emit(cs, (t[i]->b.buffer_offset +
- t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
- radeon_emit(cs, va >> 8); /* BUFFER_BASE */
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3);
+ radeon_emit(cs, (t[i]->b.buffer_offset +
+ t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+ radeon_emit(cs, va >> 8); /* BUFFER_BASE */
- r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
+ RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
- /* R7xx requires this packet after updating BUFFER_BASE.
- * Without this, R7xx locks up. */
- if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
- radeon_emit(cs, i);
- radeon_emit(cs, va >> 8);
+ /* R7xx requires this packet after updating BUFFER_BASE.
+ * Without this, R7xx locks up. */
+ if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) {
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0));
+ radeon_emit(cs, i);
+ radeon_emit(cs, va >> 8);
- r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
- RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
- }
+ r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
+ RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER);
}
if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) {
unsigned *stride,
unsigned *layer_stride)
{
- if (rscreen->chip_class >= GFX9) {
- *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
- *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
-
- if (!box)
- return 0;
-
- /* Each texture is an array of slices. Each slice is an array
- * of mipmap levels. */
- return box->z * rtex->surface.u.gfx9.surf_slice_size +
- rtex->surface.u.gfx9.offset[level] +
- (box->y / rtex->surface.blk_h *
- rtex->surface.u.gfx9.surf_pitch +
- box->x / rtex->surface.blk_w) * rtex->surface.bpe;
- } else {
- *stride = rtex->surface.u.legacy.level[level].nblk_x *
- rtex->surface.bpe;
- *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
-
- if (!box)
- return rtex->surface.u.legacy.level[level].offset;
-
- /* Each texture is an array of mipmap levels. Each level is
- * an array of slices. */
- return rtex->surface.u.legacy.level[level].offset +
- box->z * rtex->surface.u.legacy.level[level].slice_size +
- (box->y / rtex->surface.blk_h *
- rtex->surface.u.legacy.level[level].nblk_x +
- box->x / rtex->surface.blk_w) * rtex->surface.bpe;
- }
+ *stride = rtex->surface.u.legacy.level[level].nblk_x *
+ rtex->surface.bpe;
+ *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
+
+ if (!box)
+ return rtex->surface.u.legacy.level[level].offset;
+
+ /* Each texture is an array of mipmap levels. Each level is
+ * an array of slices. */
+ return rtex->surface.u.legacy.level[level].offset +
+ box->z * rtex->surface.u.legacy.level[level].slice_size +
+ (box->y / rtex->surface.blk_h *
+ rtex->surface.u.legacy.level[level].nblk_x +
+ box->x / rtex->surface.blk_w) * rtex->surface.bpe;
}
static int r600_init_surface(struct r600_common_screen *rscreen,
unsigned offset,
bool is_imported,
bool is_scanout,
- bool is_flushed_depth,
- bool tc_compatible_htile)
+ bool is_flushed_depth)
{
const struct util_format_description *desc =
util_format_description(ptex->format);
if (!is_flushed_depth && is_depth) {
flags |= RADEON_SURF_ZBUFFER;
- if (tc_compatible_htile &&
- (rscreen->chip_class >= GFX9 ||
- array_mode == RADEON_SURF_MODE_2D)) {
- /* TC-compatible HTILE only supports Z32_FLOAT.
- * GFX9 also supports Z16_UNORM.
- * On VI, promote Z16 to Z32. DB->CB copies will convert
- * the format for transfers.
- */
- if (rscreen->chip_class == VI)
- bpe = 4;
-
- flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
- }
-
if (is_stencil)
flags |= RADEON_SURF_SBUFFER;
}
- if (rscreen->chip_class >= VI &&
- (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
- ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
- flags |= RADEON_SURF_DISABLE_DCC;
-
if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
/* This should catch bugs in gallium users setting incorrect flags. */
assert(ptex->nr_samples <= 1 &&
return r;
}
- if (rscreen->chip_class >= GFX9) {
- assert(!pitch_in_bytes_override ||
- pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe);
- surface->u.gfx9.surf_offset = offset;
- } else {
- if (pitch_in_bytes_override &&
- pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
- /* old ddx on evergreen over estimate alignment for 1d, only 1 level
- * for those
- */
- surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
- surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
- surface->u.legacy.level[0].nblk_y;
- }
+ if (pitch_in_bytes_override &&
+ pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
+ /* old ddx on evergreen over estimate alignment for 1d, only 1 level
+ * for those
+ */
+ surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
+ surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
+ surface->u.legacy.level[0].nblk_y;
+ }
- if (offset) {
- for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
- surface->u.legacy.level[i].offset += offset;
- }
+ if (offset) {
+ for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
+ surface->u.legacy.level[i].offset += offset;
}
+
return 0;
}
memset(metadata, 0, sizeof(*metadata));
- if (rscreen->chip_class >= GFX9) {
- metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
- } else {
- metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
- metadata->u.legacy.bankw = surface->u.legacy.bankw;
- metadata->u.legacy.bankh = surface->u.legacy.bankh;
- metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
- metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
- metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
- metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
- metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
- }
+ metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+ metadata->u.legacy.bankw = surface->u.legacy.bankw;
+ metadata->u.legacy.bankh = surface->u.legacy.bankh;
+ metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+ metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+ metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+ metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+ metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
}
static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
enum radeon_surf_mode *array_mode,
bool *is_scanout)
{
- if (rscreen->chip_class >= GFX9) {
- if (metadata->u.gfx9.swizzle_mode > 0)
- *array_mode = RADEON_SURF_MODE_2D;
- else
- *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
-
- *is_scanout = metadata->u.gfx9.swizzle_mode == 0 ||
- metadata->u.gfx9.swizzle_mode % 4 == 2;
-
- surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
- } else {
- surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
- surf->u.legacy.bankw = metadata->u.legacy.bankw;
- surf->u.legacy.bankh = metadata->u.legacy.bankh;
- surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
- surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
- surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
-
- if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- *array_mode = RADEON_SURF_MODE_2D;
- else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
- *array_mode = RADEON_SURF_MODE_1D;
- else
- *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+ surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
+ surf->u.legacy.bankw = metadata->u.legacy.bankw;
+ surf->u.legacy.bankh = metadata->u.legacy.bankh;
+ surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
+ surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
+ surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
+
+ if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_2D;
+ else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ *array_mode = RADEON_SURF_MODE_1D;
+ else
+ *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
- *is_scanout = metadata->u.legacy.scanout;
- }
+ *is_scanout = metadata->u.legacy.scanout;
}
static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
rtex->dirty_level_mask = 0;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
+ rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
if (rtex->cmask_buffer != &rtex->resource)
r600_resource_reference(&rtex->cmask_buffer, NULL);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
-static bool r600_can_disable_dcc(struct r600_texture *rtex)
-{
- /* We can't disable DCC if it can be written by another process. */
- return rtex->dcc_offset &&
- (!rtex->resource.b.is_shared ||
- !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
-}
-
-static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
-{
- if (!r600_can_disable_dcc(rtex))
- return false;
-
- assert(rtex->dcc_separate_buffer == NULL);
-
- /* Disable DCC. */
- rtex->dcc_offset = 0;
-
- /* Notify all contexts about the change. */
- p_atomic_inc(&rscreen->dirty_tex_counter);
- return true;
-}
-
-/**
- * Disable DCC for the texture. (first decompress, then discard metadata).
- *
- * There is unresolved multi-context synchronization issue between
- * screen::aux_context and the current context. If applications do this with
- * multiple contexts, it's already undefined behavior for them and we don't
- * have to worry about that. The scenario is:
- *
- * If context 1 disables DCC and context 2 has queued commands that write
- * to the texture via CB with DCC enabled, and the order of operations is
- * as follows:
- * context 2 queues draw calls rendering to the texture, but doesn't flush
- * context 1 disables DCC and flushes
- * context 1 & 2 reset descriptors and FB state
- * context 2 flushes (new compressed tiles written by the draw calls)
- * context 1 & 2 read garbage, because DCC is disabled, yet there are
- * compressed tiled
- *
- * \param rctx the current context if you have one, or rscreen->aux_context
- * if you don't.
- */
-bool r600_texture_disable_dcc(struct r600_common_context *rctx,
- struct r600_texture *rtex)
-{
- struct r600_common_screen *rscreen = rctx->screen;
-
- if (!r600_can_disable_dcc(rtex))
- return false;
-
- if (&rctx->b == rscreen->aux_context)
- mtx_lock(&rscreen->aux_context_lock);
-
- /* Decompress DCC. */
- rctx->decompress_dcc(&rctx->b, rtex);
- rctx->b.flush(&rctx->b, NULL, 0);
-
- if (&rctx->b == rscreen->aux_context)
- mtx_unlock(&rscreen->aux_context_lock);
-
- return r600_texture_discard_dcc(rscreen, rtex);
-}
-
static void r600_reallocate_texture_inplace(struct r600_common_context *rctx,
struct r600_texture *rtex,
unsigned new_bind_flag,
if (new_bind_flag == PIPE_BIND_LINEAR) {
r600_texture_discard_cmask(rctx->screen, rtex);
- r600_texture_discard_dcc(rctx->screen, rtex);
}
/* Replace the structure fields of rtex. */
rtex->cb_color_info = new_tex->cb_color_info;
rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
rtex->htile_offset = new_tex->htile_offset;
- rtex->tc_compatible_htile = new_tex->tc_compatible_htile;
rtex->depth_cleared = new_tex->depth_cleared;
rtex->stencil_cleared = new_tex->stencil_cleared;
rtex->non_disp_tiling = new_tex->non_disp_tiling;
- rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
rtex->framebuffers_bound = new_tex->framebuffers_bound;
if (new_bind_flag == PIPE_BIND_LINEAR) {
assert(!rtex->htile_offset);
assert(!rtex->cmask.size);
assert(!rtex->fmask.size);
- assert(!rtex->dcc_offset);
assert(!rtex->is_depth);
}
assert(rtex->surface.tile_swizzle == 0);
}
- /* Since shader image stores don't support DCC on VI,
- * disable it for external clients that want write
- * access.
- */
- if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
- if (r600_texture_disable_dcc(rctx, rtex))
- update_metadata = true;
- }
-
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
- (rtex->cmask.size || rtex->dcc_offset)) {
- /* Eliminate fast clear (both CMASK and DCC) */
+ rtex->cmask.size) {
+ /* Eliminate fast clear (CMASK) */
r600_eliminate_fast_color_clear(rctx, rtex);
/* Disable CMASK if flush_resource isn't going
rscreen->ws->buffer_set_metadata(res->buf, &metadata);
}
- if (rscreen->chip_class >= GFX9) {
- offset = rtex->surface.u.gfx9.surf_offset;
- stride = rtex->surface.u.gfx9.surf_pitch *
- rtex->surface.bpe;
- slice_size = rtex->surface.u.gfx9.surf_slice_size;
- } else {
- offset = rtex->surface.u.legacy.level[0].offset;
- stride = rtex->surface.u.legacy.level[0].nblk_x *
- rtex->surface.bpe;
- slice_size = rtex->surface.u.legacy.level[0].slice_size;
- }
+ offset = rtex->surface.u.legacy.level[0].offset;
+ stride = rtex->surface.u.legacy.level[0].nblk_x *
+ rtex->surface.bpe;
+ slice_size = rtex->surface.u.legacy.level[0].slice_size;
} else {
/* Move a suballocated buffer into a non-suballocated allocation. */
if (rscreen->ws->buffer_is_suballocated(res->buf)) {
r600_resource_reference(&rtex->cmask_buffer, NULL);
}
pb_reference(&resource->buf, NULL);
- r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
- r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
FREE(rtex);
}
memset(out, 0, sizeof(*out));
- if (rscreen->chip_class >= GFX9) {
- out->alignment = rtex->surface.u.gfx9.fmask_alignment;
- out->size = rtex->surface.u.gfx9.fmask_size;
- return;
- }
-
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
- if (rscreen->chip_class <= CAYMAN) {
- /* Use the same parameters and tile mode. */
- fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
- fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
- fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
- fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
+ /* Use the same parameters and tile mode. */
+ fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
+ fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
+ fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
+ fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
- if (nr_samples <= 4)
- fmask.u.legacy.bankh = 4;
- }
+ if (nr_samples <= 4)
+ fmask.u.legacy.bankh = 4;
switch (nr_samples) {
case 2:
align(slice_bytes, base_align);
}
-static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
- struct r600_texture *rtex,
- struct r600_cmask_info *out)
-{
- unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
- unsigned num_pipes = rscreen->info.num_tile_pipes;
- unsigned cl_width, cl_height;
-
- if (rscreen->chip_class >= GFX9) {
- out->alignment = rtex->surface.u.gfx9.cmask_alignment;
- out->size = rtex->surface.u.gfx9.cmask_size;
- return;
- }
-
- switch (num_pipes) {
- case 2:
- cl_width = 32;
- cl_height = 16;
- break;
- case 4:
- cl_width = 32;
- cl_height = 32;
- break;
- case 8:
- cl_width = 64;
- cl_height = 32;
- break;
- case 16: /* Hawaii */
- cl_width = 64;
- cl_height = 64;
- break;
- default:
- assert(0);
- return;
- }
-
- unsigned base_align = num_pipes * pipe_interleave_bytes;
-
- unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
- unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
- unsigned slice_elements = (width * height) / (8*8);
-
- /* Each element of CMASK is a nibble. */
- unsigned slice_bytes = slice_elements / 2;
-
- out->slice_tile_max = (width * height) / (128*128);
- if (out->slice_tile_max)
- out->slice_tile_max -= 1;
-
- out->alignment = MAX2(256, base_align);
- out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
- align(slice_bytes, base_align);
-}
-
static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- if (rscreen->chip_class >= SI) {
- si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- } else {
- r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- }
+ /* Fill rtex->cmask; its alignment and size are used just below to place
+ * CMASK after the current end of the texture and grow the total size. */
+ r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
rtex->size = rtex->cmask.offset + rtex->cmask.size;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+ /* Set the FAST_CLEAR field with the EG_ macro; the SI_ variant is no longer selected. */
+ rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
assert(rtex->cmask.size == 0);
- if (rscreen->chip_class >= SI) {
- si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- } else {
- r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
- }
+ r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
rtex->cmask_buffer = (struct r600_resource *)
r600_aligned_buffer_create(&rscreen->b,
/* update colorbuffer state bits */
rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
- if (rscreen->chip_class >= SI)
- rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
- else
- rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
+ rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
p_atomic_inc(&rscreen->compressed_colortex_counter);
}
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
unsigned num_pipes = rscreen->info.num_tile_pipes;
- assert(rscreen->chip_class <= VI);
-
rtex->surface.htile_size = 0;
if (rscreen->chip_class <= EVERGREEN &&
rtex->resource.b.b.height0 > 7680))
return;
- /* HTILE is broken with 1D tiling on old kernels and CIK. */
- if (rscreen->chip_class >= CIK &&
- rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
- rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
- return;
-
- /* Overalign HTILE on P2 configs to work around GPU hangs in
- * piglit/depthstencil-render-miplevels 585.
- *
- * This has been confirmed to help Kabini & Stoney, where the hangs
- * are always reproducible. I think I have seen the test hang
- * on Carrizo too, though it was very rare there.
- */
- if (rscreen->chip_class >= CIK && num_pipes < 4)
- num_pipes = 4;
-
switch (num_pipes) {
case 1:
cl_width = 32;
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
struct r600_texture *rtex)
{
- if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile)
- r600_texture_get_htile_size(rscreen, rtex);
+ r600_texture_get_htile_size(rscreen, rtex);
if (!rtex->surface.htile_size)
return;
rtex->surface.bpe, rtex->resource.b.b.nr_samples,
rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
- if (rscreen->chip_class >= GFX9) {
- u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
- "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
- rtex->surface.surf_size,
- rtex->surface.u.gfx9.surf_slice_size,
- rtex->surface.surf_alignment,
- rtex->surface.u.gfx9.surf.swizzle_mode,
- rtex->surface.u.gfx9.surf.epitch,
- rtex->surface.u.gfx9.surf_pitch);
-
- if (rtex->fmask.size) {
- u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, swmode=%u, epitch=%u\n",
- rtex->fmask.offset,
- rtex->surface.u.gfx9.fmask_size,
- rtex->surface.u.gfx9.fmask_alignment,
- rtex->surface.u.gfx9.fmask.swizzle_mode,
- rtex->surface.u.gfx9.fmask.epitch);
- }
-
- if (rtex->cmask.size) {
- u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
- rtex->cmask.offset,
- rtex->surface.u.gfx9.cmask_size,
- rtex->surface.u.gfx9.cmask_alignment,
- rtex->surface.u.gfx9.cmask.rb_aligned,
- rtex->surface.u.gfx9.cmask.pipe_aligned);
- }
-
- if (rtex->htile_offset) {
- u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
- "rb_aligned=%u, pipe_aligned=%u\n",
- rtex->htile_offset,
- rtex->surface.htile_size,
- rtex->surface.htile_alignment,
- rtex->surface.u.gfx9.htile.rb_aligned,
- rtex->surface.u.gfx9.htile.pipe_aligned);
- }
-
- if (rtex->dcc_offset) {
- u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
- rtex->dcc_offset, rtex->surface.dcc_size,
- rtex->surface.dcc_alignment,
- rtex->surface.u.gfx9.dcc_pitch_max,
- rtex->surface.num_dcc_levels);
- }
-
- if (rtex->surface.u.gfx9.stencil_offset) {
- u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
- rtex->surface.u.gfx9.stencil_offset,
- rtex->surface.u.gfx9.stencil.swizzle_mode,
- rtex->surface.u.gfx9.stencil.epitch);
- }
- return;
- }
-
u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
"bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
if (rtex->htile_offset)
u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
- "alignment=%u, TC_compatible = %u\n",
- rtex->htile_offset, rtex->surface.htile_size,
- rtex->surface.htile_alignment,
- rtex->tc_compatible_htile);
-
- if (rtex->dcc_offset) {
- u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
- rtex->dcc_offset, rtex->surface.dcc_size,
- rtex->surface.dcc_alignment);
- for (i = 0; i <= rtex->resource.b.b.last_level; i++)
- u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
- "fast_clear_size=%"PRIu64"\n",
- i, i < rtex->surface.num_dcc_levels,
- rtex->surface.u.legacy.level[i].dcc_offset,
- rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
- }
+ "alignment=%u\n",
+ rtex->htile_offset, rtex->surface.htile_size,
+ rtex->surface.htile_alignment);
for (i = 0; i <= rtex->resource.b.b.last_level; i++)
u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
rtex->surface = *surface;
rtex->size = rtex->surface.surf_size;
-
- rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
- (rtex->surface.flags &
- RADEON_SURF_TC_COMPATIBLE_HTILE);
-
- /* TC-compatible HTILE:
- * - VI only supports Z32_FLOAT.
- * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
- if (rtex->tc_compatible_htile) {
- if (rscreen->chip_class >= GFX9 &&
- base->format == PIPE_FORMAT_Z16_UNORM)
- rtex->db_render_format = base->format;
- else
- rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
- } else {
- rtex->db_render_format = base->format;
- }
+ rtex->db_render_format = base->format;
/* Tiled depth textures utilize the non-displayable tile order.
* This must be done after r600_setup_surface.
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
- /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
- * between frames, so the only thing that can enable separate DCC
- * with DRI2 is multiple slow clears within a frame.
- */
- rtex->ps_draw_ratio = 0;
-
if (rtex->is_depth) {
if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
rscreen->chip_class >= EVERGREEN) {
- if (rscreen->chip_class >= GFX9) {
- rtex->can_sample_z = true;
- rtex->can_sample_s = true;
- } else {
- rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
- rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
- }
+ rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
+ rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
} else {
if (rtex->resource.b.b.nr_samples <= 1 &&
(rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
return NULL;
}
}
-
- /* Shared textures must always set up DCC here.
- * If it's not present, it will be disabled by
- * apply_opaque_metadata later.
- */
- if (rtex->surface.dcc_size &&
- (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
- !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
- /* Reserve space for the DCC buffer. */
- rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
- rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
- }
}
/* Now create the backing buffer. */
if (rtex->htile_offset) {
uint32_t clear_value = 0;
- if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
- clear_value = 0x0000030F;
-
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
rtex->htile_offset,
rtex->surface.htile_size,
clear_value);
}
- /* Initialize DCC only if the texture is not being imported. */
- if (!buf && rtex->dcc_offset) {
- r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
- rtex->dcc_offset,
- rtex->surface.dcc_size,
- 0xFFFFFFFF);
- }
-
/* Initialize the CMASK base register value. */
rtex->cmask.base_address_reg =
(rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
- /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI,
- * which requires 2D tiling.
- */
- if (rscreen->chip_class == VI &&
- is_depth_stencil &&
- (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
- return RADEON_SURF_MODE_2D;
-
/* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
(templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
- /* Cursors are linear on SI.
- * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
- if (rscreen->chip_class >= SI &&
- (templ->bind & PIPE_BIND_CURSOR))
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
-
if (templ->bind & PIPE_BIND_LINEAR)
return RADEON_SURF_MODE_LINEAR_ALIGNED;
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
struct radeon_surf surface = {0};
bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
- bool tc_compatible_htile =
- rscreen->chip_class >= VI &&
- (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
- !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
- !is_flushed_depth &&
- templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
- util_format_is_depth_or_stencil(templ->format);
-
int r;
r = r600_init_surface(rscreen, &surface, templ,
r600_choose_tiling(rscreen, templ), 0, 0,
- false, false, is_flushed_depth,
- tc_compatible_htile);
+ false, false, is_flushed_depth);
if (r) {
return NULL;
}
&array_mode, &is_scanout);
r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
- offset, true, is_scanout, false, false);
+ offset, true, is_scanout, false);
if (r) {
return NULL;
}
memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}
-/* Set the same micro tile mode as the destination of the last MSAA resolve.
- * This allows hitting the MSAA resolve fast path, which requires that both
- * src and dst micro tile modes match.
- */
-static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
- struct r600_texture *rtex)
-{
- if (rtex->resource.b.is_shared ||
- rtex->resource.b.b.nr_samples <= 1 ||
- rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
- return;
-
- assert(rscreen->chip_class >= GFX9 ||
- rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
- assert(rtex->resource.b.b.last_level == 0);
-
- if (rscreen->chip_class >= GFX9) {
- /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
- assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
-
- /* If you do swizzle_mode % 4, you'll get:
- * 0 = Depth
- * 1 = Standard,
- * 2 = Displayable
- * 3 = Rotated
- *
- * Depth-sample order isn't allowed:
- */
- assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
-
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
- break;
- case RADEON_MICRO_MODE_THIN:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
- break;
- case RADEON_MICRO_MODE_ROTATED:
- rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
- rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
- break;
- default: /* depth */
- assert(!"unexpected micro mode");
- return;
- }
- } else if (rscreen->chip_class >= CIK) {
- /* These magic numbers were copied from addrlib. It doesn't use
- * any definitions for them either. They are all 2D_TILED_THIN1
- * modes with different bpp and micro tile mode.
- */
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- rtex->surface.u.legacy.tiling_index[0] = 10;
- break;
- case RADEON_MICRO_MODE_THIN:
- rtex->surface.u.legacy.tiling_index[0] = 14;
- break;
- case RADEON_MICRO_MODE_ROTATED:
- rtex->surface.u.legacy.tiling_index[0] = 28;
- break;
- default: /* depth, thick */
- assert(!"unexpected micro mode");
- return;
- }
- } else { /* SI */
- switch (rtex->last_msaa_resolve_target_micro_mode) {
- case RADEON_MICRO_MODE_DISPLAY:
- switch (rtex->surface.bpe) {
- case 1:
- rtex->surface.u.legacy.tiling_index[0] = 10;
- break;
- case 2:
- rtex->surface.u.legacy.tiling_index[0] = 11;
- break;
- default: /* 4, 8 */
- rtex->surface.u.legacy.tiling_index[0] = 12;
- break;
- }
- break;
- case RADEON_MICRO_MODE_THIN:
- switch (rtex->surface.bpe) {
- case 1:
- rtex->surface.u.legacy.tiling_index[0] = 14;
- break;
- case 2:
- rtex->surface.u.legacy.tiling_index[0] = 15;
- break;
- case 4:
- rtex->surface.u.legacy.tiling_index[0] = 16;
- break;
- default: /* 8, 16 */
- rtex->surface.u.legacy.tiling_index[0] = 17;
- break;
- }
- break;
- default: /* depth, thick */
- assert(!"unexpected micro mode");
- return;
- }
- }
-
- rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
-
- p_atomic_inc(&rscreen->dirty_tex_counter);
-}
-
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
struct pipe_framebuffer_state *fb,
struct r600_atom *fb_state,
!(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
continue;
- /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
- if (rctx->chip_class == CIK &&
- tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
- rctx->screen->info.drm_major == 2 &&
- rctx->screen->info.drm_minor < 38) {
- continue;
- }
-
{
- /* 128-bit formats are unusupported */
+ /* 128-bit formats are unsupported */
if (tex->surface.bpe > 8) {
continue;
}
- /* RB+ doesn't work with CMASK fast clear on Stoney. */
- if (rctx->family == CHIP_STONEY)
- continue;
-
/* ensure CMASK is enabled */
r600_texture_alloc_cmask_separate(rctx->screen, tex);
if (tex->cmask.size == 0) {
p_atomic_inc(&rctx->screen->compressed_colortex_counter);
}
- /* We can change the micro tile mode before a full clear. */
- if (rctx->screen->chip_class >= SI)
- si_set_optimal_micro_tile_mode(rctx->screen, tex);
-
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
if (dirty_cbufs)
r = r600_init_surface(rscreen, &surface, templ,
array_mode, memobj->stride,
offset, true, is_scanout,
- false, false);
+ false);
if (r)
return NULL;
msg->body.decode.dt_field_mode = buf->base.interlaced;
msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
- ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface, RUVD_SURFACE_TYPE_LEGACY);
+ ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
return luma->resource.buf;
}
{
switch (u_reduce_video_profile(dec->base.profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- return (family >= CHIP_TONGA) ?
- RUVD_CODEC_H264_PERF : RUVD_CODEC_H264;
+ return RUVD_CODEC_H264;
case PIPE_VIDEO_FORMAT_VC1:
return RUVD_CODEC_VC1;
}
}
-static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec)
-{
- unsigned width_in_mb, height_in_mb, ctx_size;
- unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
- unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT);
-
- unsigned max_references = dec->base.max_references + 1;
-
- // picture width & height in 16 pixel units
- width_in_mb = width / VL_MACROBLOCK_WIDTH;
- height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2);
-
- if (!dec->use_legacy) {
- unsigned fs_in_mb = width_in_mb * height_in_mb;
- unsigned num_dpb_buffer;
- switch(dec->base.level) {
- case 30:
- num_dpb_buffer = 8100 / fs_in_mb;
- break;
- case 31:
- num_dpb_buffer = 18000 / fs_in_mb;
- break;
- case 32:
- num_dpb_buffer = 20480 / fs_in_mb;
- break;
- case 41:
- num_dpb_buffer = 32768 / fs_in_mb;
- break;
- case 42:
- num_dpb_buffer = 34816 / fs_in_mb;
- break;
- case 50:
- num_dpb_buffer = 110400 / fs_in_mb;
- break;
- case 51:
- num_dpb_buffer = 184320 / fs_in_mb;
- break;
- default:
- num_dpb_buffer = 184320 / fs_in_mb;
- break;
- }
- num_dpb_buffer++;
- max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
- ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256);
- } else {
- // the firmware seems to always assume a minimum of ref frames
- max_references = MAX2(NUM_H264_REFS, max_references);
- // macroblock context buffer
- ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256);
- }
-
- return ctx_size;
-}
-
static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec)
{
unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH);
static unsigned get_db_pitch_alignment(struct ruvd_decoder *dec)
{
- if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10)
- return 16;
- else
- return 32;
+ /* Always 16: the 32 alignment applied only to families >= CHIP_VEGA10,
+ * a case this code no longer checks for. `dec` is kept for the callers. */
+ return 16;
}
/* calculate size of reference picture buffer */
num_dpb_buffer++;
max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references);
dpb_size = image_size * max_references;
- if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
- (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) {
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment);
dpb_size += align(width_in_mb * height_in_mb * 32, alignment);
}
max_references = MAX2(NUM_H264_REFS, max_references);
// reference picture buffer
dpb_size = image_size * max_references;
- if ((dec->stream_type != RUVD_CODEC_H264_PERF) ||
- (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) {
+ if ((dec->stream_type != RUVD_CODEC_H264_PERF)) {
// macroblock context buffer
dpb_size += width_in_mb * height_in_mb * max_references * 192;
// IT surface buffer
result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6;
result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8;
- if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO)
- result.sps_info_flags |= 1 << 9;
if (pic->UseRefPicList == true)
result.sps_info_flags |= 1 << 10;
dec->msg->body.decode.bsd_size = bs_size;
dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec));
- if (dec->stream_type == RUVD_CODEC_H264_PERF &&
- ((struct r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10)
- dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size;
-
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
- if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
- dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
switch (u_reduce_video_profile(picture->profile)) {
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
goto error;
}
- dec->fb_size = (info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
- FB_BUFFER_SIZE;
+ dec->fb_size = FB_BUFFER_SIZE;
bs_buf_size = width * height * (512 / (16 * 16));
for (i = 0; i < NUM_BUFFERS; ++i) {
unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
rvid_clear_buffer(context, &dec->dpb);
}
- if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) {
- unsigned ctx_size = calc_ctx_size_h264_perf(dec);
- if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
- RVID_ERR("Can't allocated context buffer.\n");
- goto error;
- }
- rvid_clear_buffer(context, &dec->ctx);
- }
-
- if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) {
- if (!rvid_create_buffer(dec->screen, &dec->sessionctx,
- UVD_SESSION_CONTEXT_SIZE,
- PIPE_USAGE_DEFAULT)) {
- RVID_ERR("Can't allocated session ctx.\n");
- goto error;
- }
- rvid_clear_buffer(context, &dec->sessionctx);
- }
-
- if (info.family >= CHIP_VEGA10) {
- dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
- dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
- dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
- dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15;
- } else {
- dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
- dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
- dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
- dec->reg.cntl = RUVD_ENGINE_CNTL;
- }
+ dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0;
+ dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1;
+ dec->reg.cmd = RUVD_GPCOM_VCPU_CMD;
+ dec->reg.cntl = RUVD_ENGINE_CNTL;
map_msg_fb_it_buf(dec);
dec->msg->size = sizeof(*dec->msg);
}
/* calculate top/bottom offset */
-static unsigned texture_offset(struct radeon_surf *surface, unsigned layer,
- enum ruvd_surface_type type)
+static unsigned texture_offset(struct radeon_surf *surface, unsigned layer)
{
- switch (type) {
- default:
- case RUVD_SURFACE_TYPE_LEGACY:
- return surface->u.legacy.level[0].offset +
- layer * surface->u.legacy.level[0].slice_size;
- break;
- case RUVD_SURFACE_TYPE_GFX9:
- return surface->u.gfx9.surf_offset +
- layer * surface->u.gfx9.surf_slice_size;
- break;
- }
+ /* Legacy layout only: level-0 base offset plus one level-0 slice per layer. */
+ return surface->u.legacy.level[0].offset +
+ layer * surface->u.legacy.level[0].slice_size;
}
/* hw encode the aspect of macro tiles */
* fill decoding target field from the luma and chroma surfaces
*/
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma, enum ruvd_surface_type type)
+ struct radeon_surf *chroma)
{
- switch (type) {
+ /* Only the legacy surface layout is handled now: the pitch is
+ * level[0].nblk_x scaled by the block width, and the tiling/array mode
+ * follows the level-0 surface mode of the luma plane. */
+ msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
+ switch (luma->u.legacy.level[0].mode) {
+ case RADEON_SURF_MODE_LINEAR_ALIGNED:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
+ break;
+ case RADEON_SURF_MODE_1D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
+ break;
+ case RADEON_SURF_MODE_2D:
+ msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
+ msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
+ break;
default:
- case RUVD_SURFACE_TYPE_LEGACY:
- msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w;
- switch (luma->u.legacy.level[0].mode) {
- case RADEON_SURF_MODE_LINEAR_ALIGNED:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
- break;
- case RADEON_SURF_MODE_1D:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN;
- break;
- case RADEON_SURF_MODE_2D:
- msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN;
- break;
- default:
- assert(0);
- break;
- }
+ assert(0);
+ break;
+ }
- msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
+ msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0);
+ if (chroma)
+ msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0);
+ /* Field mode takes the bottom offsets from layer 1; otherwise the bottom
+ * offsets simply mirror the top offsets. */
+ if (msg->body.decode.dt_field_mode) {
+ msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1);
if (chroma)
- msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
- if (msg->body.decode.dt_field_mode) {
- msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
- if (chroma)
- msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
- } else {
- msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
- msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
- }
-
- if (chroma) {
- assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
- assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
- assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
- }
+ msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1);
+ } else {
+ msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
+ msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
+ }
- msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
- msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
- msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
- break;
- case RUVD_SURFACE_TYPE_GFX9:
- msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w;
- /* SWIZZLE LINEAR MODE */
- msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
- msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
- msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type);
- msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type);
- if (msg->body.decode.dt_field_mode) {
- msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type);
- msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type);
- } else {
- msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset;
- msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset;
- }
- msg->body.decode.dt_surf_tile_config = 0;
- break;
+ /* Only luma's bank width/height and macro tile aspect are encoded into
+ * dt_surf_tile_config below; a chroma surface, when given, is asserted
+ * to use the same values. */
+ if (chroma) {
+ assert(luma->u.legacy.bankw == chroma->u.legacy.bankw);
+ assert(luma->u.legacy.bankh == chroma->u.legacy.bankh);
+ assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea);
}
+
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw));
+ msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh));
+ msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea));
}
#define RUVD_VC1_PROFILE_MAIN 0x00000001
#define RUVD_VC1_PROFILE_ADVANCED 0x00000002
-enum ruvd_surface_type {
- RUVD_SURFACE_TYPE_LEGACY = 0,
- RUVD_SURFACE_TYPE_GFX9
-};
-
struct ruvd_mvc_element {
uint16_t viewOrderIndex;
uint16_t viewId;
/* fill decoding target field from the luma and chroma surfaces */
void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma,
- struct radeon_surf *chroma, enum ruvd_surface_type type);
+ struct radeon_surf *chroma);
#endif
void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot,
signed *luma_offset, signed *chroma_offset)
{
- struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen;
unsigned pitch, vpitch, fsize;
- if (rscreen->chip_class < GFX9) {
- pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
- vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
- } else {
- pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256);
- vpitch = align(enc->luma->u.gfx9.surf_height, 16);
- }
+ pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128);
+ vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16);
fsize = pitch * (vpitch + vpitch / 2);
*luma_offset = slot->index * fsize;
if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) ||
rscreen->info.drm_major == 3)
enc->use_vui = true;
- if (rscreen->info.family >= CHIP_TONGA &&
- rscreen->info.family != CHIP_STONEY &&
- rscreen->info.family != CHIP_POLARIS11 &&
- rscreen->info.family != CHIP_POLARIS12)
- enc->dual_pipe = true;
- /* TODO enable B frame with dual instance */
- if ((rscreen->info.family >= CHIP_TONGA) &&
- (templ->max_references == 1) &&
- (rscreen->info.vce_harvest_config == 0))
- enc->dual_inst = true;
enc->base = *templ;
enc->base.context = context;
get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf);
- cpb_size = (rscreen->chip_class < GFX9) ?
- align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
- align(tmp_surf->u.legacy.level[0].nblk_y, 32) :
-
- align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) *
- align(tmp_surf->u.gfx9.surf_height, 32);
+ cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) *
+ align(tmp_surf->u.legacy.level[0].nblk_y, 32);
cpb_size = cpb_size * 3 / 2;
cpb_size = cpb_size * enc->cpb_num;
if (!surfaces[i])
continue;
- if (rctx->chip_class < GFX9) {
- /* choose the smallest bank w/h for now */
- wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
- if (wh < best_wh) {
- best_wh = wh;
- best_tiling = i;
- }
+ /* choose the smallest bank w/h for now */
+ wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
+ if (wh < best_wh) {
+ best_wh = wh;
+ best_tiling = i;
}
}
/* adjust the texture layer offsets */
off = align(off, surfaces[i]->surf_alignment);
- if (rctx->chip_class < GFX9) {
- /* copy the tiling parameters */
- surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
- surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
- surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
- surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
+ /* copy the tiling parameters */
+ surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
+ surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
+ surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea;
+ surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split;
- for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
- surfaces[i]->u.legacy.level[j].offset += off;
- } else
- surfaces[i]->u.gfx9.surf_offset += off;
+ for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
+ surfaces[i]->u.legacy.level[j].offset += off;
off += surfaces[i]->surf_size;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
+ return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return (rscreen->family < CHIP_TONGA) ? 1152 : 2304;
+ return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
return PIPE_FORMAT_NV12;
case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
return true;
case PIPE_VIDEO_CAP_STACKED_FRAMES:
- return (rscreen->family < CHIP_TONGA) ? 1 : 2;
+ return 1;
default:
return 0;
}
/* no support for MPEG4 on older hw */
return rscreen->family >= CHIP_PALM;
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
- if ((rscreen->family == CHIP_POLARIS10 ||
- rscreen->family == CHIP_POLARIS11) &&
- info.uvd_fw_version < UVD_FW_1_66_16 ) {
- RVID_ERR("POLARIS10/11 firmware version need to be updated.\n");
- return false;
- }
return true;
case PIPE_VIDEO_FORMAT_VC1:
return true;
case PIPE_VIDEO_FORMAT_HEVC:
- /* Carrizo only supports HEVC Main */
- if (rscreen->family >= CHIP_STONEY)
- return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN ||
- profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10);
- else if (rscreen->family >= CHIP_CARRIZO)
- return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
return false;
case PIPE_VIDEO_FORMAT_JPEG:
- if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10)
- return false;
- if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) {
- RVID_ERR("No MJPEG support for the kernel version\n");
- return false;
- }
- return true;
+ return false;
default:
return false;
}
case PIPE_VIDEO_CAP_NPOT_TEXTURES:
return 1;
case PIPE_VIDEO_CAP_MAX_WIDTH:
- return (rscreen->family < CHIP_TONGA) ? 2048 : 4096;
+ return 2048;
case PIPE_VIDEO_CAP_MAX_HEIGHT:
- return (rscreen->family < CHIP_TONGA) ? 1152 : 4096;
+ return 1152;
case PIPE_VIDEO_CAP_PREFERED_FORMAT:
if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)
return PIPE_FORMAT_P016;
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
- return (rscreen->family < CHIP_TONGA) ? 41 : 52;
+ return 41;
case PIPE_VIDEO_PROFILE_HEVC_MAIN:
case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
return 186;