From bee61d16c8c48b0339d1db9bf4e6bf1ddc6685ff Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 9 Oct 2017 13:30:02 +1000 Subject: [PATCH] r600: drop a bunch of post-cayman code. (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Now that Marek has split the two drivers apart, drop a bunch of unnecessary code from the r600 half. There is probably a bunch more hiding in the video code. No piglit regressions on caicos. v2: fix HAVE_LLVM protected code Acked-by: Nicolai Hähnle Acked-by: Marek Olšák Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_gpu_load.c | 20 - src/gallium/drivers/r600/r600_pipe_common.c | 197 +----- src/gallium/drivers/r600/r600_pipe_common.h | 75 --- src/gallium/drivers/r600/r600_query.c | 102 +-- src/gallium/drivers/r600/r600_query.h | 1 - src/gallium/drivers/r600/r600_streamout.c | 60 +- src/gallium/drivers/r600/r600_texture.c | 675 +++----------------- src/gallium/drivers/r600/r600_uvd.c | 2 +- src/gallium/drivers/r600/radeon_uvd.c | 222 ++----- src/gallium/drivers/r600/radeon_uvd.h | 7 +- src/gallium/drivers/r600/radeon_vce.c | 28 +- src/gallium/drivers/r600/radeon_video.c | 61 +- 12 files changed, 199 insertions(+), 1251 deletions(-) diff --git a/src/gallium/drivers/r600/r600_gpu_load.c b/src/gallium/drivers/r600/r600_gpu_load.c index d35be4f327a..c15fb9dfa06 100644 --- a/src/gallium/drivers/r600/r600_gpu_load.c +++ b/src/gallium/drivers/r600/r600_gpu_load.c @@ -104,26 +104,6 @@ static void r600_update_mmio_counters(struct r600_common_screen *rscreen, UPDATE_COUNTER(gui, GUI_ACTIVE); gui_busy = GUI_ACTIVE(value); - if (rscreen->chip_class == CIK || rscreen->chip_class == VI) { - /* SRBM_STATUS2 */ - rscreen->ws->read_registers(rscreen->ws, SRBM_STATUS2, 1, &value); - - UPDATE_COUNTER(sdma, SDMA_BUSY); - sdma_busy = SDMA_BUSY(value); - } - - if (rscreen->chip_class >= VI) { - /* CP_STAT */ - rscreen->ws->read_registers(rscreen->ws, CP_STAT, 1, &value); - - UPDATE_COUNTER(pfp, 
PFP_BUSY); - UPDATE_COUNTER(meq, MEQ_BUSY); - UPDATE_COUNTER(me, ME_BUSY); - UPDATE_COUNTER(surf_sync, SURFACE_SYNC_BUSY); - UPDATE_COUNTER(cp_dma, DMA_BUSY); - UPDATE_COUNTER(scratch_ram, SCRATCH_RAM_BUSY); - } - value = gui_busy || sdma_busy; UPDATE_COUNTER(gpu, IDENTITY); } diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 56297cf4db8..066d10a451f 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -111,71 +111,12 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, event_flags; unsigned sel = EOP_DATA_SEL(data_sel); - /* Wait for write confirmation before writing data, but don't send - * an interrupt. */ - if (ctx->chip_class >= SI && data_sel != EOP_DATA_SEL_DISCARD) - sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); - - if (ctx->chip_class >= GFX9) { - /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion - * counters) must immediately precede every timestamp event to - * prevent a GPU hang on GFX9. - * - * Occlusion queries don't need to do it here, because they - * always do ZPASS_DONE before the timestamp. 
- */ - if (ctx->chip_class == GFX9 && - query_type != PIPE_QUERY_OCCLUSION_COUNTER && - query_type != PIPE_QUERY_OCCLUSION_PREDICATE) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - - assert(16 * ctx->screen->info.num_render_backends <= - scratch->b.b.width0); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); - radeon_emit(cs, scratch->gpu_address); - radeon_emit(cs, scratch->gpu_address >> 32); - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(cs, op); - radeon_emit(cs, sel); - radeon_emit(cs, va); /* address lo */ - radeon_emit(cs, va >> 32); /* address hi */ - radeon_emit(cs, new_fence); /* immediate data lo */ - radeon_emit(cs, 0); /* immediate data hi */ - radeon_emit(cs, 0); /* unused */ - } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - uint64_t va = scratch->gpu_address; - - /* Two EOP events are required to make all engines go idle - * (and optional cache flushes executed) before the timestamp - * is written. 
- */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, 0); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, new_fence); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - } + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, op); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xffff) | sel); + radeon_emit(cs, new_fence); /* immediate data */ + radeon_emit(cs, 0); /* unused */ if (buf) r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, @@ -186,10 +127,6 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) { unsigned dwords = 6; - if (screen->chip_class == CIK || - screen->chip_class == VI) - dwords *= 2; - if (!screen->info.has_virtual_memory) dwords += 2; @@ -302,10 +239,7 @@ static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) { struct radeon_winsys_cs *cs = rctx->dma.cs; - /* NOP waits for idle on Evergreen and later. */ - if (rctx->chip_class >= CIK) - radeon_emit(cs, 0x00000000); /* NOP */ - else if (rctx->chip_class >= EVERGREEN) + if (rctx->chip_class >= EVERGREEN) radeon_emit(cs, 0xf0000000); /* NOP */ else { /* TODO: R600-R700 should use the FENCE packet. 
@@ -760,16 +694,6 @@ bool r600_common_context_init(struct r600_common_context *rctx, r600_query_init(rctx); cayman_init_msaa(&rctx->b); - if (rctx->chip_class == CIK || - rctx->chip_class == VI || - rctx->chip_class == GFX9) { - rctx->eop_bug_scratch = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, - 16 * rscreen->info.num_render_backends); - if (!rctx->eop_bug_scratch) - return false; - } - rctx->allocator_zeroed_memory = u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 0, PIPE_USAGE_DEFAULT, 0, true); @@ -802,20 +726,6 @@ bool r600_common_context_init(struct r600_common_context *rctx, void r600_common_context_cleanup(struct r600_common_context *rctx) { - unsigned i,j; - - /* Release DCC stats. */ - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { - assert(!rctx->dcc_stats[i].query_active); - - for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++) - if (rctx->dcc_stats[i].ps_stats[j]) - rctx->b.destroy_query(&rctx->b, - rctx->dcc_stats[i].ps_stats[j]); - - r600_texture_reference(&rctx->dcc_stats[i].tex, NULL); - } - if (rctx->query_result_shader) rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); @@ -886,15 +796,7 @@ static const struct debug_named_value common_debug_options[] = { { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, - { "nodcc", DBG_NO_DCC, "Disable DCC." }, - { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, - { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." }, - { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." 
}, - { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, - { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" }, - { "nodpbb", DBG_NO_DPBB, "Disable DPBB." }, - { "nodfsm", DBG_NO_DFSM, "Disable DFSM." }, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -944,26 +846,6 @@ static const char *r600_get_family_name(const struct r600_common_screen *rscreen case CHIP_CAICOS: return "AMD CAICOS"; case CHIP_CAYMAN: return "AMD CAYMAN"; case CHIP_ARUBA: return "AMD ARUBA"; - case CHIP_TAHITI: return "AMD TAHITI"; - case CHIP_PITCAIRN: return "AMD PITCAIRN"; - case CHIP_VERDE: return "AMD CAPE VERDE"; - case CHIP_OLAND: return "AMD OLAND"; - case CHIP_HAINAN: return "AMD HAINAN"; - case CHIP_BONAIRE: return "AMD BONAIRE"; - case CHIP_KAVERI: return "AMD KAVERI"; - case CHIP_KABINI: return "AMD KABINI"; - case CHIP_HAWAII: return "AMD HAWAII"; - case CHIP_MULLINS: return "AMD MULLINS"; - case CHIP_TONGA: return "AMD TONGA"; - case CHIP_ICELAND: return "AMD ICELAND"; - case CHIP_CARRIZO: return "AMD CARRIZO"; - case CHIP_FIJI: return "AMD FIJI"; - case CHIP_POLARIS10: return "AMD POLARIS10"; - case CHIP_POLARIS11: return "AMD POLARIS11"; - case CHIP_POLARIS12: return "AMD POLARIS12"; - case CHIP_STONEY: return "AMD STONEY"; - case CHIP_VEGA10: return "AMD VEGA10"; - case CHIP_RAVEN: return "AMD RAVEN"; default: return "AMD unknown"; } } @@ -979,25 +861,13 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen) &mesa_timestamp)) { char *timestamp_str; int res = -1; - if (rscreen->chip_class < SI) { - res = asprintf(&timestamp_str, "%u",mesa_timestamp); - } -#if HAVE_LLVM - else { - uint32_t llvm_timestamp; - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(&timestamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } - } -#endif + + res = asprintf(&timestamp_str, 
"%u",mesa_timestamp); if (res != -1) { /* These flags affect shader compilation. */ uint64_t shader_debug_flags = rscreen->debug_flags & (DBG_FS_CORRECT_DERIVS_AFTER_KILL | - DBG_SI_SCHED | DBG_UNSAFE_MATH); rscreen->disk_shader_cache = @@ -1120,32 +990,6 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) case CHIP_ARUBA: return "cayman"; - case CHIP_TAHITI: return "tahiti"; - case CHIP_PITCAIRN: return "pitcairn"; - case CHIP_VERDE: return "verde"; - case CHIP_OLAND: return "oland"; - case CHIP_HAINAN: return "hainan"; - case CHIP_BONAIRE: return "bonaire"; - case CHIP_KABINI: return "kabini"; - case CHIP_KAVERI: return "kaveri"; - case CHIP_HAWAII: return "hawaii"; - case CHIP_MULLINS: - return "mullins"; - case CHIP_TONGA: return "tonga"; - case CHIP_ICELAND: return "iceland"; - case CHIP_CARRIZO: return "carrizo"; - case CHIP_FIJI: - return "fiji"; - case CHIP_STONEY: - return "stoney"; - case CHIP_POLARIS10: - return "polaris10"; - case CHIP_POLARIS11: - case CHIP_POLARIS12: /* same as polaris11 */ - return "polaris11"; - case CHIP_VEGA10: - case CHIP_RAVEN: - return "gfx900"; default: return ""; } @@ -1154,19 +998,6 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) static unsigned get_max_threads_per_block(struct r600_common_screen *screen, enum pipe_shader_ir ir_type) { - if (ir_type != PIPE_SHADER_IR_TGSI) - return 256; - - /* Only 16 waves per thread-group on gfx9. */ - if (screen->chip_class >= GFX9) - return 1024; - - /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice - * round number. 
- */ - if (screen->chip_class >= SI) - return 2048; - return 256; } @@ -1241,8 +1072,6 @@ static int r600_get_compute_param(struct pipe_screen *screen, if (ret) { uint32_t *address_bits = ret; address_bits[0] = 32; - if (rscreen->chip_class >= SI) - address_bits[0] = 64; } return 1 * sizeof(uint32_t); @@ -1322,11 +1151,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: if (ret) { uint64_t *max_variable_threads_per_block = ret; - if (rscreen->chip_class >= SI && - ir_type == PIPE_SHADER_IR_TGSI) - *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - else - *max_variable_threads_per_block = 0; + *max_variable_threads_per_block = 0; } return sizeof(uint64_t); } @@ -1516,8 +1341,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); - rscreen->has_rbplus = false; - rscreen->rbplus_allowed = false; r600_disk_cache_create(rscreen); diff --git a/src/gallium/drivers/r600/r600_pipe_common.h b/src/gallium/drivers/r600/r600_pipe_common.h index 52f39cdb7ac..87c48e33e31 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.h +++ b/src/gallium/drivers/r600/r600_pipe_common.h @@ -54,7 +54,6 @@ struct u_log_context; #define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1) #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2) -#define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3) #define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4) #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0) @@ -105,19 +104,11 @@ struct u_log_context; #define DBG_INFO (1ull << 40) #define DBG_NO_WC (1ull << 41) #define DBG_CHECK_VM (1ull << 42) -#define DBG_NO_DCC (1ull << 43) -#define 
DBG_NO_DCC_CLEAR (1ull << 44) -#define DBG_NO_RB_PLUS (1ull << 45) -#define DBG_SI_SCHED (1ull << 46) -#define DBG_MONOLITHIC_SHADERS (1ull << 47) /* gap */ #define DBG_UNSAFE_MATH (1ull << 49) -#define DBG_NO_DCC_FB (1ull << 50) #define DBG_TEST_VMFAULT_CP (1ull << 51) #define DBG_TEST_VMFAULT_SDMA (1ull << 52) #define DBG_TEST_VMFAULT_SHADER (1ull << 53) -#define DBG_NO_DPBB (1ull << 54) -#define DBG_NO_DFSM (1ull << 55) #define R600_MAP_BUFFER_ALIGNMENT 64 #define R600_MAX_VIEWPORTS 16 @@ -238,14 +229,12 @@ struct r600_texture { struct r600_fmask_info fmask; struct r600_cmask_info cmask; struct r600_resource *cmask_buffer; - uint64_t dcc_offset; /* 0 = disabled */ unsigned cb_color_info; /* fast clear enable bit */ unsigned color_clear_value[2]; unsigned last_msaa_resolve_target_micro_mode; /* Depth buffer compression and fast clear. */ uint64_t htile_offset; - bool tc_compatible_htile; bool depth_cleared; /* if it was cleared at least once */ float depth_clear_value; bool stencil_cleared; /* if it was cleared at least once */ @@ -253,34 +242,6 @@ struct r600_texture { bool non_disp_tiling; /* R600-Cayman only */ - /* Whether the texture is a displayable back buffer and needs DCC - * decompression, which is expensive. Therefore, it's enabled only - * if statistics suggest that it will pay off and it's allocated - * separately. It can't be bound as a sampler by apps. Limited to - * target == 2D and last_level == 0. If enabled, dcc_offset contains - * the absolute GPUVM address, not the relative one. - */ - struct r600_resource *dcc_separate_buffer; - /* When DCC is temporarily disabled, the separate buffer is here. */ - struct r600_resource *last_dcc_separate_buffer; - /* We need to track DCC dirtiness, because st/dri usually calls - * flush_resource twice per frame (not a bug) and we don't wanna - * decompress DCC twice. 
Also, the dirty tracking must be done even - * if DCC isn't used, because it's required by the DCC usage analysis - * for a possible future enablement. - */ - bool separate_dcc_dirty; - /* Statistics gathering for the DCC enablement heuristic. */ - bool dcc_gather_statistics; - /* Estimate of how much this color buffer is written to in units of - * full-screen draws: ps_invocations / (width * height) - * Shader kills, late Z, and blending with trivial discards make it - * inaccurate (we need to count CB updates, not PS invocations). - */ - unsigned ps_draw_ratio; - /* The number of clears since the last DCC usage analysis. */ - unsigned num_slow_clears; - /* Counter that should be non-zero if the texture is bound to a * framebuffer. Implemented in radeonsi only. */ @@ -302,7 +263,6 @@ struct r600_surface { bool export_16bpc; bool color_is_int8; bool color_is_int10; - bool dcc_incompatible; /* Color registers. */ unsigned cb_color_info; @@ -313,16 +273,10 @@ struct r600_surface { unsigned cb_color_pitch; /* EG and later */ unsigned cb_color_slice; /* EG and later */ unsigned cb_color_attrib; /* EG and later */ - unsigned cb_color_attrib2; /* GFX9 and later */ - unsigned cb_dcc_control; /* VI and later */ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ unsigned cb_color_mask; /* R600 only */ - unsigned spi_shader_col_format; /* SI+, no blending, no alpha-to-coverage. */ - unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */ - unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */ - unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. 
R600 only */ @@ -332,12 +286,10 @@ struct r600_surface { uint64_t db_htile_data_base; unsigned db_depth_info; /* R600 only, then SI and later */ unsigned db_z_info; /* EG and later */ - unsigned db_z_info2; /* GFX9+ */ unsigned db_depth_view; unsigned db_depth_size; unsigned db_depth_slice; /* EG and later */ unsigned db_stencil_info; /* EG and later */ - unsigned db_stencil_info2; /* GFX9+ */ unsigned db_prefetch_limit; /* R600 only */ unsigned db_htile_surface; unsigned db_preload_control; /* EG and later */ @@ -399,8 +351,6 @@ struct r600_common_screen { uint64_t debug_flags; bool has_cp_dma; bool has_streamout; - bool has_rbplus; /* if RB+ registers exist */ - bool rbplus_allowed; /* if RB+ is allowed */ struct disk_cache *disk_shader_cache; @@ -624,7 +574,6 @@ struct r600_common_context { unsigned num_L2_writebacks; unsigned num_resident_handles; uint64_t num_alloc_tex_transfer_bytes; - unsigned last_tex_ps_draw_ratio; /* for query */ /* Render condition. */ struct r600_atom render_cond_atom; @@ -642,25 +591,6 @@ struct r600_common_context { float sample_locations_8x[8][2]; float sample_locations_16x[16][2]; - /* Statistics gathering for the DCC enablement heuristic. It can't be - * in r600_texture because r600_texture can be shared by multiple - * contexts. This is for back buffers only. We shouldn't get too many - * of those. - * - * X11 DRI3 rotates among a finite set of back buffers. They should - * all fit in this array. If they don't, separate DCC might never be - * enabled by DCC stat gathering. - */ - struct { - struct r600_texture *tex; - /* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */ - struct pipe_query *ps_stats[3]; - /* If all slots are used and another slot is needed, - * the least recently used slot is evicted based on this. 
*/ - int64_t last_use_timestamp; - bool query_active; - } dcc_stats[5]; - struct pipe_debug_callback debug; struct pipe_device_reset_callback device_reset_callback; struct u_log_context *log; @@ -690,9 +620,6 @@ struct r600_common_context { unsigned first_layer, unsigned last_layer, unsigned first_sample, unsigned last_sample); - void (*decompress_dcc)(struct pipe_context *ctx, - struct r600_texture *rtex); - /* Reallocate the buffer and update all resource bindings where * the buffer is bound, including all resource descriptors. */ void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf); @@ -856,8 +783,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct r600_atom *fb_state, unsigned *buffers, ubyte *dirty_cbufs, const union pipe_color_union *color); -bool r600_texture_disable_dcc(struct r600_common_context *rctx, - struct r600_texture *rtex); void r600_init_screen_texture_functions(struct r600_common_screen *rscreen); void r600_init_context_texture_functions(struct r600_common_context *rctx); diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 4c6311c79e0..86e6d097869 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -169,7 +169,6 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx, case R600_QUERY_GPU_TEMPERATURE: case R600_QUERY_CURRENT_GPU_SCLK: case R600_QUERY_CURRENT_GPU_MCLK: - case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO: case R600_QUERY_NUM_MAPPED_BUFFERS: query->begin_result = 0; break; @@ -387,9 +386,6 @@ static bool r600_query_sw_end(struct r600_common_context *rctx, case R600_QUERY_NUM_SHADERS_CREATED: query->end_result = p_atomic_read(&rctx->screen->num_shaders_created); break; - case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO: - query->end_result = rctx->last_tex_ps_draw_ratio; - break; case R600_QUERY_NUM_SHADER_CACHE_HITS: query->end_result = p_atomic_read(&rctx->screen->num_shader_cache_hits); @@ 
-763,26 +759,12 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx, emit_sample_streamout(cs, va + 32 * stream, stream); break; case PIPE_QUERY_TIME_ELAPSED: - if (ctx->chip_class >= SI) { - /* Write the timestamp from the CP not waiting for - * outstanding draws (top-of-pipe). - */ - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_COUNT_SEL | - COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | - COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } else { - /* Write the timestamp after the last draw is done. - * (bottom-of-pipe) - */ - r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, - 0, EOP_DATA_SEL_TIMESTAMP, - NULL, va, 0, query->b.type); - } + /* Write the timestamp after the last draw is done. + * (bottom-of-pipe) + */ + r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, + 0, EOP_DATA_SEL_TIMESTAMP, + NULL, va, 0, query->b.type); break; case PIPE_QUERY_PIPELINE_STATISTICS: radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); @@ -928,16 +910,9 @@ static void emit_set_predicate(struct r600_common_context *ctx, { struct radeon_winsys_cs *cs = ctx->gfx.cs; - if (ctx->chip_class >= GFX9) { - radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - } else { - radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); - radeon_emit(cs, va); - radeon_emit(cs, op | ((va >> 32) & 0xFF)); - } + radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0)); + radeon_emit(cs, va); + radeon_emit(cs, op | ((va >> 32) & 0xFF)); r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, RADEON_PRIO_QUERY); } @@ -1803,55 +1778,11 @@ static void r600_render_condition(struct pipe_context *ctx, /* Compute the size of SET_PREDICATION packets. 
*/ atom->num_dw = 0; if (query) { - bool needs_workaround = false; + for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) + atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; - /* There is a firmware regression in VI which causes successive - * SET_PREDICATION packets to give the wrong answer for - * non-inverted stream overflow predication. - */ - if (rctx->chip_class >= VI && !condition && - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE || - (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE && - (rquery->buffer.previous || - rquery->buffer.results_end > rquery->result_size)))) { - needs_workaround = true; - } - - if (needs_workaround && !rquery->workaround_buf) { - bool old_force_off = rctx->render_cond_force_off; - rctx->render_cond_force_off = true; - - u_suballocator_alloc( - rctx->allocator_zeroed_memory, 8, 8, - &rquery->workaround_offset, - (struct pipe_resource **)&rquery->workaround_buf); - - /* Reset to NULL to avoid a redundant SET_PREDICATION - * from launching the compute grid. - */ - rctx->render_cond = NULL; - - ctx->get_query_result_resource( - ctx, query, true, PIPE_QUERY_TYPE_U64, 0, - &rquery->workaround_buf->b.b, rquery->workaround_offset); - - /* Settings this in the render cond atom is too late, - * so set it here. 
*/ - rctx->flags |= rctx->screen->barrier_flags.L2_to_cp | - R600_CONTEXT_FLUSH_FOR_RENDER_COND; - - rctx->render_cond_force_off = old_force_off; - } - - if (needs_workaround) { - atom->num_dw = 5; - } else { - for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) - atom->num_dw += (qbuf->results_end / rquery->result_size) * 5; - - if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) - atom->num_dw *= R600_MAX_STREAMS; - } + if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) + atom->num_dw *= R600_MAX_STREAMS; } rctx->render_cond = query; @@ -2045,7 +1976,6 @@ static struct pipe_driver_query_info r600_driver_query_list[] = { X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE), X("VRAM-vis-usage", VRAM_VIS_USAGE, BYTES, AVERAGE), X("GTT-usage", GTT_USAGE, BYTES, AVERAGE), - X("back-buffer-ps-draw-ratio", BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE), /* GPIN queries are for the benefit of old versions of GPUPerfStudio, * which use it as a fallback path to detect the GPU type. @@ -2095,12 +2025,6 @@ static unsigned r600_get_num_queries(struct r600_common_screen *rscreen) { if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) return ARRAY_SIZE(r600_driver_query_list); - else if (rscreen->info.drm_major == 3) { - if (rscreen->chip_class >= VI) - return ARRAY_SIZE(r600_driver_query_list); - else - return ARRAY_SIZE(r600_driver_query_list) - 7; - } else return ARRAY_SIZE(r600_driver_query_list) - 25; } diff --git a/src/gallium/drivers/r600/r600_query.h b/src/gallium/drivers/r600/r600_query.h index 7455c8e63a8..1a3c6839eb3 100644 --- a/src/gallium/drivers/r600/r600_query.h +++ b/src/gallium/drivers/r600/r600_query.h @@ -104,7 +104,6 @@ enum { R600_QUERY_GPU_SCRATCH_RAM_BUSY, R600_QUERY_NUM_COMPILATIONS, R600_QUERY_NUM_SHADERS_CREATED, - R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO, R600_QUERY_NUM_SHADER_CACHE_HITS, R600_QUERY_GPIN_ASIC_ID, R600_QUERY_GPIN_NUM_SIMD, diff --git a/src/gallium/drivers/r600/r600_streamout.c 
b/src/gallium/drivers/r600/r600_streamout.c index 6d45a07d5c9..78334066c39 100644 --- a/src/gallium/drivers/r600/r600_streamout.c +++ b/src/gallium/drivers/r600/r600_streamout.c @@ -95,14 +95,10 @@ void r600_streamout_buffers_dirty(struct r600_common_context *rctx) begin->num_dw = 12; /* flush_vgt_streamout */ - if (rctx->chip_class >= SI) { - begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */ - } else { - begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */ + begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */ - if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) - begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */ - } + if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) + begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */ begin->num_dw += num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */ @@ -168,11 +164,7 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx) reg_strmout_cntl = R_008490_CP_STRMOUT_CNTL; } - if (rctx->chip_class >= CIK) { - radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); - } else { - radeon_set_config_reg(cs, reg_strmout_cntl, 0); - } + radeon_set_config_reg(cs, reg_strmout_cntl, 0); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); @@ -201,38 +193,28 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r t[i]->stride_in_dw = stride_in_dw[i]; - if (rctx->chip_class >= SI) { - /* SI binds streamout buffers as shader resources. - * VGT only counts primitives and tells the shader - * through SGPRs what to do. 
*/ - radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2); - radeon_emit(cs, (t[i]->b.buffer_offset + - t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */ - radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - } else { - uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address; + uint64_t va = r600_resource(t[i]->b.buffer)->gpu_address; - update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); + update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i); - radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3); - radeon_emit(cs, (t[i]->b.buffer_offset + - t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */ - radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */ - radeon_emit(cs, va >> 8); /* BUFFER_BASE */ + radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 3); + radeon_emit(cs, (t[i]->b.buffer_offset + + t[i]->b.buffer_size) >> 2); /* BUFFER_SIZE (in DW) */ + radeon_emit(cs, stride_in_dw[i]); /* VTX_STRIDE (in DW) */ + radeon_emit(cs, va >> 8); /* BUFFER_BASE */ - r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); + r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); - /* R7xx requires this packet after updating BUFFER_BASE. - * Without this, R7xx locks up. */ - if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) { - radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0)); - radeon_emit(cs, i); - radeon_emit(cs, va >> 8); + /* R7xx requires this packet after updating BUFFER_BASE. + * Without this, R7xx locks up. 
*/ + if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740) { + radeon_emit(cs, PKT3(PKT3_STRMOUT_BASE_UPDATE, 1, 0)); + radeon_emit(cs, i); + radeon_emit(cs, va >> 8); - r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), - RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); - } + r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer), + RADEON_USAGE_WRITE, RADEON_PRIO_SHADER_RW_BUFFER); } if (rctx->streamout.append_bitmask & (1 << i) && t[i]->buf_filled_size_valid) { diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 15a1ec92675..b8411144900 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -176,36 +176,20 @@ static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen, unsigned *stride, unsigned *layer_stride) { - if (rscreen->chip_class >= GFX9) { - *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe; - *layer_stride = rtex->surface.u.gfx9.surf_slice_size; - - if (!box) - return 0; - - /* Each texture is an array of slices. Each slice is an array - * of mipmap levels. */ - return box->z * rtex->surface.u.gfx9.surf_slice_size + - rtex->surface.u.gfx9.offset[level] + - (box->y / rtex->surface.blk_h * - rtex->surface.u.gfx9.surf_pitch + - box->x / rtex->surface.blk_w) * rtex->surface.bpe; - } else { - *stride = rtex->surface.u.legacy.level[level].nblk_x * - rtex->surface.bpe; - *layer_stride = rtex->surface.u.legacy.level[level].slice_size; - - if (!box) - return rtex->surface.u.legacy.level[level].offset; - - /* Each texture is an array of mipmap levels. Each level is - * an array of slices. 
*/ - return rtex->surface.u.legacy.level[level].offset + - box->z * rtex->surface.u.legacy.level[level].slice_size + - (box->y / rtex->surface.blk_h * - rtex->surface.u.legacy.level[level].nblk_x + - box->x / rtex->surface.blk_w) * rtex->surface.bpe; - } + *stride = rtex->surface.u.legacy.level[level].nblk_x * + rtex->surface.bpe; + *layer_stride = rtex->surface.u.legacy.level[level].slice_size; + + if (!box) + return rtex->surface.u.legacy.level[level].offset; + + /* Each texture is an array of mipmap levels. Each level is + * an array of slices. */ + return rtex->surface.u.legacy.level[level].offset + + box->z * rtex->surface.u.legacy.level[level].slice_size + + (box->y / rtex->surface.blk_h * + rtex->surface.u.legacy.level[level].nblk_x + + box->x / rtex->surface.blk_w) * rtex->surface.bpe; } static int r600_init_surface(struct r600_common_screen *rscreen, @@ -216,8 +200,7 @@ static int r600_init_surface(struct r600_common_screen *rscreen, unsigned offset, bool is_imported, bool is_scanout, - bool is_flushed_depth, - bool tc_compatible_htile) + bool is_flushed_depth) { const struct util_format_description *desc = util_format_description(ptex->format); @@ -239,29 +222,10 @@ static int r600_init_surface(struct r600_common_screen *rscreen, if (!is_flushed_depth && is_depth) { flags |= RADEON_SURF_ZBUFFER; - if (tc_compatible_htile && - (rscreen->chip_class >= GFX9 || - array_mode == RADEON_SURF_MODE_2D)) { - /* TC-compatible HTILE only supports Z32_FLOAT. - * GFX9 also supports Z16_UNORM. - * On VI, promote Z16 to Z32. DB->CB copies will convert - * the format for transfers. 
- */ - if (rscreen->chip_class == VI) - bpe = 4; - - flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; - } - if (is_stencil) flags |= RADEON_SURF_SBUFFER; } - if (rscreen->chip_class >= VI && - (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC || - ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)) - flags |= RADEON_SURF_DISABLE_DCC; - if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) { /* This should catch bugs in gallium users setting incorrect flags. */ assert(ptex->nr_samples <= 1 && @@ -286,26 +250,21 @@ static int r600_init_surface(struct r600_common_screen *rscreen, return r; } - if (rscreen->chip_class >= GFX9) { - assert(!pitch_in_bytes_override || - pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe); - surface->u.gfx9.surf_offset = offset; - } else { - if (pitch_in_bytes_override && - pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) { - /* old ddx on evergreen over estimate alignment for 1d, only 1 level - * for those - */ - surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe; - surface->u.legacy.level[0].slice_size = pitch_in_bytes_override * - surface->u.legacy.level[0].nblk_y; - } + if (pitch_in_bytes_override && + pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) { + /* old ddx on evergreen over estimate alignment for 1d, only 1 level + * for those + */ + surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe; + surface->u.legacy.level[0].slice_size = pitch_in_bytes_override * + surface->u.legacy.level[0].nblk_y; + } - if (offset) { - for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) - surface->u.legacy.level[i].offset += offset; - } + if (offset) { + for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i) + surface->u.legacy.level[i].offset += offset; } + return 0; } @@ -317,22 +276,18 @@ static void r600_texture_init_metadata(struct r600_common_screen *rscreen, memset(metadata, 0, sizeof(*metadata)); - if (rscreen->chip_class >= GFX9) { - metadata->u.gfx9.swizzle_mode = 
surface->u.gfx9.surf.swizzle_mode; - } else { - metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? - RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? - RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; - metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; - metadata->u.legacy.bankw = surface->u.legacy.bankw; - metadata->u.legacy.bankh = surface->u.legacy.bankh; - metadata->u.legacy.tile_split = surface->u.legacy.tile_split; - metadata->u.legacy.mtilea = surface->u.legacy.mtilea; - metadata->u.legacy.num_banks = surface->u.legacy.num_banks; - metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; - metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; - } + metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? + RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? 
+ RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR; + metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config; + metadata->u.legacy.bankw = surface->u.legacy.bankw; + metadata->u.legacy.bankh = surface->u.legacy.bankh; + metadata->u.legacy.tile_split = surface->u.legacy.tile_split; + metadata->u.legacy.mtilea = surface->u.legacy.mtilea; + metadata->u.legacy.num_banks = surface->u.legacy.num_banks; + metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe; + metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0; } static void r600_surface_import_metadata(struct r600_common_screen *rscreen, @@ -341,33 +296,21 @@ static void r600_surface_import_metadata(struct r600_common_screen *rscreen, enum radeon_surf_mode *array_mode, bool *is_scanout) { - if (rscreen->chip_class >= GFX9) { - if (metadata->u.gfx9.swizzle_mode > 0) - *array_mode = RADEON_SURF_MODE_2D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - - *is_scanout = metadata->u.gfx9.swizzle_mode == 0 || - metadata->u.gfx9.swizzle_mode % 4 == 2; - - surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode; - } else { - surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; - surf->u.legacy.bankw = metadata->u.legacy.bankw; - surf->u.legacy.bankh = metadata->u.legacy.bankh; - surf->u.legacy.tile_split = metadata->u.legacy.tile_split; - surf->u.legacy.mtilea = metadata->u.legacy.mtilea; - surf->u.legacy.num_banks = metadata->u.legacy.num_banks; - - if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_2D; - else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) - *array_mode = RADEON_SURF_MODE_1D; - else - *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; + surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config; + surf->u.legacy.bankw = metadata->u.legacy.bankw; + surf->u.legacy.bankh = metadata->u.legacy.bankh; + surf->u.legacy.tile_split = metadata->u.legacy.tile_split; + surf->u.legacy.mtilea = 
metadata->u.legacy.mtilea; + surf->u.legacy.num_banks = metadata->u.legacy.num_banks; + + if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED) + *array_mode = RADEON_SURF_MODE_2D; + else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED) + *array_mode = RADEON_SURF_MODE_1D; + else + *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED; - *is_scanout = metadata->u.legacy.scanout; - } + *is_scanout = metadata->u.legacy.scanout; } static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx, @@ -399,10 +342,7 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8; rtex->dirty_level_mask = 0; - if (rscreen->chip_class >= SI) - rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1); - else - rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1); + rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1); if (rtex->cmask_buffer != &rtex->resource) r600_resource_reference(&rtex->cmask_buffer, NULL); @@ -412,72 +352,6 @@ static void r600_texture_discard_cmask(struct r600_common_screen *rscreen, p_atomic_inc(&rscreen->compressed_colortex_counter); } -static bool r600_can_disable_dcc(struct r600_texture *rtex) -{ - /* We can't disable DCC if it can be written by another process. */ - return rtex->dcc_offset && - (!rtex->resource.b.is_shared || - !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE)); -} - -static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (!r600_can_disable_dcc(rtex)) - return false; - - assert(rtex->dcc_separate_buffer == NULL); - - /* Disable DCC. */ - rtex->dcc_offset = 0; - - /* Notify all contexts about the change. */ - p_atomic_inc(&rscreen->dirty_tex_counter); - return true; -} - -/** - * Disable DCC for the texture. (first decompress, then discard metadata). - * - * There is unresolved multi-context synchronization issue between - * screen::aux_context and the current context. 
If applications do this with - * multiple contexts, it's already undefined behavior for them and we don't - * have to worry about that. The scenario is: - * - * If context 1 disables DCC and context 2 has queued commands that write - * to the texture via CB with DCC enabled, and the order of operations is - * as follows: - * context 2 queues draw calls rendering to the texture, but doesn't flush - * context 1 disables DCC and flushes - * context 1 & 2 reset descriptors and FB state - * context 2 flushes (new compressed tiles written by the draw calls) - * context 1 & 2 read garbage, because DCC is disabled, yet there are - * compressed tiled - * - * \param rctx the current context if you have one, or rscreen->aux_context - * if you don't. - */ -bool r600_texture_disable_dcc(struct r600_common_context *rctx, - struct r600_texture *rtex) -{ - struct r600_common_screen *rscreen = rctx->screen; - - if (!r600_can_disable_dcc(rtex)) - return false; - - if (&rctx->b == rscreen->aux_context) - mtx_lock(&rscreen->aux_context_lock); - - /* Decompress DCC. */ - rctx->decompress_dcc(&rctx->b, rtex); - rctx->b.flush(&rctx->b, NULL, 0); - - if (&rctx->b == rscreen->aux_context) - mtx_unlock(&rscreen->aux_context_lock); - - return r600_texture_discard_dcc(rscreen, rtex); -} - static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, struct r600_texture *rtex, unsigned new_bind_flag, @@ -527,7 +401,6 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, if (new_bind_flag == PIPE_BIND_LINEAR) { r600_texture_discard_cmask(rctx->screen, rtex); - r600_texture_discard_dcc(rctx->screen, rtex); } /* Replace the structure fields of rtex. 
*/ @@ -551,18 +424,15 @@ static void r600_reallocate_texture_inplace(struct r600_common_context *rctx, rtex->cb_color_info = new_tex->cb_color_info; rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode; rtex->htile_offset = new_tex->htile_offset; - rtex->tc_compatible_htile = new_tex->tc_compatible_htile; rtex->depth_cleared = new_tex->depth_cleared; rtex->stencil_cleared = new_tex->stencil_cleared; rtex->non_disp_tiling = new_tex->non_disp_tiling; - rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics; rtex->framebuffers_bound = new_tex->framebuffers_bound; if (new_bind_flag == PIPE_BIND_LINEAR) { assert(!rtex->htile_offset); assert(!rtex->cmask.size); assert(!rtex->fmask.size); - assert(!rtex->dcc_offset); assert(!rtex->is_depth); } @@ -607,18 +477,9 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, assert(rtex->surface.tile_swizzle == 0); } - /* Since shader image stores don't support DCC on VI, - * disable it for external clients that want write - * access. 
- */ - if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) { - if (r600_texture_disable_dcc(rctx, rtex)) - update_metadata = true; - } - if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && - (rtex->cmask.size || rtex->dcc_offset)) { - /* Eliminate fast clear (both CMASK and DCC) */ + rtex->cmask.size) { + /* Eliminate fast clear (CMASK) */ r600_eliminate_fast_color_clear(rctx, rtex); /* Disable CMASK if flush_resource isn't going @@ -638,17 +499,10 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, rscreen->ws->buffer_set_metadata(res->buf, &metadata); } - if (rscreen->chip_class >= GFX9) { - offset = rtex->surface.u.gfx9.surf_offset; - stride = rtex->surface.u.gfx9.surf_pitch * - rtex->surface.bpe; - slice_size = rtex->surface.u.gfx9.surf_slice_size; - } else { - offset = rtex->surface.u.legacy.level[0].offset; - stride = rtex->surface.u.legacy.level[0].nblk_x * - rtex->surface.bpe; - slice_size = rtex->surface.u.legacy.level[0].slice_size; - } + offset = rtex->surface.u.legacy.level[0].offset; + stride = rtex->surface.u.legacy.level[0].nblk_x * + rtex->surface.bpe; + slice_size = rtex->surface.u.legacy.level[0].slice_size; } else { /* Move a suballocated buffer into a non-suballocated allocation. 
*/ if (rscreen->ws->buffer_is_suballocated(res->buf)) { @@ -710,8 +564,6 @@ static void r600_texture_destroy(struct pipe_screen *screen, r600_resource_reference(&rtex->cmask_buffer, NULL); } pb_reference(&resource->buf, NULL); - r600_resource_reference(&rtex->dcc_separate_buffer, NULL); - r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL); FREE(rtex); } @@ -730,25 +582,17 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen, memset(out, 0, sizeof(*out)); - if (rscreen->chip_class >= GFX9) { - out->alignment = rtex->surface.u.gfx9.fmask_alignment; - out->size = rtex->surface.u.gfx9.fmask_size; - return; - } - templ.nr_samples = 1; flags = rtex->surface.flags | RADEON_SURF_FMASK; - if (rscreen->chip_class <= CAYMAN) { - /* Use the same parameters and tile mode. */ - fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw; - fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh; - fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea; - fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split; + /* Use the same parameters and tile mode. 
*/ + fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw; + fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh; + fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea; + fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split; - if (nr_samples <= 4) - fmask.u.legacy.bankh = 4; - } + if (nr_samples <= 4) + fmask.u.legacy.bankh = 4; switch (nr_samples) { case 2: @@ -834,76 +678,15 @@ void r600_texture_get_cmask_info(struct r600_common_screen *rscreen, align(slice_bytes, base_align); } -static void si_texture_get_cmask_info(struct r600_common_screen *rscreen, - struct r600_texture *rtex, - struct r600_cmask_info *out) -{ - unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes; - unsigned num_pipes = rscreen->info.num_tile_pipes; - unsigned cl_width, cl_height; - - if (rscreen->chip_class >= GFX9) { - out->alignment = rtex->surface.u.gfx9.cmask_alignment; - out->size = rtex->surface.u.gfx9.cmask_size; - return; - } - - switch (num_pipes) { - case 2: - cl_width = 32; - cl_height = 16; - break; - case 4: - cl_width = 32; - cl_height = 32; - break; - case 8: - cl_width = 64; - cl_height = 32; - break; - case 16: /* Hawaii */ - cl_width = 64; - cl_height = 64; - break; - default: - assert(0); - return; - } - - unsigned base_align = num_pipes * pipe_interleave_bytes; - - unsigned width = align(rtex->resource.b.b.width0, cl_width*8); - unsigned height = align(rtex->resource.b.b.height0, cl_height*8); - unsigned slice_elements = (width * height) / (8*8); - - /* Each element of CMASK is a nibble. 
*/ - unsigned slice_bytes = slice_elements / 2; - - out->slice_tile_max = (width * height) / (128*128); - if (out->slice_tile_max) - out->slice_tile_max -= 1; - - out->alignment = MAX2(256, base_align); - out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) * - align(slice_bytes, base_align); -} - static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - if (rscreen->chip_class >= SI) { - si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - } else { - r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - } + r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment); rtex->size = rtex->cmask.offset + rtex->cmask.size; - if (rscreen->chip_class >= SI) - rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1); - else - rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); + rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); } static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen, @@ -914,11 +697,7 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen assert(rtex->cmask.size == 0); - if (rscreen->chip_class >= SI) { - si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - } else { - r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); - } + r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask); rtex->cmask_buffer = (struct r600_resource *) r600_aligned_buffer_create(&rscreen->b, @@ -934,10 +713,7 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen /* update colorbuffer state bits */ rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8; - if (rscreen->chip_class >= SI) - rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1); - else - rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); + rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1); p_atomic_inc(&rscreen->compressed_colortex_counter); } @@ -949,8 +725,6 @@ static void 
r600_texture_get_htile_size(struct r600_common_screen *rscreen, unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align; unsigned num_pipes = rscreen->info.num_tile_pipes; - assert(rscreen->chip_class <= VI); - rtex->surface.htile_size = 0; if (rscreen->chip_class <= EVERGREEN && @@ -963,22 +737,6 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, rtex->resource.b.b.height0 > 7680)) return; - /* HTILE is broken with 1D tiling on old kernels and CIK. */ - if (rscreen->chip_class >= CIK && - rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38) - return; - - /* Overalign HTILE on P2 configs to work around GPU hangs in - * piglit/depthstencil-render-miplevels 585. - * - * This has been confirmed to help Kabini & Stoney, where the hangs - * are always reproducible. I think I have seen the test hang - * on Carrizo too, though it was very rare there. - */ - if (rscreen->chip_class >= CIK && num_pipes < 4) - num_pipes = 4; - switch (num_pipes) { case 1: cl_width = 32; @@ -1023,8 +781,7 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen, static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, struct r600_texture *rtex) { - if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile) - r600_texture_get_htile_size(rscreen, rtex); + r600_texture_get_htile_size(rscreen, rtex); if (!rtex->surface.htile_size) return; @@ -1049,64 +806,6 @@ void r600_print_texture_info(struct r600_common_screen *rscreen, rtex->surface.bpe, rtex->resource.b.b.nr_samples, rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format)); - if (rscreen->chip_class >= GFX9) { - u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", " - "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n", - rtex->surface.surf_size, - rtex->surface.u.gfx9.surf_slice_size, - rtex->surface.surf_alignment, - rtex->surface.u.gfx9.surf.swizzle_mode, - 
rtex->surface.u.gfx9.surf.epitch, - rtex->surface.u.gfx9.surf_pitch); - - if (rtex->fmask.size) { - u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, swmode=%u, epitch=%u\n", - rtex->fmask.offset, - rtex->surface.u.gfx9.fmask_size, - rtex->surface.u.gfx9.fmask_alignment, - rtex->surface.u.gfx9.fmask.swizzle_mode, - rtex->surface.u.gfx9.fmask.epitch); - } - - if (rtex->cmask.size) { - u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n", - rtex->cmask.offset, - rtex->surface.u.gfx9.cmask_size, - rtex->surface.u.gfx9.cmask_alignment, - rtex->surface.u.gfx9.cmask.rb_aligned, - rtex->surface.u.gfx9.cmask.pipe_aligned); - } - - if (rtex->htile_offset) { - u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, " - "rb_aligned=%u, pipe_aligned=%u\n", - rtex->htile_offset, - rtex->surface.htile_size, - rtex->surface.htile_alignment, - rtex->surface.u.gfx9.htile.rb_aligned, - rtex->surface.u.gfx9.htile.pipe_aligned); - } - - if (rtex->dcc_offset) { - u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", " - "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", - rtex->dcc_offset, rtex->surface.dcc_size, - rtex->surface.dcc_alignment, - rtex->surface.u.gfx9.dcc_pitch_max, - rtex->surface.num_dcc_levels); - } - - if (rtex->surface.u.gfx9.stencil_offset) { - u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n", - rtex->surface.u.gfx9.stencil_offset, - rtex->surface.u.gfx9.stencil.swizzle_mode, - rtex->surface.u.gfx9.stencil.epitch); - } - return; - } - u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, " "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw, @@ -1129,22 +828,9 @@ void r600_print_texture_info(struct r600_common_screen *rscreen, if (rtex->htile_offset) u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", 
" - "alignment=%u, TC_compatible = %u\n", - rtex->htile_offset, rtex->surface.htile_size, - rtex->surface.htile_alignment, - rtex->tc_compatible_htile); - - if (rtex->dcc_offset) { - u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n", - rtex->dcc_offset, rtex->surface.dcc_size, - rtex->surface.dcc_alignment); - for (i = 0; i <= rtex->resource.b.b.last_level; i++) - u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", " - "fast_clear_size=%"PRIu64"\n", - i, i < rtex->surface.num_dcc_levels, - rtex->surface.u.legacy.level[i].dcc_offset, - rtex->surface.u.legacy.level[i].dcc_fast_clear_size); - } + "alignment=%u\n", + rtex->htile_offset, rtex->surface.htile_size, + rtex->surface.htile_alignment); for (i = 0; i <= rtex->resource.b.b.last_level; i++) u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", " @@ -1208,23 +894,7 @@ r600_texture_create_object(struct pipe_screen *screen, rtex->surface = *surface; rtex->size = rtex->surface.surf_size; - - rtex->tc_compatible_htile = rtex->surface.htile_size != 0 && - (rtex->surface.flags & - RADEON_SURF_TC_COMPATIBLE_HTILE); - - /* TC-compatible HTILE: - * - VI only supports Z32_FLOAT. - * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */ - if (rtex->tc_compatible_htile) { - if (rscreen->chip_class >= GFX9 && - base->format == PIPE_FORMAT_Z16_UNORM) - rtex->db_render_format = base->format; - else - rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT; - } else { - rtex->db_render_format = base->format; - } + rtex->db_render_format = base->format; /* Tiled depth textures utilize the non-displayable tile order. * This must be done after r600_setup_surface. @@ -1233,23 +903,12 @@ r600_texture_create_object(struct pipe_screen *screen, /* Applies to GCN. */ rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode; - /* Disable separate DCC at the beginning. 
DRI2 doesn't reuse buffers - * between frames, so the only thing that can enable separate DCC - * with DRI2 is multiple slow clears within a frame. - */ - rtex->ps_draw_ratio = 0; - if (rtex->is_depth) { if (base->flags & (R600_RESOURCE_FLAG_TRANSFER | R600_RESOURCE_FLAG_FLUSHED_DEPTH) || rscreen->chip_class >= EVERGREEN) { - if (rscreen->chip_class >= GFX9) { - rtex->can_sample_z = true; - rtex->can_sample_s = true; - } else { - rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; - rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; - } + rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted; + rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted; } else { if (rtex->resource.b.b.nr_samples <= 1 && (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM || @@ -1276,18 +935,6 @@ r600_texture_create_object(struct pipe_screen *screen, return NULL; } } - - /* Shared textures must always set up DCC here. - * If it's not present, it will be disabled by - * apply_opaque_metadata later. - */ - if (rtex->surface.dcc_size && - (buf || !(rscreen->debug_flags & DBG_NO_DCC)) && - !(rtex->surface.flags & RADEON_SURF_SCANOUT)) { - /* Reserve space for the DCC buffer. */ - rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment); - rtex->size = rtex->dcc_offset + rtex->surface.dcc_size; - } } /* Now create the backing buffer. */ @@ -1324,23 +971,12 @@ r600_texture_create_object(struct pipe_screen *screen, if (rtex->htile_offset) { uint32_t clear_value = 0; - if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) - clear_value = 0x0000030F; - r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, rtex->htile_offset, rtex->surface.htile_size, clear_value); } - /* Initialize DCC only if the texture is not being imported. */ - if (!buf && rtex->dcc_offset) { - r600_screen_clear_buffer(rscreen, &rtex->resource.b.b, - rtex->dcc_offset, - rtex->surface.dcc_size, - 0xFFFFFFFF); - } - /* Initialize the CMASK base register value. 
*/ rtex->cmask.base_address_reg = (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; @@ -1383,14 +1019,6 @@ r600_choose_tiling(struct r600_common_screen *rscreen, if (templ->flags & R600_RESOURCE_FLAG_TRANSFER) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI, - * which requires 2D tiling. - */ - if (rscreen->chip_class == VI && - is_depth_stencil && - (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY)) - return RADEON_SURF_MODE_2D; - /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */ if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN && (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) && @@ -1411,12 +1039,6 @@ r600_choose_tiling(struct r600_common_screen *rscreen, if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) return RADEON_SURF_MODE_LINEAR_ALIGNED; - /* Cursors are linear on SI. - * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */ - if (rscreen->chip_class >= SI && - (templ->bind & PIPE_BIND_CURSOR)) - return RADEON_SURF_MODE_LINEAR_ALIGNED; - if (templ->bind & PIPE_BIND_LINEAR) return RADEON_SURF_MODE_LINEAR_ALIGNED; @@ -1449,20 +1071,11 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen, struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct radeon_surf surface = {0}; bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH; - bool tc_compatible_htile = - rscreen->chip_class >= VI && - (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) && - !(rscreen->debug_flags & DBG_NO_HYPERZ) && - !is_flushed_depth && - templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */ - util_format_is_depth_or_stencil(templ->format); - int r; r = r600_init_surface(rscreen, &surface, templ, r600_choose_tiling(rscreen, templ), 0, 0, - false, false, is_flushed_depth, - tc_compatible_htile); + false, false, is_flushed_depth); if (r) { return NULL; } @@ -1500,7 +1113,7 @@ static struct 
pipe_resource *r600_texture_from_handle(struct pipe_screen *screen &array_mode, &is_scanout); r = r600_init_surface(rscreen, &surface, templ, array_mode, stride, - offset, true, is_scanout, false, false); + offset, true, is_scanout, false); if (r) { return NULL; } @@ -2124,114 +1737,6 @@ static void evergreen_set_clear_color(struct r600_texture *rtex, memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); } -/* Set the same micro tile mode as the destination of the last MSAA resolve. - * This allows hitting the MSAA resolve fast path, which requires that both - * src and dst micro tile modes match. - */ -static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen, - struct r600_texture *rtex) -{ - if (rtex->resource.b.is_shared || - rtex->resource.b.b.nr_samples <= 1 || - rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode) - return; - - assert(rscreen->chip_class >= GFX9 || - rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); - assert(rtex->resource.b.b.last_level == 0); - - if (rscreen->chip_class >= GFX9) { - /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. 
*/ - assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4); - - /* If you do swizzle_mode % 4, you'll get: - * 0 = Depth - * 1 = Standard, - * 2 = Displayable - * 3 = Rotated - * - * Depth-sample order isn't allowed: - */ - assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0); - - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */ - break; - case RADEON_MICRO_MODE_THIN: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */ - break; - case RADEON_MICRO_MODE_ROTATED: - rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3; - rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */ - break; - default: /* depth */ - assert(!"unexpected micro mode"); - return; - } - } else if (rscreen->chip_class >= CIK) { - /* These magic numbers were copied from addrlib. It doesn't use - * any definitions for them either. They are all 2D_TILED_THIN1 - * modes with different bpp and micro tile mode. 
- */ - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - rtex->surface.u.legacy.tiling_index[0] = 10; - break; - case RADEON_MICRO_MODE_THIN: - rtex->surface.u.legacy.tiling_index[0] = 14; - break; - case RADEON_MICRO_MODE_ROTATED: - rtex->surface.u.legacy.tiling_index[0] = 28; - break; - default: /* depth, thick */ - assert(!"unexpected micro mode"); - return; - } - } else { /* SI */ - switch (rtex->last_msaa_resolve_target_micro_mode) { - case RADEON_MICRO_MODE_DISPLAY: - switch (rtex->surface.bpe) { - case 1: - rtex->surface.u.legacy.tiling_index[0] = 10; - break; - case 2: - rtex->surface.u.legacy.tiling_index[0] = 11; - break; - default: /* 4, 8 */ - rtex->surface.u.legacy.tiling_index[0] = 12; - break; - } - break; - case RADEON_MICRO_MODE_THIN: - switch (rtex->surface.bpe) { - case 1: - rtex->surface.u.legacy.tiling_index[0] = 14; - break; - case 2: - rtex->surface.u.legacy.tiling_index[0] = 15; - break; - case 4: - rtex->surface.u.legacy.tiling_index[0] = 16; - break; - default: /* 8, 16 */ - rtex->surface.u.legacy.tiling_index[0] = 17; - break; - } - break; - default: /* depth, thick */ - assert(!"unexpected micro mode"); - return; - } - } - - rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode; - - p_atomic_inc(&rscreen->dirty_tex_counter); -} - void evergreen_do_fast_color_clear(struct r600_common_context *rctx, struct pipe_framebuffer_state *fb, struct r600_atom *fb_state, @@ -2285,24 +1790,12 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) continue; - /* fast color clear with 1D tiling doesn't work on old kernels and CIK */ - if (rctx->chip_class == CIK && - tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D && - rctx->screen->info.drm_major == 2 && - rctx->screen->info.drm_minor < 38) { - continue; - } - { /* 128-bit formats are unusupported */ if (tex->surface.bpe > 8) { continue; } - /* RB+ 
doesn't work with CMASK fast clear on Stoney. */ - if (rctx->family == CHIP_STONEY) - continue; - /* ensure CMASK is enabled */ r600_texture_alloc_cmask_separate(rctx->screen, tex); if (tex->cmask.size == 0) { @@ -2322,10 +1815,6 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, p_atomic_inc(&rctx->screen->compressed_colortex_counter); } - /* We can change the micro tile mode before a full clear. */ - if (rctx->screen->chip_class >= SI) - si_set_optimal_micro_tile_mode(rctx->screen, tex); - evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); if (dirty_cbufs) @@ -2425,7 +1914,7 @@ r600_texture_from_memobj(struct pipe_screen *screen, r = r600_init_surface(rscreen, &surface, templ, array_mode, memobj->stride, offset, true, is_scanout, - false, false); + false); if (r) return NULL; diff --git a/src/gallium/drivers/r600/r600_uvd.c b/src/gallium/drivers/r600/r600_uvd.c index 28551e2daad..ca4248aed69 100644 --- a/src/gallium/drivers/r600/r600_uvd.c +++ b/src/gallium/drivers/r600/r600_uvd.c @@ -162,7 +162,7 @@ static struct pb_buffer* r600_uvd_set_dtb(struct ruvd_msg *msg, struct vl_video_ msg->body.decode.dt_field_mode = buf->base.interlaced; msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks)); - ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface, RUVD_SURFACE_TYPE_LEGACY); + ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface); return luma->resource.buf; } diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c index fabc73e3834..b0551d7e1e3 100644 --- a/src/gallium/drivers/r600/radeon_uvd.c +++ b/src/gallium/drivers/r600/radeon_uvd.c @@ -205,8 +205,7 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) { switch (u_reduce_video_profile(dec->base.profile)) { case PIPE_VIDEO_FORMAT_MPEG4_AVC: - return (family >= CHIP_TONGA) ? 
- RUVD_CODEC_H264_PERF : RUVD_CODEC_H264; + return RUVD_CODEC_H264; case PIPE_VIDEO_FORMAT_VC1: return RUVD_CODEC_VC1; @@ -229,60 +228,6 @@ static uint32_t profile2stream_type(struct ruvd_decoder *dec, unsigned family) } } -static unsigned calc_ctx_size_h264_perf(struct ruvd_decoder *dec) -{ - unsigned width_in_mb, height_in_mb, ctx_size; - unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); - unsigned height = align(dec->base.height, VL_MACROBLOCK_HEIGHT); - - unsigned max_references = dec->base.max_references + 1; - - // picture width & height in 16 pixel units - width_in_mb = width / VL_MACROBLOCK_WIDTH; - height_in_mb = align(height / VL_MACROBLOCK_HEIGHT, 2); - - if (!dec->use_legacy) { - unsigned fs_in_mb = width_in_mb * height_in_mb; - unsigned num_dpb_buffer; - switch(dec->base.level) { - case 30: - num_dpb_buffer = 8100 / fs_in_mb; - break; - case 31: - num_dpb_buffer = 18000 / fs_in_mb; - break; - case 32: - num_dpb_buffer = 20480 / fs_in_mb; - break; - case 41: - num_dpb_buffer = 32768 / fs_in_mb; - break; - case 42: - num_dpb_buffer = 34816 / fs_in_mb; - break; - case 50: - num_dpb_buffer = 110400 / fs_in_mb; - break; - case 51: - num_dpb_buffer = 184320 / fs_in_mb; - break; - default: - num_dpb_buffer = 184320 / fs_in_mb; - break; - } - num_dpb_buffer++; - max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); - ctx_size = max_references * align(width_in_mb * height_in_mb * 192, 256); - } else { - // the firmware seems to always assume a minimum of ref frames - max_references = MAX2(NUM_H264_REFS, max_references); - // macroblock context buffer - ctx_size = align(width_in_mb * height_in_mb * max_references * 192, 256); - } - - return ctx_size; -} - static unsigned calc_ctx_size_h265_main(struct ruvd_decoder *dec) { unsigned width = align(dec->base.width, VL_MACROBLOCK_WIDTH); @@ -335,10 +280,7 @@ static unsigned calc_ctx_size_h265_main10(struct ruvd_decoder *dec, struct pipe_ static unsigned get_db_pitch_alignment(struct 
ruvd_decoder *dec) { - if (((struct r600_common_screen*)dec->screen)->family < CHIP_VEGA10) - return 16; - else - return 32; + return 16; } /* calculate size of reference picture buffer */ @@ -399,8 +341,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) num_dpb_buffer++; max_references = MAX2(MIN2(NUM_H264_REFS, num_dpb_buffer), max_references); dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { dpb_size += max_references * align(width_in_mb * height_in_mb * 192, alignment); dpb_size += align(width_in_mb * height_in_mb * 32, alignment); } @@ -409,8 +350,7 @@ static unsigned calc_dpb_size(struct ruvd_decoder *dec) max_references = MAX2(NUM_H264_REFS, max_references); // reference picture buffer dpb_size = image_size * max_references; - if ((dec->stream_type != RUVD_CODEC_H264_PERF) || - (((struct r600_common_screen*)dec->screen)->family < CHIP_POLARIS10)) { + if ((dec->stream_type != RUVD_CODEC_H264_PERF)) { // macroblock context buffer dpb_size += width_in_mb * height_in_mb * max_references * 192; // IT surface buffer @@ -610,8 +550,6 @@ static struct ruvd_h265 get_h265_msg(struct ruvd_decoder *dec, struct pipe_video result.sps_info_flags |= pic->pps->sps->sps_temporal_mvp_enabled_flag << 6; result.sps_info_flags |= pic->pps->sps->strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= pic->pps->sps->separate_colour_plane_flag << 8; - if (((struct r600_common_screen*)dec->screen)->family == CHIP_CARRIZO) - result.sps_info_flags |= 1 << 9; if (pic->UseRefPicList == true) result.sps_info_flags |= 1 << 10; @@ -1250,13 +1188,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder, dec->msg->body.decode.bsd_size = bs_size; dec->msg->body.decode.db_pitch = align(dec->base.width, get_db_pitch_alignment(dec)); - if (dec->stream_type == RUVD_CODEC_H264_PERF && - ((struct 
r600_common_screen*)dec->screen)->family >= CHIP_POLARIS10) - dec->msg->body.decode.dpb_reserved = dec->ctx.res->buf->size; - dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target); - if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY) - dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2; switch (u_reduce_video_profile(picture->profile)) { case PIPE_VIDEO_FORMAT_MPEG4_AVC: @@ -1407,8 +1339,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, goto error; } - dec->fb_size = (info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : - FB_BUFFER_SIZE; + dec->fb_size = FB_BUFFER_SIZE; bs_buf_size = width * height * (512 / (16 * 16)); for (i = 0; i < NUM_BUFFERS; ++i) { unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size; @@ -1440,36 +1371,10 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, rvid_clear_buffer(context, &dec->dpb); } - if (dec->stream_type == RUVD_CODEC_H264_PERF && info.family >= CHIP_POLARIS10) { - unsigned ctx_size = calc_ctx_size_h264_perf(dec); - if (!rvid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated context buffer.\n"); - goto error; - } - rvid_clear_buffer(context, &dec->ctx); - } - - if (info.family >= CHIP_POLARIS10 && info.drm_minor >= 3) { - if (!rvid_create_buffer(dec->screen, &dec->sessionctx, - UVD_SESSION_CONTEXT_SIZE, - PIPE_USAGE_DEFAULT)) { - RVID_ERR("Can't allocated session ctx.\n"); - goto error; - } - rvid_clear_buffer(context, &dec->sessionctx); - } - - if (info.family >= CHIP_VEGA10) { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15; - dec->reg.cntl = RUVD_ENGINE_CNTL_SOC15; - } else { - dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0; - dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; - dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; - dec->reg.cntl = RUVD_ENGINE_CNTL; - } + dec->reg.data0 = 
RUVD_GPCOM_VCPU_DATA0; + dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1; + dec->reg.cmd = RUVD_GPCOM_VCPU_CMD; + dec->reg.cntl = RUVD_ENGINE_CNTL; map_msg_fb_it_buf(dec); dec->msg->size = sizeof(*dec->msg); @@ -1506,20 +1411,10 @@ error: } /* calculate top/bottom offset */ -static unsigned texture_offset(struct radeon_surf *surface, unsigned layer, - enum ruvd_surface_type type) +static unsigned texture_offset(struct radeon_surf *surface, unsigned layer) { - switch (type) { - default: - case RUVD_SURFACE_TYPE_LEGACY: - return surface->u.legacy.level[0].offset + - layer * surface->u.legacy.level[0].slice_size; - break; - case RUVD_SURFACE_TYPE_GFX9: - return surface->u.gfx9.surf_offset + - layer * surface->u.gfx9.surf_slice_size; - break; - } + return surface->u.legacy.level[0].offset + + layer * surface->u.legacy.level[0].slice_size; } /* hw encode the aspect of macro tiles */ @@ -1552,67 +1447,46 @@ static unsigned bank_wh(unsigned bankwh) * fill decoding target field from the luma and chroma surfaces */ void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type) + struct radeon_surf *chroma) { - switch (type) { + msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; + switch (luma->u.legacy.level[0].mode) { + case RADEON_SURF_MODE_LINEAR_ALIGNED: + msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; + break; + case RADEON_SURF_MODE_1D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; + break; + case RADEON_SURF_MODE_2D: + msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; + msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; + break; default: - case RUVD_SURFACE_TYPE_LEGACY: - msg->body.decode.dt_pitch = luma->u.legacy.level[0].nblk_x * luma->blk_w; - switch (luma->u.legacy.level[0].mode) { - case RADEON_SURF_MODE_LINEAR_ALIGNED: - 
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - break; - case RADEON_SURF_MODE_1D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_1D_THIN; - break; - case RADEON_SURF_MODE_2D: - msg->body.decode.dt_tiling_mode = RUVD_TILE_8X8; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_2D_THIN; - break; - default: - assert(0); - break; - } + assert(0); + break; + } - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); + msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0); + if (chroma) + msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0); + if (msg->body.decode.dt_field_mode) { + msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1); if (chroma) - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - if (chroma) - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - - if (chroma) { - assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); - assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); - assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); - } + msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1); + } else { + msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; + msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; + } - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); - msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); - msg->body.decode.dt_surf_tile_config |= 
RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); - break; - case RUVD_SURFACE_TYPE_GFX9: - msg->body.decode.dt_pitch = luma->u.gfx9.surf_pitch * luma->blk_w; - /* SWIZZLE LINEAR MODE */ - msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; - msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; - msg->body.decode.dt_luma_top_offset = texture_offset(luma, 0, type); - msg->body.decode.dt_chroma_top_offset = texture_offset(chroma, 0, type); - if (msg->body.decode.dt_field_mode) { - msg->body.decode.dt_luma_bottom_offset = texture_offset(luma, 1, type); - msg->body.decode.dt_chroma_bottom_offset = texture_offset(chroma, 1, type); - } else { - msg->body.decode.dt_luma_bottom_offset = msg->body.decode.dt_luma_top_offset; - msg->body.decode.dt_chroma_bottom_offset = msg->body.decode.dt_chroma_top_offset; - } - msg->body.decode.dt_surf_tile_config = 0; - break; + if (chroma) { + assert(luma->u.legacy.bankw == chroma->u.legacy.bankw); + assert(luma->u.legacy.bankh == chroma->u.legacy.bankh); + assert(luma->u.legacy.mtilea == chroma->u.legacy.mtilea); } + + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_WIDTH(bank_wh(luma->u.legacy.bankw)); + msg->body.decode.dt_surf_tile_config |= RUVD_BANK_HEIGHT(bank_wh(luma->u.legacy.bankh)); + msg->body.decode.dt_surf_tile_config |= RUVD_MACRO_TILE_ASPECT_RATIO(macro_tile_aspect(luma->u.legacy.mtilea)); } diff --git a/src/gallium/drivers/r600/radeon_uvd.h b/src/gallium/drivers/r600/radeon_uvd.h index a927c843dac..c371b14414f 100644 --- a/src/gallium/drivers/r600/radeon_uvd.h +++ b/src/gallium/drivers/r600/radeon_uvd.h @@ -117,11 +117,6 @@ #define RUVD_VC1_PROFILE_MAIN 0x00000001 #define RUVD_VC1_PROFILE_ADVANCED 0x00000002 -enum ruvd_surface_type { - RUVD_SURFACE_TYPE_LEGACY = 0, - RUVD_SURFACE_TYPE_GFX9 -}; - struct ruvd_mvc_element { uint16_t viewOrderIndex; uint16_t viewId; @@ -443,5 +438,5 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, /* fill decoding target field from the 
luma and chroma surfaces */ void ruvd_set_dt_surfaces(struct ruvd_msg *msg, struct radeon_surf *luma, - struct radeon_surf *chroma, enum ruvd_surface_type type); + struct radeon_surf *chroma); #endif diff --git a/src/gallium/drivers/r600/radeon_vce.c b/src/gallium/drivers/r600/radeon_vce.c index e8b1a632dd8..16a0127f319 100644 --- a/src/gallium/drivers/r600/radeon_vce.c +++ b/src/gallium/drivers/r600/radeon_vce.c @@ -225,16 +225,10 @@ struct rvce_cpb_slot *l1_slot(struct rvce_encoder *enc) void rvce_frame_offset(struct rvce_encoder *enc, struct rvce_cpb_slot *slot, signed *luma_offset, signed *chroma_offset) { - struct r600_common_screen *rscreen = (struct r600_common_screen *)enc->screen; unsigned pitch, vpitch, fsize; - if (rscreen->chip_class < GFX9) { - pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); - vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); - } else { - pitch = align(enc->luma->u.gfx9.surf_pitch * enc->luma->bpe, 256); - vpitch = align(enc->luma->u.gfx9.surf_height, 16); - } + pitch = align(enc->luma->u.legacy.level[0].nblk_x * enc->luma->bpe, 128); + vpitch = align(enc->luma->u.legacy.level[0].nblk_y, 16); fsize = pitch * (vpitch + vpitch / 2); *luma_offset = slot->index * fsize; @@ -420,16 +414,6 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, if ((rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42) || rscreen->info.drm_major == 3) enc->use_vui = true; - if (rscreen->info.family >= CHIP_TONGA && - rscreen->info.family != CHIP_STONEY && - rscreen->info.family != CHIP_POLARIS11 && - rscreen->info.family != CHIP_POLARIS12) - enc->dual_pipe = true; - /* TODO enable B frame with dual instance */ - if ((rscreen->info.family >= CHIP_TONGA) && - (templ->max_references == 1) && - (rscreen->info.vce_harvest_config == 0)) - enc->dual_inst = true; enc->base = *templ; enc->base.context = context; @@ -466,12 +450,8 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context 
*context, get_buffer(((struct vl_video_buffer *)tmp_buf)->resources[0], NULL, &tmp_surf); - cpb_size = (rscreen->chip_class < GFX9) ? - align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * - align(tmp_surf->u.legacy.level[0].nblk_y, 32) : - - align(tmp_surf->u.gfx9.surf_pitch * tmp_surf->bpe, 256) * - align(tmp_surf->u.gfx9.surf_height, 32); + cpb_size = align(tmp_surf->u.legacy.level[0].nblk_x * tmp_surf->bpe, 128) * + align(tmp_surf->u.legacy.level[0].nblk_y, 32); cpb_size = cpb_size * 3 / 2; cpb_size = cpb_size * enc->cpb_num; diff --git a/src/gallium/drivers/r600/radeon_video.c b/src/gallium/drivers/r600/radeon_video.c index 99b6676fee1..c7acc3d6e22 100644 --- a/src/gallium/drivers/r600/radeon_video.c +++ b/src/gallium/drivers/r600/radeon_video.c @@ -156,13 +156,11 @@ void rvid_join_surfaces(struct r600_common_context *rctx, if (!surfaces[i]) continue; - if (rctx->chip_class < GFX9) { - /* choose the smallest bank w/h for now */ - wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh; - if (wh < best_wh) { - best_wh = wh; - best_tiling = i; - } + /* choose the smallest bank w/h for now */ + wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh; + if (wh < best_wh) { + best_wh = wh; + best_tiling = i; } } @@ -173,17 +171,14 @@ void rvid_join_surfaces(struct r600_common_context *rctx, /* adjust the texture layer offsets */ off = align(off, surfaces[i]->surf_alignment); - if (rctx->chip_class < GFX9) { - /* copy the tiling parameters */ - surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw; - surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh; - surfaces[i]->u.legacy.mtilea = surfaces[best_tiling]->u.legacy.mtilea; - surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split; + /* copy the tiling parameters */ + surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw; + surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh; + surfaces[i]->u.legacy.mtilea = 
surfaces[best_tiling]->u.legacy.mtilea; + surfaces[i]->u.legacy.tile_split = surfaces[best_tiling]->u.legacy.tile_split; - for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j) - surfaces[i]->u.legacy.level[j].offset += off; - } else - surfaces[i]->u.gfx9.surf_offset += off; + for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j) + surfaces[i]->u.legacy.level[j].offset += off; off += surfaces[i]->surf_size; } @@ -237,9 +232,9 @@ int rvid_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: - return (rscreen->family < CHIP_TONGA) ? 2048 : 4096; + return 2048; case PIPE_VIDEO_CAP_MAX_HEIGHT: - return (rscreen->family < CHIP_TONGA) ? 1152 : 2304; + return 1152; case PIPE_VIDEO_CAP_PREFERED_FORMAT: return PIPE_FORMAT_NV12; case PIPE_VIDEO_CAP_PREFERS_INTERLACED: @@ -249,7 +244,7 @@ int rvid_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: return true; case PIPE_VIDEO_CAP_STACKED_FRAMES: - return (rscreen->family < CHIP_TONGA) ? 
1 : 2; + return 1; default: return 0; } @@ -264,40 +259,22 @@ int rvid_get_video_param(struct pipe_screen *screen, /* no support for MPEG4 on older hw */ return rscreen->family >= CHIP_PALM; case PIPE_VIDEO_FORMAT_MPEG4_AVC: - if ((rscreen->family == CHIP_POLARIS10 || - rscreen->family == CHIP_POLARIS11) && - info.uvd_fw_version < UVD_FW_1_66_16 ) { - RVID_ERR("POLARIS10/11 firmware version need to be updated.\n"); - return false; - } return true; case PIPE_VIDEO_FORMAT_VC1: return true; case PIPE_VIDEO_FORMAT_HEVC: - /* Carrizo only supports HEVC Main */ - if (rscreen->family >= CHIP_STONEY) - return (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN || - profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10); - else if (rscreen->family >= CHIP_CARRIZO) - return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN; return false; case PIPE_VIDEO_FORMAT_JPEG: - if (rscreen->family < CHIP_CARRIZO || rscreen->family >= CHIP_VEGA10) - return false; - if (!(rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 19)) { - RVID_ERR("No MJPEG support for the kernel version\n"); - return false; - } - return true; + return false; default: return false; } case PIPE_VIDEO_CAP_NPOT_TEXTURES: return 1; case PIPE_VIDEO_CAP_MAX_WIDTH: - return (rscreen->family < CHIP_TONGA) ? 2048 : 4096; + return 2048; case PIPE_VIDEO_CAP_MAX_HEIGHT: - return (rscreen->family < CHIP_TONGA) ? 1152 : 4096; + return 1152; case PIPE_VIDEO_CAP_PREFERED_FORMAT: if (profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10) return PIPE_FORMAT_P016; @@ -342,7 +319,7 @@ int rvid_get_video_param(struct pipe_screen *screen, case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE: case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH: - return (rscreen->family < CHIP_TONGA) ? 41 : 52; + return 41; case PIPE_VIDEO_PROFILE_HEVC_MAIN: case PIPE_VIDEO_PROFILE_HEVC_MAIN_10: return 186; -- 2.30.2