X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_pipe.c;h=7123e7d46d5e0fe908b648ba9fc9d71968c72715;hb=ce785f5ffd7dbed14a3909164e55a975a023ee97;hp=0d6d93a73586ecfb08652a34ccba4f63b6fdf43f;hpb=473be16c745d0d78a0be71828d05aeb5d8a7cb76;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 0d6d93a7358..7123e7d46d5 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -41,12 +41,6 @@ #include "vl/vl_decoder.h" #include "driver_ddebug/dd_util.h" -#include -#include -#if HAVE_LLVM >= 0x0700 -#include -#endif - static const struct debug_named_value debug_options[] = { /* Shader logging options: */ { "vs", DBG(VS), "Print vertex shaders" }, @@ -63,6 +57,7 @@ static const struct debug_named_value debug_options[] = { /* Shader compiler options the shader cache should be aware of: */ { "unsafemath", DBG(UNSAFE_MATH), "Enable unsafe math shader optimizations" }, { "sisched", DBG(SI_SCHED), "Enable LLVM SI Machine Instruction Scheduler." }, + { "gisel", DBG(GISEL), "Enable LLVM global instruction selector." }, /* Shader compiler options (with no effect on the shader cache): */ { "checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR" }, @@ -106,65 +101,44 @@ static const struct debug_named_value debug_options[] = { { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." }, { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." }, { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." }, + { "testdmaperf", DBG(TEST_DMA_PERF), "Test DMA performance" }, + { "testgds", DBG(TEST_GDS), "Test GDS." }, + { "testgdsmm", DBG(TEST_GDS_MM), "Test GDS memory management." }, + { "testgdsoamm", DBG(TEST_GDS_OA_MM), "Test GDS OA memory management." }, DEBUG_NAMED_VALUE_END /* must be last */ }; static void si_init_compiler(struct si_screen *sscreen, - struct si_compiler *compiler) + struct ac_llvm_compiler *compiler) { + /* Only create the less-optimizing version of the compiler on APUs + * predating Ryzen (Raven). */ + bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram && + sscreen->info.chip_class <= VI; + enum ac_target_machine_options tm_options = (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) | + (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) | (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) | (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) | - (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0); + (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) | + (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) | + (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0); - const char *triple; ac_init_llvm_once(); - compiler->tm = ac_create_target_machine(sscreen->info.family, - tm_options, &triple); - if (!compiler->tm) - return; - - compiler->target_library_info = - gallivm_create_target_library_info(triple); - if (!compiler->target_library_info) - return; + ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options); + compiler->passes = ac_create_llvm_passes(compiler->tm); - compiler->passmgr = LLVMCreatePassManager(); - if (!compiler->passmgr) - return; - - LLVMAddTargetLibraryInfo(compiler->target_library_info, - compiler->passmgr); - - /* Add LLVM passes into the pass manager. */ - if (sscreen->debug_flags & DBG(CHECK_IR)) - LLVMAddVerifierPass(compiler->passmgr); - - LLVMAddAlwaysInlinerPass(compiler->passmgr); - /* This pass should eliminate all the load and store instructions. */ - LLVMAddPromoteMemoryToRegisterPass(compiler->passmgr); - LLVMAddScalarReplAggregatesPass(compiler->passmgr); - LLVMAddLICMPass(compiler->passmgr); - LLVMAddAggressiveDCEPass(compiler->passmgr); - LLVMAddCFGSimplificationPass(compiler->passmgr); - /* This is recommended by the instruction combining pass. */ - LLVMAddEarlyCSEMemSSAPass(compiler->passmgr); - LLVMAddInstructionCombiningPass(compiler->passmgr); + if (compiler->low_opt_tm) + compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm); } -static void si_destroy_compiler(struct si_compiler *compiler) +static void si_destroy_compiler(struct ac_llvm_compiler *compiler) { - if (compiler->passmgr) - LLVMDisposePassManager(compiler->passmgr); -#if HAVE_LLVM >= 0x0700 - /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */ - if (compiler->target_library_info) - gallivm_dispose_target_library_info(compiler->target_library_info); -#endif - if (compiler->tm) - LLVMDisposeTargetMachine(compiler->tm); + ac_destroy_llvm_passes(compiler->passes); + ac_destroy_llvm_passes(compiler->low_opt_passes); + ac_destroy_llvm_compiler(compiler); } /* @@ -188,6 +162,7 @@ static void si_destroy_context(struct pipe_context *context) pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tess_rings, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); + pipe_resource_reference(&sctx->sample_pos_buffer, NULL); r600_resource_reference(&sctx->border_color_buffer, NULL); free(sctx->border_color_table); r600_resource_reference(&sctx->scratch_buffer, NULL); @@ -222,6 +197,10 @@ static void si_destroy_context(struct pipe_context *context) sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color_layered); if (sctx->vs_blit_texcoord) sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord); + if (sctx->cs_clear_buffer) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_clear_buffer); + if (sctx->cs_copy_buffer) + sctx->b.delete_compute_state(&sctx->b, sctx->cs_copy_buffer); if (sctx->blitter) util_blitter_destroy(sctx->blitter); @@ -375,6 +354,20 @@ static void si_set_log_context(struct pipe_context *ctx, u_log_add_auto_logger(log, si_auto_log_cs, sctx); } +static void si_set_context_param(struct pipe_context *ctx, + enum pipe_context_param param, + unsigned value) +{ + struct radeon_winsys *ws = ((struct si_context *)ctx)->ws; + + switch (param) { + case PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE: + ws->pin_threads_to_L3_cache(ws, value); + break; + default:; + } +} + static struct pipe_context *si_create_context(struct pipe_screen *screen, unsigned flags) { @@ -382,6 +375,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen->ws; int shader, i; + bool stop_exec_on_failure = (flags & PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET) != 0; if (!sctx) return NULL; @@ -395,6 +389,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->b.emit_string_marker = si_emit_string_marker; sctx->b.set_debug_callback = si_set_debug_callback; sctx->b.set_log_context = si_set_log_context; + sctx->b.set_context_param = si_set_context_param; sctx->screen = sscreen; /* Easy accessing of screen/winsys. */ sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0; @@ -428,7 +423,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sctx->allocator_zeroed_memory = u_suballocator_create(&sctx->b, sscreen->info.gart_page_size, - 0, PIPE_USAGE_DEFAULT, 0, true); + 0, PIPE_USAGE_DEFAULT, + SI_RESOURCE_FLAG_SO_FILLED_SIZE, true); if (!sctx->allocator_zeroed_memory) goto fail; @@ -457,15 +453,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) { sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA, - (void*)si_flush_dma_cs, - sctx); + (void*)si_flush_dma_cs, + sctx, stop_exec_on_failure); } si_init_buffer_functions(sctx); si_init_clear_functions(sctx); si_init_blit_functions(sctx); si_init_compute_functions(sctx); - si_init_cp_dma_functions(sctx); + si_init_compute_blit_functions(sctx); si_init_debug_functions(sctx); si_init_msaa_functions(sctx); si_init_streamout_functions(sctx); @@ -479,7 +475,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, } sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX, - (void*)si_flush_gfx_cs, sctx); + (void*)si_flush_gfx_cs, sctx, stop_exec_on_failure); /* Border colors. */ sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS * @@ -505,7 +501,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, si_init_state_functions(sctx); si_init_shader_functions(sctx); si_init_viewport_functions(sctx); - si_init_ia_multi_vgt_param_table(sctx); if (sctx->chip_class >= CIK) cik_init_sdma_functions(sctx); @@ -515,12 +510,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, if (sscreen->debug_flags & DBG(FORCE_DMA)) sctx->b.resource_copy_region = sctx->dma_copy; + bool dst_stream_policy = SI_COMPUTE_DST_CACHE_POLICY != L2_LRU; + sctx->cs_clear_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_CLEAR_DW_PER_THREAD, + dst_stream_policy, false); + sctx->cs_copy_buffer = si_create_dma_compute_shader(&sctx->b, + SI_COMPUTE_COPY_DW_PER_THREAD, + dst_stream_policy, true); + sctx->blitter = util_blitter_create(&sctx->b); if (sctx->blitter == NULL) goto fail; - sctx->blitter->draw_rectangle = si_draw_rectangle; sctx->blitter->skip_viewport_restore = true; + si_init_draw_functions(sctx); + sctx->sample_mask = 0xffff; if (sctx->chip_class >= GFX9) { @@ -571,11 +575,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, &sctx->null_const_buf); si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &sctx->null_const_buf); - - /* Clear the NULL constant buffer, because loads should return zeros. */ - si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, - sctx->null_const_buf.buffer->width0, 0, - SI_COHERENCY_SHADER); } uint64_t max_threads_per_block; @@ -612,8 +611,22 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL); util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL); + sctx->sample_pos_buffer = + pipe_buffer_create(sctx->b.screen, 0, PIPE_USAGE_DEFAULT, + sizeof(sctx->sample_positions)); + pipe_buffer_write(&sctx->b, sctx->sample_pos_buffer, 0, + sizeof(sctx->sample_positions), &sctx->sample_positions); + /* this must be last */ si_begin_new_gfx_cs(sctx); + + if (sctx->chip_class == CIK) { + /* Clear the NULL constant buffer, because loads should return zeros. */ + uint32_t clear_value = 0; + si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0, + sctx->null_const_buf.buffer->width0, + &clear_value, 4, SI_COHERENCY_SHADER); + } return &sctx->b; fail: fprintf(stderr, "radeonsi: Failed to create a context.\n"); @@ -712,38 +725,6 @@ static void si_init_gs_info(struct si_screen *sscreen) sscreen->info.family); } -static void si_handle_env_var_force_family(struct si_screen *sscreen) -{ - const char *family = debug_get_option("SI_FORCE_FAMILY", NULL); - unsigned i; - - if (!family) - return; - - for (i = CHIP_TAHITI; i < CHIP_LAST; i++) { - if (!strcmp(family, ac_get_llvm_processor_name(i))) { - /* Override family and chip_class. */ - sscreen->info.family = i; - - if (i >= CHIP_VEGA10) - sscreen->info.chip_class = GFX9; - else if (i >= CHIP_TONGA) - sscreen->info.chip_class = VI; - else if (i >= CHIP_BONAIRE) - sscreen->info.chip_class = CIK; - else - sscreen->info.chip_class = SI; - - /* Don't submit any IBs. */ - setenv("RADEON_NOOP", "1", 1); - return; - } - } - - fprintf(stderr, "radeonsi: Unknown family: %s\n", family); - exit(1); -} - static void si_test_vmfault(struct si_screen *sscreen) { struct pipe_context *ctx = sscreen->aux_context; @@ -759,12 +740,13 @@ static void si_test_vmfault(struct si_screen *sscreen) r600_resource(buf)->gpu_address = 0; /* cause a VM fault */ if (sscreen->debug_flags & DBG(TEST_VMFAULT_CP)) { - si_copy_buffer(sctx, buf, buf, 0, 4, 4, 0); + si_cp_dma_copy_buffer(sctx, buf, buf, 0, 4, 4, 0, + SI_COHERENCY_NONE, L2_BYPASS); ctx->flush(ctx, NULL, 0); puts("VM fault test: CP - done."); } if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) { - sctx->dma_clear_buffer(sctx, buf, 0, 4, 0); + si_sdma_clear_buffer(sctx, buf, 0, 4, 0); ctx->flush(ctx, NULL, 0); puts("VM fault test: SDMA - done."); } @@ -775,47 +757,80 @@ static void si_test_vmfault(struct si_screen *sscreen) exit(0); } +static void si_test_gds_memory_management(struct si_context *sctx, + unsigned alloc_size, unsigned alignment, + enum radeon_bo_domain domain) +{ + struct radeon_winsys *ws = sctx->ws; + struct radeon_cmdbuf *cs[8]; + struct pb_buffer *gds_bo[ARRAY_SIZE(cs)]; + + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + cs[i] = ws->cs_create(sctx->ctx, RING_COMPUTE, + NULL, NULL, false); + gds_bo[i] = ws->buffer_create(ws, alloc_size, alignment, domain, 0); + assert(gds_bo[i]); + } + + for (unsigned iterations = 0; iterations < 20000; iterations++) { + for (unsigned i = 0; i < ARRAY_SIZE(cs); i++) { + /* This clears GDS with CP DMA. + * + * We don't care if GDS is present. Just add some packet + * to make the GPU busy for a moment. + */ + si_cp_dma_clear_buffer(sctx, cs[i], NULL, 0, alloc_size, 0, + SI_CPDMA_SKIP_BO_LIST_UPDATE | + SI_CPDMA_SKIP_CHECK_CS_SPACE | + SI_CPDMA_SKIP_GFX_SYNC, 0, 0); + + ws->cs_add_buffer(cs[i], gds_bo[i], domain, + RADEON_USAGE_READWRITE, 0); + ws->cs_flush(cs[i], PIPE_FLUSH_ASYNC, NULL); + } + } + exit(0); +} + static void si_disk_cache_create(struct si_screen *sscreen) { /* Don't use the cache if shader dumping is enabled. */ if (sscreen->debug_flags & DBG_ALL_SHADERS) return; - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(si_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - uint32_t llvm_timestamp; - - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(×tamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } + struct mesa_sha1 ctx; + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; - if (res != -1) { - /* These flags affect shader compilation. */ - #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ - DBG(SI_SCHED) | \ - DBG(UNSAFE_MATH) | \ - DBG(NIR)) - uint64_t shader_debug_flags = sscreen->debug_flags & - ALL_FLAGS; - - /* Add the high bits of 32-bit addresses, which affects - * how 32-bit addresses are expanded to 64 bits. - */ - STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); - shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; - - sscreen->disk_shader_cache = - disk_cache_create(si_get_family_name(sscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } + _mesa_sha1_init(&ctx); + + if (!disk_cache_get_function_identifier(si_disk_cache_create, &ctx) || + !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, + &ctx)) + return; + + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + /* These flags affect shader compilation. */ + #define ALL_FLAGS (DBG(FS_CORRECT_DERIVS_AFTER_KILL) | \ + DBG(SI_SCHED) | \ + DBG(GISEL) | \ + DBG(UNSAFE_MATH) | \ + DBG(NIR)) + uint64_t shader_debug_flags = sscreen->debug_flags & + ALL_FLAGS; + + /* Add the high bits of 32-bit addresses, which affects + * how 32-bit addresses are expanded to 64 bits. + */ + STATIC_ASSERT(ALL_FLAGS <= UINT_MAX); + shader_debug_flags |= (uint64_t)sscreen->info.address32_hi << 32; + + sscreen->disk_shader_cache = + disk_cache_create(sscreen->info.name, + cache_id, + shader_debug_flags); } struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, @@ -831,6 +846,15 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->ws = ws; ws->query_info(ws, &sscreen->info); + if (sscreen->info.chip_class >= GFX9) { + sscreen->se_tile_repeat = 32 * sscreen->info.max_se; + } else { + ac_get_raster_config(&sscreen->info, + &sscreen->pa_sc_raster_config, + &sscreen->pa_sc_raster_config_1, + &sscreen->se_tile_repeat); + } + sscreen->debug_flags = debug_get_flags_option("R600_DEBUG", debug_options, 0); @@ -901,26 +925,26 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, num_comp_lo_threads = MIN2(num_comp_lo_threads, ARRAY_SIZE(sscreen->compiler_lowp)); - if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader", + if (!util_queue_init(&sscreen->shader_compiler_queue, "sh", 64, num_comp_hi_threads, - UTIL_QUEUE_INIT_RESIZE_IF_FULL)) { + UTIL_QUEUE_INIT_RESIZE_IF_FULL | + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, - "si_shader_low", + "shlo", 64, num_comp_lo_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY | UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { si_destroy_shader_cache(sscreen); FREE(sscreen); return NULL; } - si_handle_env_var_force_family(sscreen); - if (!debug_get_bool_option("RADEON_DISABLE_PERFCOUNTERS", false)) si_init_perfcounters(sscreen); @@ -934,7 +958,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, unsigned max_offchip_buffers_per_se; /* Only certain chips can use the maximum value. */ - if (sscreen->info.family == CHIP_VEGA12) + if (sscreen->info.family == CHIP_VEGA12 || + sscreen->info.family == CHIP_VEGA20) max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; else max_offchip_buffers_per_se = double_offchip_buffers ? 127 : 63; @@ -972,8 +997,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, } /* The mere presense of CLEAR_STATE in the IB causes random GPU hangs - * on SI. */ - sscreen->has_clear_state = sscreen->info.chip_class >= CIK; + * on SI. Some CLEAR_STATE cause asic hang on radeon kernel, etc. + * SPI_VS_OUT_CONFIG. So only enable CI CLEAR_STATE on amdgpu kernel.*/ + sscreen->has_clear_state = sscreen->info.chip_class >= CIK && + sscreen->info.drm_major == 3; sscreen->has_distributed_tess = sscreen->info.chip_class >= VI && @@ -1006,21 +1033,28 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, sscreen->info.family == CHIP_RAVEN; sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 || sscreen->info.family == CHIP_RAVEN; + sscreen->has_dcc_constant_encode = sscreen->info.family == CHIP_RAVEN2; + + /* Only enable primitive binning on APUs by default. */ + sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + + sscreen->dfsm_allowed = sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2; + /* Process DPBB enable flags. */ if (sscreen->debug_flags & DBG(DPBB)) { sscreen->dpbb_allowed = true; - } else { - /* Only enable primitive binning on Raven by default. */ - /* TODO: Investigate if binning is profitable on Vega12. */ - sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN && - !(sscreen->debug_flags & DBG(NO_DPBB)); + if (sscreen->debug_flags & DBG(DFSM)) + sscreen->dfsm_allowed = true; } - if (sscreen->debug_flags & DBG(DFSM)) { - sscreen->dfsm_allowed = sscreen->dpbb_allowed; - } else { - sscreen->dfsm_allowed = sscreen->dpbb_allowed && - !(sscreen->debug_flags & DBG(NO_DFSM)); + /* Process DPBB disable flags. */ + if (sscreen->debug_flags & DBG(NO_DPBB)) { + sscreen->dpbb_allowed = false; + sscreen->dfsm_allowed = false; + } else if (sscreen->debug_flags & DBG(NO_DFSM)) { + sscreen->dfsm_allowed = false; } /* While it would be nice not to have this flag, we are constrained @@ -1040,7 +1074,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, !(sscreen->debug_flags & DBG(NO_RB_PLUS)) && (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA12 || - sscreen->info.family == CHIP_RAVEN); + sscreen->info.family == CHIP_RAVEN || + sscreen->info.family == CHIP_RAVEN2); } sscreen->dcc_msaa_allowed = @@ -1098,10 +1133,26 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws, if (sscreen->debug_flags & DBG(TEST_DMA)) si_test_dma(sscreen); + if (sscreen->debug_flags & DBG(TEST_DMA_PERF)) { + si_test_dma_perf(sscreen); + } + if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) | DBG(TEST_VMFAULT_SDMA) | DBG(TEST_VMFAULT_SHADER))) si_test_vmfault(sscreen); + if (sscreen->debug_flags & DBG(TEST_GDS)) + si_test_gds((struct si_context*)sscreen->aux_context); + + if (sscreen->debug_flags & DBG(TEST_GDS_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 32 * 1024, 4, RADEON_DOMAIN_GDS); + } + if (sscreen->debug_flags & DBG(TEST_GDS_OA_MM)) { + si_test_gds_memory_management((struct si_context*)sscreen->aux_context, + 4, 1, RADEON_DOMAIN_OA); + } + return &sscreen->b; }