X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_pipe_common.c;h=e7942c92c0c9832eefd56b356e28b33cd5d0743a;hb=9bc6c135acd59a48d35ce6d7fb619e064af04239;hp=4fbcd3a65723d66cccaa7512589e335e0492f396;hpb=dc546a7bb3fae1d597e5a22d9527540ec4f072c8;p=mesa.git diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c index 4fbcd3a6572..e7942c92c0c 100644 --- a/src/gallium/drivers/r600/r600_pipe_common.c +++ b/src/gallium/drivers/r600/r600_pipe_common.c @@ -27,30 +27,24 @@ #include "r600_pipe_common.h" #include "r600_cs.h" #include "tgsi/tgsi_parse.h" +#include "compiler/nir/nir.h" #include "util/list.h" #include "util/u_draw_quad.h" #include "util/u_memory.h" -#include "util/u_format_s3tc.h" +#include "util/format/u_format_s3tc.h" #include "util/u_upload_mgr.h" -#include "os/os_time.h" +#include "util/os_time.h" #include "vl/vl_decoder.h" #include "vl/vl_video_buffer.h" #include "radeon_video.h" #include #include +#include -#ifndef HAVE_LLVM -#define HAVE_LLVM 0 -#endif - -#if HAVE_LLVM +#ifdef LLVM_AVAILABLE #include #endif -#ifndef MESA_LLVM_VERSION_PATCH -#define MESA_LLVM_VERSION_PATCH 0 -#endif - struct r600_multi_fence { struct pipe_reference reference; struct pipe_fence_handle *gfx; @@ -63,27 +57,6 @@ struct r600_multi_fence { } gfx_unflushed; }; -/* - * shader binary helpers. - */ -void radeon_shader_binary_init(struct ac_shader_binary *b) -{ - memset(b, 0, sizeof(*b)); -} - -void radeon_shader_binary_clean(struct ac_shader_binary *b) -{ - if (!b) - return; - FREE(b->code); - FREE(b->config); - FREE(b->rodata); - FREE(b->global_symbol_offsets); - FREE(b->relocs); - FREE(b->disasm_string); - FREE(b->llvm_ir_string); -} - /* * pipe_context */ @@ -105,77 +78,18 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx, struct r600_resource *buf, uint64_t va, uint32_t new_fence, unsigned query_type) { - struct radeon_winsys_cs *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = ctx->gfx.cs; unsigned op = EVENT_TYPE(event) | EVENT_INDEX(5) | event_flags; unsigned sel = EOP_DATA_SEL(data_sel); - /* Wait for write confirmation before writing data, but don't send - * an interrupt. */ - if (ctx->chip_class >= SI && data_sel != EOP_DATA_SEL_DISCARD) - sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM); - - if (ctx->chip_class >= GFX9) { - /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion - * counters) must immediately precede every timestamp event to - * prevent a GPU hang on GFX9. - * - * Occlusion queries don't need to do it here, because they - * always do ZPASS_DONE before the timestamp. - */ - if (ctx->chip_class == GFX9 && - query_type != PIPE_QUERY_OCCLUSION_COUNTER && - query_type != PIPE_QUERY_OCCLUSION_PREDICATE) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - - assert(16 * ctx->screen->info.num_render_backends <= - scratch->b.b.width0); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1)); - radeon_emit(cs, scratch->gpu_address); - radeon_emit(cs, scratch->gpu_address >> 32); - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 6, 0)); - radeon_emit(cs, op); - radeon_emit(cs, sel); - radeon_emit(cs, va); /* address lo */ - radeon_emit(cs, va >> 32); /* address hi */ - radeon_emit(cs, new_fence); /* immediate data lo */ - radeon_emit(cs, 0); /* immediate data hi */ - radeon_emit(cs, 0); /* unused */ - } else { - if (ctx->chip_class == CIK || - ctx->chip_class == VI) { - struct r600_resource *scratch = ctx->eop_bug_scratch; - uint64_t va = scratch->gpu_address; - - /* Two EOP events are required to make all engines go idle - * (and optional cache flushes executed) before the timestamp - * is written. - */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, 0); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - - radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch, - RADEON_USAGE_WRITE, RADEON_PRIO_QUERY); - } - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, op); - radeon_emit(cs, va); - radeon_emit(cs, ((va >> 32) & 0xffff) | sel); - radeon_emit(cs, new_fence); /* immediate data */ - radeon_emit(cs, 0); /* unused */ - } + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, op); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xffff) | sel); + radeon_emit(cs, new_fence); /* immediate data */ + radeon_emit(cs, 0); /* unused */ if (buf) r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, @@ -186,20 +100,17 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) { unsigned dwords = 6; - if (screen->chip_class == CIK || - screen->chip_class == VI) - dwords *= 2; - - if (!screen->info.has_virtual_memory) + if (!screen->info.r600_has_virtual_memory) dwords += 2; return dwords; } void r600_gfx_wait_fence(struct r600_common_context *ctx, + struct r600_resource *buf, uint64_t va, uint32_t ref, uint32_t mask) { - struct radeon_winsys_cs *cs = ctx->gfx.cs; + struct radeon_cmdbuf *cs = ctx->gfx.cs; radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); @@ -208,9 +119,15 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx, radeon_emit(cs, ref); /* reference value */ radeon_emit(cs, mask); /* mask */ radeon_emit(cs, 4); /* poll interval */ + + if (buf) + r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ, + RADEON_PRIO_QUERY); } void r600_draw_rectangle(struct blitter_context *blitter, + void *vertex_elements_cso, + blitter_get_vs_func get_vs, int x1, int y1, int x2, int y2, float depth, unsigned num_instances, enum blitter_attrib_type type, @@ -223,6 +140,9 @@ void r600_draw_rectangle(struct blitter_context *blitter, unsigned offset = 0; float *vb; + rctx->b.bind_vertex_elements_state(&rctx->b, vertex_elements_cso); + rctx->b.bind_vs_state(&rctx->b, get_vs(blitter)); + /* Some operations (like color resolve on r6xx) don't work * with the conventional primitive types. * One that works is PT_RECTLIST, which we use here. */ @@ -295,12 +215,9 @@ void r600_draw_rectangle(struct blitter_context *blitter, static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) { - struct radeon_winsys_cs *cs = rctx->dma.cs; + struct radeon_cmdbuf *cs = rctx->dma.cs; - /* NOP waits for idle on Evergreen and later. */ - if (rctx->chip_class >= CIK) - radeon_emit(cs, 0x00000000); /* NOP */ - else if (rctx->chip_class >= EVERGREEN) + if (rctx->chip_class >= EVERGREEN) radeon_emit(cs, 0xf0000000); /* NOP */ else { /* TODO: R600-R700 should use the FENCE packet. @@ -331,7 +248,7 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, (src && ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, RADEON_USAGE_WRITE)))) - ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); /* Flush if there's not enough space, or if the memory usage per IB * is too large. @@ -346,10 +263,10 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, * engine busy while uploads are being submitted. */ num_dw++; /* for emit_wait_idle below */ - if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) || + if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw, false) || ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { - ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); } @@ -367,29 +284,23 @@ void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, /* If GPUVM is not supported, the CS checker needs 2 entries * in the buffer list per packet, which has to be done manually. */ - if (ctx->screen->info.has_virtual_memory) { + if (ctx->screen->info.r600_has_virtual_memory) { if (dst) radeon_add_to_buffer_list(ctx, &ctx->dma, dst, - RADEON_USAGE_WRITE, - RADEON_PRIO_SDMA_BUFFER); + RADEON_USAGE_WRITE, 0); if (src) radeon_add_to_buffer_list(ctx, &ctx->dma, src, - RADEON_USAGE_READ, - RADEON_PRIO_SDMA_BUFFER); + RADEON_USAGE_READ, 0); } /* this function is called before all DMA calls, so increment this. */ ctx->num_dma_calls++; } -static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) -{ -} - void r600_preflush_suspend_features(struct r600_common_context *ctx) { /* suspend queries */ - if (!LIST_IS_EMPTY(&ctx->active_queries)) + if (!list_is_empty(&ctx->active_queries)) r600_suspend_queries(ctx); ctx->streamout.suspended = false; @@ -407,52 +318,16 @@ void r600_postflush_resume_features(struct r600_common_context *ctx) } /* resume queries */ - if (!LIST_IS_EMPTY(&ctx->active_queries)) + if (!list_is_empty(&ctx->active_queries)) r600_resume_queries(ctx); } -static void r600_add_fence_dependency(struct r600_common_context *rctx, - struct pipe_fence_handle *fence) -{ - struct radeon_winsys *ws = rctx->ws; - - if (rctx->dma.cs) - ws->cs_add_fence_dependency(rctx->dma.cs, fence); - ws->cs_add_fence_dependency(rctx->gfx.cs, fence); -} - static void r600_fence_server_sync(struct pipe_context *ctx, struct pipe_fence_handle *fence) { - struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; - - /* Only amdgpu needs to handle fence dependencies (for fence imports). - * radeon synchronizes all rings by default and will not implement + /* radeon synchronizes all rings by default and will not implement * fence imports. */ - if (rctx->screen->info.drm_major == 2) - return; - - /* Only imported fences need to be handled by fence_server_sync, - * because the winsys handles synchronizations automatically for BOs - * within the process. - * - * Simply skip unflushed fences here, and the winsys will drop no-op - * dependencies (i.e. dependencies within the same ring). - */ - if (rfence->gfx_unflushed.ctx) - return; - - /* All unflushed commands will not start execution before - * this fence dependency is signalled. - * - * Should we flush the context to allow more GPU parallelism? - */ - if (rfence->sdma) - r600_add_fence_dependency(rctx, rfence->sdma); - if (rfence->gfx) - r600_add_fence_dependency(rctx, rfence->gfx); } static void r600_flush_from_st(struct pipe_context *ctx, @@ -465,10 +340,10 @@ static void r600_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle *gfx_fence = NULL; struct pipe_fence_handle *sdma_fence = NULL; bool deferred_fence = false; - unsigned rflags = RADEON_FLUSH_ASYNC; + unsigned rflags = PIPE_FLUSH_ASYNC; if (flags & PIPE_FLUSH_END_OF_FRAME) - rflags |= RADEON_FLUSH_END_OF_FRAME; + rflags |= PIPE_FLUSH_END_OF_FRAME; /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ if (rctx->dma.cs) @@ -528,7 +403,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, struct pipe_fence_handle **fence) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - struct radeon_winsys_cs *cs = rctx->dma.cs; + struct radeon_cmdbuf *cs = rctx->dma.cs; struct radeon_saved_cs saved; bool check_vm = (rctx->screen->debug_flags & DBG_CHECK_VM) && @@ -562,7 +437,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags, * Store a linearized copy of all chunks of \p cs together with the buffer * list in \p saved. */ -void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, +void radeon_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, bool get_buffer_list) { uint32_t *buf; @@ -612,14 +487,8 @@ void radeon_clear_saved_cs(struct radeon_saved_cs *saved) static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) { struct r600_common_context *rctx = (struct r600_common_context *)ctx; - unsigned latest = rctx->ws->query_value(rctx->ws, - RADEON_GPU_RESET_COUNTER); - - if (rctx->gpu_reset_counter == latest) - return PIPE_NO_RESET; - rctx->gpu_reset_counter = latest; - return PIPE_UNKNOWN_CONTEXT_RESET; + return rctx->ws->ctx_query_reset_status(rctx->ctx); } static void r600_set_debug_callback(struct pipe_context *ctx, @@ -691,12 +560,12 @@ static bool r600_resource_commit(struct pipe_context *pctx, if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, res->buf, RADEON_USAGE_READWRITE)) { - ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); } if (radeon_emitted(ctx->dma.cs, 0) && ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, res->buf, RADEON_USAGE_READWRITE)) { - ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); } ctx->ws->cs_sync_flush(ctx->dma.cs); @@ -725,7 +594,6 @@ bool r600_common_context_init(struct r600_common_context *rctx, rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl; rctx->b.transfer_unmap = u_transfer_unmap_vtbl; rctx->b.texture_subdata = u_default_texture_subdata; - rctx->b.memory_barrier = r600_memory_barrier; rctx->b.flush = r600_flush_from_st; rctx->b.set_debug_callback = r600_set_debug_callback; rctx->b.fence_server_sync = r600_fence_server_sync; @@ -740,13 +608,7 @@ bool r600_common_context_init(struct r600_common_context *rctx, else rctx->b.buffer_subdata = r600_buffer_subdata; - if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { - rctx->b.get_device_reset_status = r600_get_reset_status; - rctx->gpu_reset_counter = - rctx->ws->query_value(rctx->ws, - RADEON_GPU_RESET_COUNTER); - } - + rctx->b.get_device_reset_status = r600_get_reset_status; rctx->b.set_device_reset_callback = r600_set_device_reset_callback; r600_init_context_texture_functions(rctx); @@ -755,16 +617,6 @@ bool r600_common_context_init(struct r600_common_context *rctx, r600_query_init(rctx); cayman_init_msaa(&rctx->b); - if (rctx->chip_class == CIK || - rctx->chip_class == VI || - rctx->chip_class == GFX9) { - rctx->eop_bug_scratch = (struct r600_resource*) - pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT, - 16 * rscreen->info.num_render_backends); - if (!rctx->eop_bug_scratch) - return false; - } - rctx->allocator_zeroed_memory = u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 0, PIPE_USAGE_DEFAULT, 0, true); @@ -772,12 +624,12 @@ bool r600_common_context_init(struct r600_common_context *rctx, return false; rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024, - 0, PIPE_USAGE_STREAM); + 0, PIPE_USAGE_STREAM, 0); if (!rctx->b.stream_uploader) return false; rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024, - 0, PIPE_USAGE_DEFAULT); + 0, PIPE_USAGE_DEFAULT, 0); if (!rctx->b.const_uploader) return false; @@ -785,10 +637,10 @@ bool r600_common_context_init(struct r600_common_context *rctx, if (!rctx->ctx) return false; - if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { + if (rscreen->info.num_rings[RING_DMA] && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, r600_flush_dma_ring, - rctx); + rctx, false); rctx->dma.flush = r600_flush_dma_ring; } @@ -797,20 +649,6 @@ bool r600_common_context_init(struct r600_common_context *rctx, void r600_common_context_cleanup(struct r600_common_context *rctx) { - unsigned i,j; - - /* Release DCC stats. */ - for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { - assert(!rctx->dcc_stats[i].query_active); - - for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++) - if (rctx->dcc_stats[i].ps_stats[j]) - rctx->b.destroy_query(&rctx->b, - rctx->dcc_stats[i].ps_stats[j]); - - r600_texture_reference(&rctx->dcc_stats[i].tex, NULL); - } - if (rctx->query_result_shader) rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); @@ -881,15 +719,7 @@ static const struct debug_named_value common_debug_options[] = { { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, { "nowc", DBG_NO_WC, "Disable GTT write combining" }, { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, - { "nodcc", DBG_NO_DCC, "Disable DCC." }, - { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, - { "norbplus", DBG_NO_RB_PLUS, "Disable RB+." }, - { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." }, - { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, - { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" }, - { "nodpbb", DBG_NO_DPBB, "Disable DPBB." }, - { "nodfsm", DBG_NO_DFSM, "Disable DFSM." }, DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -904,13 +734,6 @@ static const char* r600_get_device_vendor(struct pipe_screen* pscreen) return "AMD"; } -static const char *r600_get_marketing_name(struct radeon_winsys *ws) -{ - if (!ws->get_chip_name) - return NULL; - return ws->get_chip_name(ws); -} - static const char *r600_get_family_name(const struct r600_common_screen *rscreen) { switch (rscreen->info.family) { @@ -939,26 +762,6 @@ static const char *r600_get_family_name(const struct r600_common_screen *rscreen case CHIP_CAICOS: return "AMD CAICOS"; case CHIP_CAYMAN: return "AMD CAYMAN"; case CHIP_ARUBA: return "AMD ARUBA"; - case CHIP_TAHITI: return "AMD TAHITI"; - case CHIP_PITCAIRN: return "AMD PITCAIRN"; - case CHIP_VERDE: return "AMD CAPE VERDE"; - case CHIP_OLAND: return "AMD OLAND"; - case CHIP_HAINAN: return "AMD HAINAN"; - case CHIP_BONAIRE: return "AMD BONAIRE"; - case CHIP_KAVERI: return "AMD KAVERI"; - case CHIP_KABINI: return "AMD KABINI"; - case CHIP_HAWAII: return "AMD HAWAII"; - case CHIP_MULLINS: return "AMD MULLINS"; - case CHIP_TONGA: return "AMD TONGA"; - case CHIP_ICELAND: return "AMD ICELAND"; - case CHIP_CARRIZO: return "AMD CARRIZO"; - case CHIP_FIJI: return "AMD FIJI"; - case CHIP_POLARIS10: return "AMD POLARIS10"; - case CHIP_POLARIS11: return "AMD POLARIS11"; - case CHIP_POLARIS12: return "AMD POLARIS12"; - case CHIP_STONEY: return "AMD STONEY"; - case CHIP_VEGA10: return "AMD VEGA10"; - case CHIP_RAVEN: return "AMD RAVEN"; default: return "AMD unknown"; } } @@ -969,39 +772,28 @@ static void r600_disk_cache_create(struct r600_common_screen *rscreen) if (rscreen->debug_flags & DBG_ALL_SHADERS) return; - uint32_t mesa_timestamp; - if (disk_cache_get_function_timestamp(r600_disk_cache_create, - &mesa_timestamp)) { - char *timestamp_str; - int res = -1; - if (rscreen->chip_class < SI) { - res = asprintf(×tamp_str, "%u",mesa_timestamp); - } -#if HAVE_LLVM - else { - uint32_t llvm_timestamp; - if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, - &llvm_timestamp)) { - res = asprintf(×tamp_str, "%u_%u", - mesa_timestamp, llvm_timestamp); - } - } -#endif - if (res != -1) { - /* These flags affect shader compilation. */ - uint64_t shader_debug_flags = - rscreen->debug_flags & - (DBG_FS_CORRECT_DERIVS_AFTER_KILL | - DBG_SI_SCHED | - DBG_UNSAFE_MATH); - - rscreen->disk_shader_cache = - disk_cache_create(r600_get_family_name(rscreen), - timestamp_str, - shader_debug_flags); - free(timestamp_str); - } - } + struct mesa_sha1 ctx; + unsigned char sha1[20]; + char cache_id[20 * 2 + 1]; + + _mesa_sha1_init(&ctx); + if (!disk_cache_get_function_identifier(r600_disk_cache_create, + &ctx)) + return; + + _mesa_sha1_final(&ctx, sha1); + disk_cache_format_hex_id(cache_id, sha1, 20 * 2); + + /* These flags affect shader compilation. */ + uint64_t shader_debug_flags = + rscreen->debug_flags & + (DBG_FS_CORRECT_DERIVS_AFTER_KILL | + DBG_UNSAFE_MATH); + + rscreen->disk_shader_cache = + disk_cache_create(r600_get_family_name(rscreen), + cache_id, + shader_debug_flags); } static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) @@ -1027,19 +819,15 @@ static float r600_get_paramf(struct pipe_screen* pscreen, case PIPE_CAPF_MAX_LINE_WIDTH_AA: case PIPE_CAPF_MAX_POINT_WIDTH: case PIPE_CAPF_MAX_POINT_WIDTH_AA: - if (rscreen->family >= CHIP_CEDAR) - return 16384.0f; - else - return 8192.0f; + return 8191.0f; case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: return 16.0f; case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: return 16.0f; - case PIPE_CAPF_GUARD_BAND_LEFT: - case PIPE_CAPF_GUARD_BAND_TOP: - case PIPE_CAPF_GUARD_BAND_RIGHT: - case PIPE_CAPF_GUARD_BAND_BOTTOM: - return 0.0f; + case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE: + case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY: + return 0.0f; } return 0.0f; } @@ -1115,32 +903,6 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) case CHIP_ARUBA: return "cayman"; - case CHIP_TAHITI: return "tahiti"; - case CHIP_PITCAIRN: return "pitcairn"; - case CHIP_VERDE: return "verde"; - case CHIP_OLAND: return "oland"; - case CHIP_HAINAN: return "hainan"; - case CHIP_BONAIRE: return "bonaire"; - case CHIP_KABINI: return "kabini"; - case CHIP_KAVERI: return "kaveri"; - case CHIP_HAWAII: return "hawaii"; - case CHIP_MULLINS: - return "mullins"; - case CHIP_TONGA: return "tonga"; - case CHIP_ICELAND: return "iceland"; - case CHIP_CARRIZO: return "carrizo"; - case CHIP_FIJI: - return "fiji"; - case CHIP_STONEY: - return "stoney"; - case CHIP_POLARIS10: - return "polaris10"; - case CHIP_POLARIS11: - case CHIP_POLARIS12: /* same as polaris11 */ - return "polaris11"; - case CHIP_VEGA10: - case CHIP_RAVEN: - return "gfx900"; default: return ""; } @@ -1149,19 +911,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) static unsigned get_max_threads_per_block(struct r600_common_screen *screen, enum pipe_shader_ir ir_type) { - if (ir_type != PIPE_SHADER_IR_TGSI) + if (ir_type != PIPE_SHADER_IR_TGSI && + ir_type != PIPE_SHADER_IR_NIR) return 256; - - /* Only 16 waves per thread-group on gfx9. */ - if (screen->chip_class >= GFX9) + if (screen->chip_class >= EVERGREEN) return 1024; - - /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice - * round number. - */ - if (screen->chip_class >= SI) - return 2048; - return 256; } @@ -1176,24 +930,8 @@ static int r600_get_compute_param(struct pipe_screen *screen, switch (param) { case PIPE_COMPUTE_CAP_IR_TARGET: { const char *gpu; - const char *triple; - if (rscreen->family <= CHIP_ARUBA) { - triple = "r600--"; - } else { - if (HAVE_LLVM < 0x0400) { - triple = "amdgcn--"; - } else { - triple = "amdgcn-mesa-mesa3d"; - } - } - switch(rscreen->family) { - /* Clang < 3.6 is missing Hainan in its list of - * GPUs, so we need to use the name of a similar GPU. - */ - default: - gpu = r600_get_llvm_processor_name(rscreen->family); - break; - } + const char *triple = "r600--"; + gpu = r600_get_llvm_processor_name(rscreen->family); if (ret) { sprintf(ret, "%s-%s", gpu, triple); } @@ -1236,8 +974,6 @@ static int r600_get_compute_param(struct pipe_screen *screen, if (ret) { uint32_t *address_bits = ret; address_bits[0] = 32; - if (rscreen->chip_class >= SI) - address_bits[0] = 64; } return 1 * sizeof(uint32_t); @@ -1317,11 +1053,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: if (ret) { uint64_t *max_variable_threads_per_block = ret; - if (rscreen->chip_class >= SI && - ir_type == PIPE_SHADER_IR_TGSI) - *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; - else - *max_variable_threads_per_block = 0; + *max_variable_threads_per_block = 0; } return sizeof(uint64_t); } @@ -1354,10 +1086,10 @@ static void r600_fence_reference(struct pipe_screen *screen, *rdst = rsrc; } -static boolean r600_fence_finish(struct pipe_screen *screen, - struct pipe_context *ctx, - struct pipe_fence_handle *fence, - uint64_t timeout) +static bool r600_fence_finish(struct pipe_screen *screen, + struct pipe_context *ctx, + struct pipe_fence_handle *fence, + uint64_t timeout) { struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; @@ -1385,7 +1117,7 @@ static boolean r600_fence_finish(struct pipe_screen *screen, if (rctx && rfence->gfx_unflushed.ctx == rctx && rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { - rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); + rctx->gfx.flush(rctx, timeout ? 0 : PIPE_FLUSH_ASYNC, NULL); rfence->gfx_unflushed.ctx = NULL; if (!timeout) @@ -1434,12 +1166,8 @@ static void r600_query_memory_info(struct pipe_screen *screen, info->device_memory_evicted = ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; - if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) - info->nr_device_memory_evictions = - ws->query_value(ws, RADEON_NUM_EVICTIONS); - else - /* Just return the number of evicted 64KB pages. */ - info->nr_device_memory_evictions = info->device_memory_evicted / 64; + /* Just return the number of evicted 64KB pages. */ + info->nr_device_memory_evictions = info->device_memory_evicted / 64; } struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, @@ -1452,37 +1180,58 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, } } +const struct nir_shader_compiler_options r600_nir_options = { + .fuse_ffma = true, + .lower_scmp = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_fpow = true, + .lower_fdiv = true, + .lower_idiv = true, + .lower_fmod = true, + .lower_doubles_options = nir_lower_fp64_full_software, + .lower_int64_options = 0, + .lower_extract_byte = true, + .lower_extract_word = true, + .max_unroll_iterations = 32, + .lower_all_io_to_temps = true, + .vectorize_io = true +}; + +static const void * +r600_get_compiler_options(struct pipe_screen *screen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) +{ + assert(ir == PIPE_SHADER_IR_NIR); + return &r600_nir_options; +} + bool r600_common_screen_init(struct r600_common_screen *rscreen, struct radeon_winsys *ws) { - char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; + char family_name[32] = {}, kernel_version[128] = {}; struct utsname uname_data; const char *chip_name; ws->query_info(ws, &rscreen->info); rscreen->ws = ws; - if ((chip_name = r600_get_marketing_name(ws))) - snprintf(family_name, sizeof(family_name), "%s / ", - r600_get_family_name(rscreen) + 4); - else - chip_name = r600_get_family_name(rscreen); + chip_name = r600_get_family_name(rscreen); if (uname(&uname_data) == 0) snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); - if (HAVE_LLVM > 0) { - snprintf(llvm_string, sizeof(llvm_string), - ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, - HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); - } - snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), - "%s (%sDRM %i.%i.%i%s%s)", + "%s (%sDRM %i.%i.%i%s" +#ifdef LLVM_AVAILABLE + ", LLVM " MESA_LLVM_VERSION_STRING +#endif + ")", chip_name, family_name, rscreen->info.drm_major, rscreen->info.drm_minor, rscreen->info.drm_patchlevel, - kernel_version, llvm_string); + kernel_version); rscreen->b.get_name = r600_get_name; rscreen->b.get_vendor = r600_get_vendor; @@ -1491,6 +1240,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.get_compute_param = r600_get_compute_param; rscreen->b.get_paramf = r600_get_paramf; rscreen->b.get_timestamp = r600_get_timestamp; + rscreen->b.get_compiler_options = r600_get_compiler_options; rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.resource_destroy = u_resource_destroy_vtbl; @@ -1511,8 +1261,10 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->family = rscreen->info.family; rscreen->chip_class = rscreen->info.chip_class; rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); - rscreen->has_rbplus = false; - rscreen->rbplus_allowed = false; + int has_draw_use_llvm = debug_get_bool_option("DRAW_USE_LLVM", FALSE); + if (!has_draw_use_llvm) + setenv("DRAW_USE_LLVM", "no", 0); + r600_disk_cache_create(rscreen); @@ -1545,11 +1297,11 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); - printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); + printf("r600_has_virtual_memory = %i\n", rscreen->info.r600_has_virtual_memory); printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); - printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); - printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings); + printf("num_rings[RING_DMA] = %i\n", rscreen->info.num_rings[RING_DMA]); + printf("num_rings[RING_COMPUTE] = %u\n", rscreen->info.num_rings[RING_COMPUTE]); printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); printf("me_fw_version = %i\n", rscreen->info.me_fw_version);