From ad1df471d083630106da8c39ec076f49e779e965 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Tue, 30 Apr 2013 20:53:15 +0400 Subject: [PATCH] r600g: plug in optimizing backend Optimization is enabled with "R600_DEBUG=sb". Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/Makefile.am | 10 ++- src/gallium/drivers/r600/Makefile.sources | 28 ++++++++ src/gallium/drivers/r600/r600_asm.c | 11 ++++ src/gallium/drivers/r600/r600_asm.h | 1 + src/gallium/drivers/r600/r600_pipe.c | 11 ++++ src/gallium/drivers/r600/r600_pipe.h | 7 ++ src/gallium/drivers/r600/r600_shader.c | 79 ++++++++++++++++++++++- src/gallium/drivers/r600/r600_shader.h | 11 ++++ 8 files changed, 155 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index 43c8704618d..35c75ad76c7 100644 --- a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -10,8 +10,16 @@ AM_CFLAGS = \ $(RADEON_CFLAGS) \ $(VISIBILITY_CFLAGS) +AM_CXXFLAGS = \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + $(DEFINES) + libr600_la_SOURCES = \ - $(C_SOURCES) + $(C_SOURCES) \ + $(CXX_SOURCES) libr600_la_LIBADD = ../radeon/libradeon.la diff --git a/src/gallium/drivers/r600/Makefile.sources b/src/gallium/drivers/r600/Makefile.sources index ec376ef488e..df083d714a4 100644 --- a/src/gallium/drivers/r600/Makefile.sources +++ b/src/gallium/drivers/r600/Makefile.sources @@ -19,4 +19,32 @@ C_SOURCES = \ compute_memory_pool.c \ r600_uvd.c +CXX_SOURCES = \ + sb/sb_bc_builder.cpp \ + sb/sb_bc_decoder.cpp \ + sb/sb_bc_dump.cpp \ + sb/sb_bc_finalize.cpp \ + sb/sb_bc_parser.cpp \ + sb/sb_context.cpp \ + sb/sb_core.cpp \ + sb/sb_dce_cleanup.cpp \ + sb/sb_def_use.cpp \ + sb/sb_dump.cpp \ + sb/sb_expr.cpp \ + sb/sb_gcm.cpp \ + sb/sb_gvn.cpp \ + sb/sb_if_conversion.cpp \ + sb/sb_ir.cpp \ + sb/sb_liveness.cpp \ + sb/sb_pass.cpp \ + sb/sb_peephole.cpp 
\ + sb/sb_psi_ops.cpp \ + sb/sb_ra_checker.cpp \ + sb/sb_ra_coalesce.cpp \ + sb/sb_ra_init.cpp \ + sb/sb_sched.cpp \ + sb/sb_shader.cpp \ + sb/sb_ssa_builder.cpp \ + sb/sb_valtable.cpp + LLVM_C_SOURCES = r600_llvm.c diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 64e052b6fca..81b84ec5fd3 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -32,6 +32,8 @@ #include "util/u_memory.h" #include "pipe/p_shader_tokens.h" +#include "sb/sb_public.h" + #define NUM_OF_CYCLES 3 #define NUM_OF_COMPONENTS 4 @@ -126,6 +128,10 @@ void r600_bytecode_init(struct r600_bytecode *bc, enum radeon_family family, enum r600_msaa_texture_mode msaa_texture_mode) { + static unsigned next_shader_id = 0; + + bc->debug_id = ++next_shader_id; + if ((chip_class == R600) && (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) { bc->ar_handling = AR_HANDLE_RV6XX; @@ -2381,8 +2387,13 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, fprintf(stderr, "\n"); } +#if 0 r600_bytecode_disasm(&bc); + fprintf(stderr, "______________________________________________________________\n"); +#else + r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/); +#endif } fs_size = bc.ndw*4; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index c052ceabfc7..bbebaec84cc 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -214,6 +214,7 @@ struct r600_bytecode { unsigned ar_chan; unsigned ar_handling; unsigned r6xx_nop_after_rel_dst; + unsigned debug_id; struct r600_isa* isa; }; diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index b0128928857..8f6d59b3e6a 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -26,6 +26,8 @@ #include "evergreen_compute.h" #include "r600d.h" +#include "sb/sb_public.h" + #include <errno.h> #include
"pipe/p_shader_tokens.h" #include "util/u_blitter.h" @@ -64,6 +66,13 @@ static const struct debug_named_value debug_options[] = { /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, + /* shader backend */ + { "sb", DBG_SB, "Enable optimization of graphics shaders" }, + { "sbcl", DBG_SB_CS, "Enable optimization of compute shaders" }, + { "sbdry", DBG_SB_DRY_RUN, "Don't use optimized bytecode (just print the dumps)" }, + { "sbstat", DBG_SB_STAT, "Print optimization statistics for shaders" }, + { "sbdump", DBG_SB_DUMP, "Print IR dumps after some optimization passes" }, + DEBUG_NAMED_VALUE_END /* must be last */ }; @@ -305,6 +314,8 @@ static void r600_destroy_context(struct pipe_context *context) r600_isa_destroy(rctx->isa); + r600_sb_context_destroy(rctx->sb_context); + pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, NULL); pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 33c85fc5454..322989a839d 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -257,6 +257,12 @@ typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx, #define DBG_NO_CP_DMA (1 << 18) #define DBG_NO_ASYNC_DMA (1 << 19) #define DBG_NO_DISCARD_RANGE (1 << 20) +/* shader backend */ +#define DBG_SB (1 << 21) +#define DBG_SB_CS (1 << 22) +#define DBG_SB_DRY_RUN (1 << 23) +#define DBG_SB_STAT (1 << 24) +#define DBG_SB_DUMP (1 << 25) struct r600_tiling_info { unsigned num_channels; @@ -640,6 +646,7 @@ struct r600_context { unsigned current_render_cond_mode; boolean predicate_drawing; + void *sb_context; struct r600_isa *isa; }; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f0d3be405d2..fd3fe3933a0 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ 
b/src/gallium/drivers/r600/r600_shader.c @@ -27,6 +27,8 @@ #include "r600_shader.h" #include "r600d.h" +#include "sb/sb_public.h" + #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -62,6 +64,26 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, struct r600_pipe_shader *pipeshader, struct r600_shader_key key); +static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, + int size, unsigned comp_mask) { + + if (!size) + return; + + if (ps->num_arrays == ps->max_arrays) { + ps->max_arrays += 64; + ps->arrays = realloc(ps->arrays, ps->max_arrays * + sizeof(struct r600_shader_array)); + } + + int n = ps->num_arrays; + ++ps->num_arrays; + + ps->arrays[n].comp_mask = comp_mask; + ps->arrays[n].gpr_start = start_gpr; + ps->arrays[n].gpr_count = size; +} + static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens) { struct tgsi_parse_context parse; @@ -118,6 +140,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, int r, i; uint32_t *ptr; bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens)); + unsigned use_sb = rctx->screen->debug_flags & DBG_SB; shader->shader.bc.isa = rctx->isa; @@ -139,12 +162,22 @@ int r600_pipe_shader_create(struct pipe_context *ctx, R600_ERR("building bytecode failed !\n"); return r; } + +#if 0 if (dump) { fprintf(stderr, "--------------------------------------------------------------\n"); r600_bytecode_disasm(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } - +#else + if (dump || use_sb) { + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, dump, use_sb); + if (r) { + R600_ERR("r600_sb_bytecode_process failed !\n"); + return r; + } + } +#endif /* Store the shader in a buffer. 
*/ if (shader->bo == NULL) { @@ -273,6 +306,7 @@ int r600_compute_shader_create(struct pipe_context * ctx, struct r600_shader_ctx shader_ctx; boolean use_kill = false; bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0; + unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS; shader_ctx.bc = bytecode; r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family, @@ -286,9 +320,18 @@ int r600_compute_shader_create(struct pipe_context * ctx, cm_bytecode_add_cf_end(shader_ctx.bc); } r600_bytecode_build(shader_ctx.bc); + +#if 0 if (dump) { r600_bytecode_disasm(shader_ctx.bc); } +#else + if (dump || use_sb) { + if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb)) + R600_ERR("r600_sb_bytecode_process failed!\n"); + } +#endif + free(bytes); return 1; } @@ -956,8 +999,18 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) } } break; - case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: + if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { + if (d->Array.ArrayID) { + r600_add_gpr_array(ctx->shader, + ctx->file_offset[TGSI_FILE_TEMPORARY] + + d->Range.First, + d->Range.Last - d->Range.First + 1, 0b1111); + } + } + break; + + case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: case TGSI_FILE_ADDRESS: break; @@ -1248,6 +1301,7 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) return 0; } + static int r600_shader_from_tgsi(struct r600_screen *rscreen, struct r600_pipe_shader *pipeshader, struct r600_shader_key key) @@ -1267,6 +1321,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, bool use_llvm = false; unsigned char * inst_bytes = NULL; unsigned inst_byte_count = 0; + bool indirect_gprs; #ifdef R600_USE_LLVM use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM); @@ -1279,6 +1334,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, rscreen->msaa_texture_support); ctx.tokens = tokens; tgsi_scan_shader(tokens, &ctx.info); + shader->indirect_files = ctx.info.indirect_files; 
+ indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT); tgsi_parse_init(&ctx.parse, tokens); ctx.type = ctx.parse.FullHeader.Processor.Processor; shader->processor_type = ctx.type; @@ -1356,6 +1413,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; ctx.temp_reg = ctx.bc->ar_reg + 1; + if (indirect_gprs) { + shader->max_arrays = 0; + shader->num_arrays = 0; + + if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { + r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], + ctx.file_offset[TGSI_FILE_OUTPUT] - + ctx.file_offset[TGSI_FILE_INPUT], + 0b1111); + } + if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { + r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], + ctx.file_offset[TGSI_FILE_TEMPORARY] - + ctx.file_offset[TGSI_FILE_OUTPUT], + 0b1111); + } + } + ctx.nliterals = 0; ctx.literals = NULL; shader->fs_write_all = FALSE; diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index 17441ed2298..411667ae89e 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -63,6 +63,11 @@ struct r600_shader { boolean vs_out_point_size; boolean has_txq_cube_array_z_comp; boolean uses_tex_buffers; + + unsigned indirect_files; + unsigned max_arrays; + unsigned num_arrays; + struct r600_shader_array * arrays; }; struct r600_shader_key { @@ -71,6 +76,12 @@ struct r600_shader_key { unsigned nr_cbufs:4; }; +struct r600_shader_array { + unsigned gpr_start; + unsigned gpr_count; + unsigned comp_mask; +}; + struct r600_pipe_shader { struct r600_pipe_shader_selector *selector; struct r600_pipe_shader *next_variant; -- 2.30.2