r600g: plug in optimizing backend
author Vadim Girlin <vadimgirlin@gmail.com>
Tue, 30 Apr 2013 16:53:15 +0000 (20:53 +0400)
committer Vadim Girlin <vadimgirlin@gmail.com>
Tue, 30 Apr 2013 17:50:47 +0000 (21:50 +0400)
Optimization is enabled with "R600_DEBUG=sb".

Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
src/gallium/drivers/r600/Makefile.am
src/gallium/drivers/r600/Makefile.sources
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h

index 43c8704618d80c3c32eefb5baf174b66b1ba88e9..35c75ad76c72399d990b2dc717acbf223dd675ec 100644 (file)
@@ -10,8 +10,16 @@ AM_CFLAGS = \
        $(RADEON_CFLAGS) \
        $(VISIBILITY_CFLAGS)
 
+AM_CXXFLAGS = \
+       -I$(top_srcdir)/src/gallium/drivers \
+       -I$(top_srcdir)/include \
+       -I$(top_srcdir)/src/gallium/include \
+       -I$(top_srcdir)/src/gallium/auxiliary \
+       $(DEFINES)
+
 libr600_la_SOURCES = \
-       $(C_SOURCES)
+       $(C_SOURCES) \
+       $(CXX_SOURCES)
 
 libr600_la_LIBADD = ../radeon/libradeon.la
 
index ec376ef488e45013f3f187503582f114ef20363b..df083d714a4ea108397bf5e21299a72bfde10609 100644 (file)
@@ -19,4 +19,32 @@ C_SOURCES = \
        compute_memory_pool.c \
        r600_uvd.c
 
+CXX_SOURCES = \
+       sb/sb_bc_builder.cpp \
+       sb/sb_bc_decoder.cpp \
+       sb/sb_bc_dump.cpp \
+       sb/sb_bc_finalize.cpp \
+       sb/sb_bc_parser.cpp \
+       sb/sb_context.cpp \
+       sb/sb_core.cpp \
+       sb/sb_dce_cleanup.cpp \
+       sb/sb_def_use.cpp \
+       sb/sb_dump.cpp \
+       sb/sb_expr.cpp \
+       sb/sb_gcm.cpp \
+       sb/sb_gvn.cpp \
+       sb/sb_if_conversion.cpp \
+       sb/sb_ir.cpp \
+       sb/sb_liveness.cpp \
+       sb/sb_pass.cpp \
+       sb/sb_peephole.cpp \
+       sb/sb_psi_ops.cpp \
+       sb/sb_ra_checker.cpp \
+       sb/sb_ra_coalesce.cpp \
+       sb/sb_ra_init.cpp \
+       sb/sb_sched.cpp \
+       sb/sb_shader.cpp \
+       sb/sb_ssa_builder.cpp \
+       sb/sb_valtable.cpp
+
 LLVM_C_SOURCES = r600_llvm.c
index 64e052b6fcadbb2db5a7e957e9197bfad9ecf6dc..81b84ec5fd30b6c92f91d9353c11746c0f9bd5f7 100644 (file)
@@ -32,6 +32,8 @@
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "sb/sb_public.h"
+
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
@@ -126,6 +128,10 @@ void r600_bytecode_init(struct r600_bytecode *bc,
                        enum radeon_family family,
                        enum r600_msaa_texture_mode msaa_texture_mode)
 {
+       static unsigned next_shader_id = 0;
+
+       bc->debug_id = ++next_shader_id;
+
        if ((chip_class == R600) &&
            (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
                bc->ar_handling = AR_HANDLE_RV6XX;
@@ -2381,8 +2387,13 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
                        fprintf(stderr, "\n");
                }
 
+#if 0
                r600_bytecode_disasm(&bc);
+
                fprintf(stderr, "______________________________________________________________\n");
+#else
+               r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/);
+#endif
        }
 
        fs_size = bc.ndw*4;
index c052ceabfc7713b2ddb5fe99eefd3feec98a16f0..bbebaec84cc3e70038891bb3b4c246362b67d434 100644 (file)
@@ -214,6 +214,7 @@ struct r600_bytecode {
        unsigned        ar_chan;
        unsigned        ar_handling;
        unsigned        r6xx_nop_after_rel_dst;
+       unsigned        debug_id;
        struct r600_isa* isa;
 };
 
index b0128928857a36242fddf4b2b6ef39c0afa35ebf..8f6d59b3e6aca2842ab2bbc28d0d9933423cfd74 100644 (file)
@@ -26,6 +26,8 @@
 #include "evergreen_compute.h"
 #include "r600d.h"
 
+#include "sb/sb_public.h"
+
 #include <errno.h>
 #include "pipe/p_shader_tokens.h"
 #include "util/u_blitter.h"
@@ -64,6 +66,13 @@ static const struct debug_named_value debug_options[] = {
        /* GL uses the word INVALIDATE, gallium uses the word DISCARD */
        { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
 
+       /* shader backend */
+       { "sb", DBG_SB, "Enable optimization of graphics shaders" },
+       { "sbcl", DBG_SB_CS, "Enable optimization of compute shaders" },
+       { "sbdry", DBG_SB_DRY_RUN, "Don't use optimized bytecode (just print the dumps)" },
+       { "sbstat", DBG_SB_STAT, "Print optimization statistics for shaders" },
+       { "sbdump", DBG_SB_DUMP, "Print IR dumps after some optimization passes" },
+
        DEBUG_NAMED_VALUE_END /* must be last */
 };
 
@@ -305,6 +314,8 @@ static void r600_destroy_context(struct pipe_context *context)
 
        r600_isa_destroy(rctx->isa);
 
+       r600_sb_context_destroy(rctx->sb_context);
+
        pipe_resource_reference((struct pipe_resource**)&rctx->dummy_cmask, NULL);
        pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
 
index 33c85fc5454dfd63c59894ab64e20fa2e44b9924..322989a839d9130ebdaa5fba220ac2777d8cdb92 100644 (file)
@@ -257,6 +257,12 @@ typedef boolean (*r600g_dma_blit_t)(struct pipe_context *ctx,
 #define DBG_NO_CP_DMA          (1 << 18)
 #define DBG_NO_ASYNC_DMA       (1 << 19)
 #define DBG_NO_DISCARD_RANGE   (1 << 20)
+/* shader backend (sb) debug flags; the quoted names are the debug_options entries */
+#define DBG_SB                 (1 << 21) /* "sb": enable optimization of graphics shaders */
+#define DBG_SB_CS              (1 << 22) /* "sbcl": enable optimization of compute shaders */
+#define DBG_SB_DRY_RUN (1 << 23) /* "sbdry": don't use optimized bytecode, just print the dumps */
+#define DBG_SB_STAT            (1 << 24) /* "sbstat": print optimization statistics for shaders */
+#define DBG_SB_DUMP            (1 << 25) /* "sbdump": print IR dumps after some optimization passes */
 
 struct r600_tiling_info {
        unsigned num_channels;
@@ -640,6 +646,7 @@ struct r600_context {
        unsigned                        current_render_cond_mode;
        boolean                         predicate_drawing;
 
+       void                            *sb_context;
        struct r600_isa         *isa;
 };
 
index f0d3be405d23ae089f1317cb120261c807fa3eb6..fd3fe3933a0cdb7c0159d19981021f259e63b1e2 100644 (file)
@@ -27,6 +27,8 @@
 #include "r600_shader.h"
 #include "r600d.h"
 
+#include "sb/sb_public.h"
+
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
@@ -62,6 +64,45 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key);
 
+/**
+ * Append one GPR array description to the shader's array list.
+ *
+ * start_gpr, size and comp_mask are stored as the new entry's gpr_start,
+ * gpr_count and comp_mask. A size of 0 adds nothing.
+ *
+ * The list grows by 64 entries whenever it is full. If that allocation
+ * fails, an error is logged and the array is not recorded; the entries
+ * added earlier remain valid.
+ */
+static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
+                           int size, unsigned comp_mask) {
+       struct r600_shader_array *entry;
+
+       if (!size)
+               return;
+
+       if (ps->num_arrays == ps->max_arrays) {
+               unsigned new_max = ps->max_arrays + 64;
+               /* Resize through a temporary: assigning realloc()'s result
+                * directly to ps->arrays would leak the old block and leave
+                * a NULL pointer behind on failure. */
+               struct r600_shader_array *grown =
+                       realloc(ps->arrays, new_max * sizeof(*grown));
+
+               if (!grown) {
+                       R600_ERR("failed to grow the GPR array list\n");
+                       return;
+               }
+               ps->arrays = grown;
+               ps->max_arrays = new_max;
+       }
+
+       entry = &ps->arrays[ps->num_arrays++];
+       entry->comp_mask = comp_mask;
+       entry->gpr_start = start_gpr;
+       entry->gpr_count = size;
+}
+
 static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
 {
        struct tgsi_parse_context parse;
@@ -118,6 +140,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
        int r, i;
        uint32_t *ptr;
        bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
+       unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
 
        shader->shader.bc.isa = rctx->isa;
 
@@ -139,12 +162,22 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                R600_ERR("building bytecode failed !\n");
                return r;
        }
+
+#if 0
        if (dump) {
                fprintf(stderr, "--------------------------------------------------------------\n");
                r600_bytecode_disasm(&shader->shader.bc);
                fprintf(stderr, "______________________________________________________________\n");
        }
-
+#else
+       if (dump || use_sb) {
+               r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, dump, use_sb);
+               if (r) {
+                       R600_ERR("r600_sb_bytecode_process failed !\n");
+                       return r;
+               }
+       }
+#endif
 
        /* Store the shader in a buffer. */
        if (shader->bo == NULL) {
@@ -273,6 +306,7 @@ int r600_compute_shader_create(struct pipe_context * ctx,
        struct r600_shader_ctx shader_ctx;
        boolean use_kill = false;
        bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
+       unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS;
 
        shader_ctx.bc = bytecode;
        r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
@@ -286,9 +320,18 @@ int r600_compute_shader_create(struct pipe_context * ctx,
                cm_bytecode_add_cf_end(shader_ctx.bc);
        }
        r600_bytecode_build(shader_ctx.bc);
+
+#if 0
        if (dump) {
                r600_bytecode_disasm(shader_ctx.bc);
        }
+#else
+       if (dump || use_sb) {
+               if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb))
+                       R600_ERR("r600_sb_bytecode_process failed!\n");
+       }
+#endif
+
        free(bytes);
        return 1;
 }
@@ -956,8 +999,18 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                        }
                }
                break;
-       case TGSI_FILE_CONSTANT:
        case TGSI_FILE_TEMPORARY:
+               if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { /* TEMPORARY bit set in indirect_files */
+                       if (d->Array.ArrayID) { /* only declarations with a non-zero ArrayID are recorded */
+                               r600_add_gpr_array(ctx->shader,
+                                              ctx->file_offset[TGSI_FILE_TEMPORARY] +
+                                                                  d->Range.First,
+                                              d->Range.Last - d->Range.First + 1, 0xf); /* hex, not 0b...: binary literals are a GNU extension */
+                       }
+               }
+               break;
+
+       case TGSI_FILE_CONSTANT:
        case TGSI_FILE_SAMPLER:
        case TGSI_FILE_ADDRESS:
                break;
@@ -1248,6 +1301,7 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+
 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key)
@@ -1267,6 +1321,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        bool use_llvm = false;
        unsigned char * inst_bytes = NULL;
        unsigned inst_byte_count = 0;
+       bool indirect_gprs;
 
 #ifdef R600_USE_LLVM
        use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM);
@@ -1279,6 +1334,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                           rscreen->msaa_texture_support);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
+       shader->indirect_files = ctx.info.indirect_files;
+       indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
        tgsi_parse_init(&ctx.parse, tokens);
        ctx.type = ctx.parse.FullHeader.Processor.Processor;
        shader->processor_type = ctx.type;
@@ -1356,6 +1413,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
        ctx.temp_reg = ctx.bc->ar_reg + 1;
 
+       if (indirect_gprs) { /* indirect_files has a bit set other than TGSI_FILE_CONSTANT */
+               shader->max_arrays = 0;
+               shader->num_arrays = 0;
+               /* Record the INPUT and OUTPUT file_offset ranges as one array each; mask is hex because 0b... is a GNU extension. */
+               if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
+                       r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
+                                          ctx.file_offset[TGSI_FILE_OUTPUT] -
+                                          ctx.file_offset[TGSI_FILE_INPUT],
+                                          0xf);
+               }
+               if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+                       r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
+                                          ctx.file_offset[TGSI_FILE_TEMPORARY] -
+                                          ctx.file_offset[TGSI_FILE_OUTPUT],
+                                          0xf);
+               }
+       }
+
        ctx.nliterals = 0;
        ctx.literals = NULL;
        shader->fs_write_all = FALSE;
index 17441ed2298108b3083849d1f89f5b9b8ef141ec..411667ae89eac58bb52bdbb3ca28fd0f9efeb9ff 100644 (file)
@@ -63,6 +63,11 @@ struct r600_shader {
        boolean                 vs_out_point_size;
        boolean                 has_txq_cube_array_z_comp;
        boolean                 uses_tex_buffers;
+
+       unsigned                indirect_files;
+       unsigned                max_arrays;
+       unsigned                num_arrays;
+       struct r600_shader_array * arrays;
 };
 
 struct r600_shader_key {
@@ -71,6 +76,12 @@ struct r600_shader_key {
        unsigned nr_cbufs:4;
 };
 
+struct r600_shader_array { /* element type of the arrays list in struct r600_shader */
+       unsigned gpr_start; /* first GPR of the array */
+       unsigned gpr_count; /* number of GPRs in the array */
+       unsigned comp_mask; /* component mask; presumably one bit per channel - TODO confirm against the sb consumers */
+};
+
 struct r600_pipe_shader {
        struct r600_pipe_shader_selector *selector;
        struct r600_pipe_shader *next_variant;