radeon/llvm: Always build libradeonllvm as static
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
index 6dbca505cbe772528f6d998070a344c1422f1b41..9afd57f71a241098f76639c6e0a811c0a29351ac 100644 (file)
@@ -27,6 +27,8 @@
 #include "r600_shader.h"
 #include "r600d.h"
 
+#include "sb/sb_public.h"
+
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
@@ -62,6 +64,26 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key);
 
+/* Append an array of `size` GPRs starting at start_gpr to ps->arrays (no-op
+ * when size is 0); a failed realloc drops the entry and keeps the old table. */
+static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
+                           int size, unsigned comp_mask) {
+       if (!size)
+               return;
+       if (ps->num_arrays == ps->max_arrays) {
+               void *grown = realloc(ps->arrays, (ps->max_arrays + 64) *
+                                     sizeof(struct r600_shader_array));
+               if (!grown)
+                       return;
+               ps->arrays = grown;
+               ps->max_arrays += 64;
+       }
+       ps->arrays[ps->num_arrays].comp_mask = comp_mask;
+       ps->arrays[ps->num_arrays].gpr_start = start_gpr;
+       ps->arrays[ps->num_arrays].gpr_count = size;
+       ++ps->num_arrays;
+}
+
 static unsigned tgsi_get_processor_type(const struct tgsi_token *tokens)
 {
        struct tgsi_parse_context parse;
@@ -118,6 +140,8 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
        int r, i;
        uint32_t *ptr;
        bool dump = r600_can_dump_shader(rctx->screen, tgsi_get_processor_type(sel->tokens));
+       unsigned use_sb = rctx->screen->debug_flags & DBG_SB;
+       unsigned sb_disasm = use_sb || (rctx->screen->debug_flags & DBG_SB_DISASM);
 
        shader->shader.bc.isa = rctx->isa;
 
@@ -139,13 +163,20 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
                R600_ERR("building bytecode failed !\n");
                return r;
        }
-       if (dump) {
+
+       if (dump && !sb_disasm) {
                fprintf(stderr, "--------------------------------------------------------------\n");
                r600_bytecode_disasm(&shader->shader.bc);
                fprintf(stderr, "______________________________________________________________\n");
+       } else if ((dump && sb_disasm) || use_sb) {
+               r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
+                                            dump, use_sb);
+               if (r) {
+                       R600_ERR("r600_sb_bytecode_process failed !\n");
+                       return r;
+               }
        }
 
-
        /* Store the shader in a buffer. */
        if (shader->bo == NULL) {
                shader->bo = (struct r600_resource*)
@@ -271,22 +302,32 @@ int r600_compute_shader_create(struct pipe_context * ctx,
        unsigned char * bytes;
        unsigned byte_count;
        struct r600_shader_ctx shader_ctx;
+       boolean use_kill = false;
        bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
+       unsigned use_sb = r600_ctx->screen->debug_flags & DBG_SB_CS;
+       unsigned sb_disasm = use_sb ||
+                       (r600_ctx->screen->debug_flags & DBG_SB_DISASM);
 
-       r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
        shader_ctx.bc = bytecode;
        r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
                           r600_ctx->screen->msaa_texture_support);
        shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
        shader_ctx.bc->isa = r600_ctx->isa;
+       r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family,
+                               shader_ctx.bc, &use_kill, dump);
        r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
        if (shader_ctx.bc->chip_class == CAYMAN) {
                cm_bytecode_add_cf_end(shader_ctx.bc);
        }
        r600_bytecode_build(shader_ctx.bc);
-       if (dump) {
+
+       if (dump && !sb_disasm) {
                r600_bytecode_disasm(shader_ctx.bc);
+       } else if ((dump && sb_disasm) || use_sb) {
+               if (r600_sb_bytecode_process(r600_ctx, shader_ctx.bc, NULL, dump, use_sb))
+                       R600_ERR("r600_sb_bytecode_process failed!\n");
        }
+
        free(bytes);
        return 1;
 }
@@ -590,7 +631,6 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                                unsigned char * bytes,  unsigned num_bytes)
 {
        unsigned bytes_read = 0;
-       ctx->bc->nstack = bytes[bytes_read++];
        unsigned i, byte;
        while (bytes_read < num_bytes) {
                char inst_type = bytes[bytes_read++];
@@ -955,8 +995,18 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                        }
                }
                break;
-       case TGSI_FILE_CONSTANT:
        case TGSI_FILE_TEMPORARY:
+               if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
+                       if (d->Array.ArrayID) {
+                               r600_add_gpr_array(ctx->shader,
+                                              ctx->file_offset[TGSI_FILE_TEMPORARY] +
+                                                                  d->Range.First,
+                                              d->Range.Last - d->Range.First + 1, 0x0F);
+                       }
+               }
+               break;
+
+       case TGSI_FILE_CONSTANT:
        case TGSI_FILE_SAMPLER:
        case TGSI_FILE_ADDRESS:
                break;
@@ -1247,6 +1297,7 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+
 static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                 struct r600_pipe_shader *pipeshader,
                                 struct r600_shader_key key)
@@ -1266,6 +1317,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        bool use_llvm = false;
        unsigned char * inst_bytes = NULL;
        unsigned inst_byte_count = 0;
+       bool indirect_gprs;
 
 #ifdef R600_USE_LLVM
        use_llvm = !(rscreen->debug_flags & DBG_NO_LLVM);
@@ -1278,6 +1330,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                           rscreen->msaa_texture_support);
        ctx.tokens = tokens;
        tgsi_scan_shader(tokens, &ctx.info);
+       shader->indirect_files = ctx.info.indirect_files;
+       indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
        tgsi_parse_init(&ctx.parse, tokens);
        ctx.type = ctx.parse.FullHeader.Processor.Processor;
        shader->processor_type = ctx.type;
@@ -1355,6 +1409,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
        ctx.temp_reg = ctx.bc->ar_reg + 1;
 
+       if (indirect_gprs) {
+               shader->max_arrays = 0;
+               shader->num_arrays = 0;
+
+               if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
+                       r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
+                                          ctx.file_offset[TGSI_FILE_OUTPUT] -
+                                          ctx.file_offset[TGSI_FILE_INPUT],
+                                          0x0F);
+               }
+               if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
+                       r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
+                                          ctx.file_offset[TGSI_FILE_TEMPORARY] -
+                                          ctx.file_offset[TGSI_FILE_OUTPUT],
+                                          0x0F);
+               }
+       }
+
        ctx.nliterals = 0;
        ctx.literals = NULL;
        shader->fs_write_all = FALSE;
@@ -1445,6 +1517,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                struct radeon_llvm_context radeon_llvm_ctx;
                LLVMModuleRef mod;
                bool dump = r600_can_dump_shader(rscreen, ctx.type);
+               boolean use_kill = false;
 
                memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
                radeon_llvm_ctx.type = ctx.type;
@@ -1461,7 +1534,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
 
                if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
-                                     rscreen->family, dump)) {
+                                     rscreen->family, ctx.bc, &use_kill, dump)) {
                        FREE(inst_bytes);
                        radeon_llvm_dispose(&radeon_llvm_ctx);
                        use_llvm = 0;
@@ -1471,6 +1544,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        ctx.file_offset[TGSI_FILE_OUTPUT] =
                                        ctx.file_offset[TGSI_FILE_INPUT];
                }
+               if (use_kill)
+                       ctx.shader->uses_kill = use_kill;
                radeon_llvm_dispose(&radeon_llvm_ctx);
        }
 #endif
@@ -1898,7 +1973,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                }
        }
        /* add program end */
-       if (ctx.bc->chip_class == CAYMAN)
+       if (!use_llvm && ctx.bc->chip_class == CAYMAN)
                cm_bytecode_add_cf_end(ctx.bc);
 
        /* check GPR limit - we have 124 = 128 - 4
@@ -4739,6 +4814,26 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                /* the array index is read from Z */
                tex.coord_type_z = 0;
 
+       /* mask unused source components */
+       if (opcode == FETCH_OP_SAMPLE) {
+               switch (inst->Texture.Texture) {
+               case TGSI_TEXTURE_2D:
+               case TGSI_TEXTURE_RECT:
+                       tex.src_sel_z = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               case TGSI_TEXTURE_1D_ARRAY:
+                       tex.src_sel_y = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               case TGSI_TEXTURE_1D:
+                       tex.src_sel_y = 7;
+                       tex.src_sel_z = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               }
+       }
+
        r = r600_bytecode_add_tex(ctx->bc, &tex);
        if (r)
                return r;
@@ -5490,7 +5585,7 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
        return 0;
 }
 
-static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
 {
        struct r600_bytecode_alu alu;
        int r;
@@ -5510,7 +5605,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 
        alu.last = 1;
 
-       r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);
+       r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
        if (r)
                return r;
        return 0;
@@ -5728,9 +5823,21 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
 }
 #endif
 
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static int emit_if(struct r600_shader_ctx *ctx, int opcode)
 {
-       emit_logic_pred(ctx, ALU_OP2_PRED_SETNE_INT);
+       int alu_type = CF_OP_ALU_PUSH_BEFORE;
+
+       /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
+        * LOOP_STARTxxx for nested loops may put the branch stack into a state
+        * such that ALU_PUSH_BEFORE doesn't work as expected. Work around this
+        * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
+       if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
+               r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
+               ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+               alu_type = CF_OP_ALU;
+       }
+
+       emit_logic_pred(ctx, opcode, alu_type);
 
        r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
 
@@ -5740,6 +5847,16 @@ static int tgsi_if(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+       return emit_if(ctx, ALU_OP2_PRED_SETNE); /* TGSI_OPCODE_IF: non-_INT predicate opcode */
+}
+
+static int tgsi_uif(struct r600_shader_ctx *ctx)
+{
+       return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); /* TGSI_OPCODE_UIF: _INT predicate opcode */
+}
+
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
        r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
@@ -5991,8 +6108,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXL,       0, FETCH_OP_SAMPLE_L, tgsi_tex},
        {TGSI_OPCODE_BRK,       0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
        {TGSI_OPCODE_IF,        0, ALU_OP0_NOP, tgsi_if},
-       /* gap */
-       {75,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UIF,       0, ALU_OP0_NOP, tgsi_uif},
        {76,                    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ELSE,      0, ALU_OP0_NOP, tgsi_else},
        {TGSI_OPCODE_ENDIF,     0, ALU_OP0_NOP, tgsi_endif},
@@ -6035,7 +6151,8 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_IFC,       0, ALU_OP0_NOP, tgsi_unsupported},
+       /* gap */
+       {114,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_BREAKC,    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_KIL,       0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
        {TGSI_OPCODE_END,       0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */
@@ -6184,8 +6301,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXL,       0, FETCH_OP_SAMPLE_L, tgsi_tex},
        {TGSI_OPCODE_BRK,       0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
        {TGSI_OPCODE_IF,        0, ALU_OP0_NOP, tgsi_if},
-       /* gap */
-       {75,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UIF,       0, ALU_OP0_NOP, tgsi_uif},
        {76,                    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ELSE,      0, ALU_OP0_NOP, tgsi_else},
        {TGSI_OPCODE_ENDIF,     0, ALU_OP0_NOP, tgsi_endif},
@@ -6228,7 +6344,8 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
        {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_IFC,       0, ALU_OP0_NOP, tgsi_unsupported},
+       /* gap */
+       {114,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_BREAKC,    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_KIL,       0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
        {TGSI_OPCODE_END,       0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */
@@ -6377,8 +6494,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_TXL,       0, FETCH_OP_SAMPLE_L, tgsi_tex},
        {TGSI_OPCODE_BRK,       0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
        {TGSI_OPCODE_IF,        0, ALU_OP0_NOP, tgsi_if},
-       /* gap */
-       {75,                    0, ALU_OP0_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_UIF,       0, ALU_OP0_NOP, tgsi_uif},
        {76,                    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ELSE,      0, ALU_OP0_NOP, tgsi_else},
        {TGSI_OPCODE_ENDIF,     0, ALU_OP0_NOP, tgsi_endif},
@@ -6421,7 +6537,8 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
        {111,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_NRM4,      0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_CALLNZ,    0, ALU_OP0_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_IFC,       0, ALU_OP0_NOP, tgsi_unsupported},
+       /* gap */
+       {114,                   0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_BREAKC,    0, ALU_OP0_NOP, tgsi_unsupported},
        {TGSI_OPCODE_KIL,       0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
        {TGSI_OPCODE_END,       0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */