radeonsi: pass TGSI processor type to si_compile_llvm for dumping
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 14f12dfb8213fa29e83306ce6ad6c7e22891c632..c2e802e69f22ea8509397c553c3c1923fab16b04 100644 (file)
@@ -82,11 +82,11 @@ struct si_shader_context
        int param_es2gs_offset;
        LLVMTargetMachineRef tm;
        LLVMValueRef const_md;
-       LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
+       LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
        LLVMValueRef lds;
        LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
-       LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
-       LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
+       LLVMValueRef sampler_views[SI_NUM_SAMPLER_VIEWS];
+       LLVMValueRef sampler_states[SI_NUM_SAMPLER_STATES];
        LLVMValueRef so_buffers[4];
        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring[4];
@@ -394,7 +394,7 @@ static void declare_input_vs(
        LLVMValueRef input;
 
        /* Load the T list */
-       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
+       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFERS);
 
        t_offset = lp_build_const_int32(gallivm, input_index);
 
@@ -594,6 +594,14 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                            lp_build_const_int32(gallivm, swizzle));
 
        value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+       if (type == TGSI_TYPE_DOUBLE) {
+               LLVMValueRef value2;
+               dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+                                      lp_build_const_int32(gallivm, swizzle + 1));
+               value2 = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
+               return radeon_llvm_emit_fetch_double(bld_base, value, value2);
+       }
+
        return LLVMBuildBitCast(gallivm->builder, value,
                                tgsi2llvmtype(bld_base, type), "");
 }
@@ -733,6 +741,7 @@ static LLVMValueRef fetch_input_gs(
        unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
        unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
        unsigned param;
+       LLVMValueRef value;
 
        if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
                return get_primitive_id(bld_base, swizzle);
@@ -774,11 +783,22 @@ static LLVMValueRef fetch_input_gs(
        args[7] = uint->zero; /* SLC */
        args[8] = uint->zero; /* TFE */
 
+       value = lp_build_intrinsic(gallivm->builder,
+                                  "llvm.SI.buffer.load.dword.i32.i32",
+                                  i32, args, 9,
+                                  LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+       if (type == TGSI_TYPE_DOUBLE) {
+               LLVMValueRef value2;
+               args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
+               value2 = lp_build_intrinsic(gallivm->builder,
+                                           "llvm.SI.buffer.load.dword.i32.i32",
+                                           i32, args, 9,
+                                           LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
+               return radeon_llvm_emit_fetch_double(bld_base,
+                                                    value, value2);
+       }
        return LLVMBuildBitCast(gallivm->builder,
-                               lp_build_intrinsic(gallivm->builder,
-                                               "llvm.SI.buffer.load.dword.i32.i32",
-                                               i32, args, 9,
-                                               LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
+                               value,
                                tgsi2llvmtype(bld_base, type), "");
 }
 
@@ -1045,7 +1065,7 @@ static LLVMValueRef load_sample_position(struct radeon_llvm_context *radeon_bld,
        struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
        struct gallivm_state *gallivm = &radeon_bld->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
-       LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
        LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
        LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
 
@@ -1213,13 +1233,13 @@ static LLVMValueRef fetch_constant(
        }
 
        if (reg->Register.Dimension && reg->Dimension.Indirect) {
-               LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+               LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
                LLVMValueRef index;
                index = get_indirect_index(si_shader_ctx, &reg->DimIndirect,
                                                   reg->Dimension.Index);
                bufp = build_indexed_load_const(si_shader_ctx, ptr, index);
        } else
-               bufp = si_shader_ctx->const_resource[buf];
+               bufp = si_shader_ctx->const_buffers[buf];
 
        addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
        addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
@@ -1240,7 +1260,7 @@ static LLVMValueRef fetch_constant(
                addr2 = lp_build_add(&bld_base->uint_bld, addr2,
                                     lp_build_const_int32(base->gallivm, idx * 4));
 
-               result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
+               result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_buffers[buf],
                                   addr2, bld_base->base.elem_type);
 
                result = radeon_llvm_emit_fetch_double(bld_base,
@@ -1412,7 +1432,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
        unsigned chan;
        unsigned const_chan;
        LLVMValueRef base_elt;
-       LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
        LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_DRIVER_STATE_CONST_BUF);
        LLVMValueRef const_resource = build_indexed_load_const(si_shader_ctx, ptr, constbuf_index);
 
@@ -2370,10 +2390,10 @@ static void tex_fetch_ptrs(
 
                ind_index = get_indirect_index(si_shader_ctx, &reg->Indirect, reg->Register.Index);
 
-               *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+               *res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
                *res_ptr = build_indexed_load_const(si_shader_ctx, *res_ptr, ind_index);
 
-               *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+               *samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
                *samp_ptr = build_indexed_load_const(si_shader_ctx, *samp_ptr, ind_index);
 
                if (target == TGSI_TEXTURE_2D_MSAA ||
@@ -2381,13 +2401,13 @@ static void tex_fetch_ptrs(
                        ind_index = LLVMBuildAdd(gallivm->builder, ind_index,
                                                 lp_build_const_int32(gallivm,
                                                                      SI_FMASK_TEX_OFFSET), "");
-                       *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
+                       *fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
                        *fmask_ptr = build_indexed_load_const(si_shader_ctx, *fmask_ptr, ind_index);
                }
        } else {
-               *res_ptr = si_shader_ctx->resources[sampler_index];
-               *samp_ptr = si_shader_ctx->samplers[sampler_index];
-               *fmask_ptr = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
+               *res_ptr = si_shader_ctx->sampler_views[sampler_index];
+               *samp_ptr = si_shader_ctx->sampler_states[sampler_index];
+               *fmask_ptr = si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + sampler_index];
        }
 }
 
@@ -3412,15 +3432,15 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        v16i8 = LLVMVectorType(i8, 16);
 
        params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
-       params[SI_PARAM_CONST] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
-       params[SI_PARAM_SAMPLER] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
-       params[SI_PARAM_RESOURCE] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
-       last_array_pointer = SI_PARAM_RESOURCE;
+       params[SI_PARAM_CONST_BUFFERS] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
+       params[SI_PARAM_SAMPLER_STATES] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
+       params[SI_PARAM_SAMPLER_VIEWS] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
+       last_array_pointer = SI_PARAM_SAMPLER_VIEWS;
 
        switch (si_shader_ctx->type) {
        case TGSI_PROCESSOR_VERTEX:
-               params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
-               last_array_pointer = SI_PARAM_VERTEX_BUFFER;
+               params[SI_PARAM_VERTEX_BUFFERS] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
+               last_array_pointer = SI_PARAM_VERTEX_BUFFERS;
                params[SI_PARAM_BASE_VERTEX] = i32;
                params[SI_PARAM_START_INSTANCE] = i32;
                num_params = SI_PARAM_START_INSTANCE+1;
@@ -3432,8 +3452,8 @@ static void create_function(struct si_shader_context *si_shader_ctx)
                        num_params = SI_PARAM_LS_OUT_LAYOUT+1;
                } else {
                        if (shader->is_gs_copy_shader) {
-                               last_array_pointer = SI_PARAM_CONST;
-                               num_params = SI_PARAM_CONST+1;
+                               last_array_pointer = SI_PARAM_CONST_BUFFERS;
+                               num_params = SI_PARAM_CONST_BUFFERS+1;
                        } else {
                                params[SI_PARAM_VS_STATE_BITS] = i32;
                                num_params = SI_PARAM_VS_STATE_BITS+1;
@@ -3590,7 +3610,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
        struct gallivm_state * gallivm = bld_base->base.gallivm;
        const struct tgsi_shader_info * info = bld_base->info;
        unsigned buf;
-       LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
 
        for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
                unsigned i, num_const = info->const_file_max[buf] + 1;
@@ -3602,14 +3622,14 @@ static void preload_constants(struct si_shader_context *si_shader_ctx)
                si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
 
                /* Load the resource descriptor */
-               si_shader_ctx->const_resource[buf] =
+               si_shader_ctx->const_buffers[buf] =
                        build_indexed_load_const(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
 
                /* Load the constants, we rely on the code sinking to do the rest */
                for (i = 0; i < num_const * 4; ++i) {
                        si_shader_ctx->constants[buf][i] =
                                buffer_load_const(gallivm->builder,
-                                       si_shader_ctx->const_resource[buf],
+                                       si_shader_ctx->const_buffers[buf],
                                        lp_build_const_int32(gallivm, i * 4),
                                        bld_base->base.elem_type);
                }
@@ -3630,23 +3650,23 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx)
        if (num_samplers == 0)
                return;
 
-       res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-       samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
+       res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_VIEWS);
+       samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER_STATES);
 
        /* Load the resources and samplers, we rely on the code sinking to do the rest */
        for (i = 0; i < num_samplers; ++i) {
                /* Resource */
                offset = lp_build_const_int32(gallivm, i);
-               si_shader_ctx->resources[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
+               si_shader_ctx->sampler_views[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
 
                /* Sampler */
                offset = lp_build_const_int32(gallivm, i);
-               si_shader_ctx->samplers[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
+               si_shader_ctx->sampler_states[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
 
                /* FMASK resource */
                if (info->is_msaa_sampler[i]) {
                        offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
-                       si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + i] =
+                       si_shader_ctx->sampler_views[SI_FMASK_TEX_OFFSET + i] =
                                build_indexed_load_const(si_shader_ctx, res_ptr, offset);
                }
        }
@@ -3807,7 +3827,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
        if (!shader->bo)
                return -ENOMEM;
 
-       ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+       ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
                                        PIPE_TRANSFER_READ_WRITE);
        util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
        if (binary->rodata_size > 0) {
@@ -3816,14 +3836,61 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
                                        binary->rodata_size);
        }
 
-       sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+       sscreen->b.ws->buffer_unmap(shader->bo->buf);
        return 0;
 }
 
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
+static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
+                                      struct pipe_debug_callback *debug)
+{
+       char *line, *p;
+       unsigned i, count;
+
+       if (binary->disasm_string) {
+               fprintf(stderr, "\nShader Disassembly:\n\n");
+               fprintf(stderr, "%s\n", binary->disasm_string);
+
+               if (debug && debug->debug_message) {
+                       /* Very long debug messages are cut off, so send the
+                        * disassembly one line at a time. This causes more
+                        * overhead, but on the plus side it simplifies
+                        * parsing of resulting logs.
+                        */
+                       pipe_debug_message(debug, SHADER_INFO,
+                                          "Shader Disassembly Begin");
+
+                       line = binary->disasm_string;
+                       while (*line) {
+                               p = strchrnul(line, '\n');
+                               count = p - line;
+
+                               if (count) {
+                                       pipe_debug_message(debug, SHADER_INFO,
+                                                          "%.*s", count, line);
+                               }
+
+                               if (!*p)
+                                       break;
+                               line = p + 1;
+                       }
+
+                       pipe_debug_message(debug, SHADER_INFO,
+                                          "Shader Disassembly End");
+               }
+       } else {
+               fprintf(stderr, "SI CODE:\n");
+               for (i = 0; i < binary->code_size; i += 4) {
+                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
+                               binary->code[i + 3], binary->code[i + 2],
+                               binary->code[i + 1], binary->code[i]);
+               }
+       }
+}
+
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
+                         struct pipe_debug_callback *debug)
 {
        const struct radeon_shader_binary *binary = &shader->binary;
-       unsigned i;
        int r;
        bool dump  = r600_can_dump_shader(&sscreen->b,
                shader->selector ? shader->selector->tokens : NULL);
@@ -3834,19 +3901,8 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
                return r;
 
        if (dump) {
-               if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
-                       if (binary->disasm_string) {
-                               fprintf(stderr, "\nShader Disassembly:\n\n");
-                               fprintf(stderr, "%s\n", binary->disasm_string);
-                       } else {
-                               fprintf(stderr, "SI CODE:\n");
-                               for (i = 0; i < binary->code_size; i+=4 ) {
-                                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
-                                       binary->code[i + 2], binary->code[i + 1],
-                                       binary->code[i]);
-                               }
-                       }
-               }
+               if (!(sscreen->b.debug_flags & DBG_NO_ASM))
+                       si_shader_dump_disassembly(binary, debug);
 
                fprintf(stderr, "*** SHADER STATS ***\n"
                        "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
@@ -3854,23 +3910,37 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
                        shader->num_sgprs, shader->num_vgprs, binary->code_size,
                        shader->lds_size, shader->scratch_bytes_per_wave);
        }
+
+       pipe_debug_message(debug, SHADER_INFO,
+                          "Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d",
+                          shader->num_sgprs, shader->num_vgprs, binary->code_size,
+                          shader->lds_size, shader->scratch_bytes_per_wave);
+
        return 0;
 }
 
 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-                   LLVMTargetMachineRef tm, LLVMModuleRef mod)
+                   LLVMTargetMachineRef tm, LLVMModuleRef mod,
+                   struct pipe_debug_callback *debug, unsigned processor)
 {
        int r = 0;
        bool dump_asm = r600_can_dump_shader(&sscreen->b,
                                shader->selector ? shader->selector->tokens : NULL);
        bool dump_ir = dump_asm && !(sscreen->b.debug_flags & DBG_NO_IR);
+       unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
 
-       r = radeon_llvm_compile(mod, &shader->binary,
-               r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm);
-       if (r)
-               return r;
+       if (dump_ir || dump_asm)
+               fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
+
+       if (!si_replace_shader(count, &shader->binary)) {
+               r = radeon_llvm_compile(mod, &shader->binary,
+                       r600_get_llvm_processor_name(sscreen->b.family), dump_ir, dump_asm, tm,
+                       debug);
+               if (r)
+                       return r;
+       }
 
-       r = si_shader_binary_read(sscreen, shader);
+       r = si_shader_binary_read(sscreen, shader, debug);
 
        FREE(shader->binary.config);
        FREE(shader->binary.rodata);
@@ -3887,7 +3957,8 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
 /* Generate code for the hardware VS shader stage to go with a geometry shader */
 static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                                      struct si_shader_context *si_shader_ctx,
-                                     struct si_shader *gs, bool dump)
+                                     struct si_shader *gs, bool dump,
+                                     struct pipe_debug_callback *debug)
 {
        struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
        struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
@@ -3954,7 +4025,8 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
 
        r = si_compile_llvm(sscreen, si_shader_ctx->shader,
-                           si_shader_ctx->tm, bld_base->base.gallivm->module);
+                           si_shader_ctx->tm, bld_base->base.gallivm->module,
+                           debug, TGSI_PROCESSOR_GEOMETRY);
 
        radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
 
@@ -4008,7 +4080,8 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 }
 
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
-                    struct si_shader *shader)
+                    struct si_shader *shader,
+                    struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
        struct tgsi_token *tokens = sel->tokens;
@@ -4162,7 +4235,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
        radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
 
        mod = bld_base->base.gallivm->module;
-       r = si_compile_llvm(sscreen, shader, tm, mod);
+       r = si_compile_llvm(sscreen, shader, tm, mod, debug, si_shader_ctx.type);
        if (r) {
                fprintf(stderr, "LLVM failed to compile shader\n");
                goto out;
@@ -4176,7 +4249,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                shader->gs_copy_shader->key = shader->key;
                si_shader_ctx.shader = shader->gs_copy_shader;
                if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
-                                                   shader, dump))) {
+                                                   shader, dump, debug))) {
                        free(shader->gs_copy_shader);
                        shader->gs_copy_shader = NULL;
                        goto out;