r600g/llvm: tgsi to llvm emits stream output intrinsics.
author Vincent Lejeune <vljn@ovi.com>
Fri, 11 Jan 2013 18:48:28 +0000 (19:48 +0100)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 18 Jan 2013 20:34:21 +0000 (20:34 +0000)
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/radeon/radeon_llvm.h

index 70dc94af2efe9ce9debed92071c3e7480aaaeda7..0dc3ffdaa3841feddfe76c4077495f34ed9917ce 100644 (file)
@@ -161,4 +161,6 @@ void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0
        output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
        output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
        output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+       output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
+       output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
index 0a6f63ff9c02e7c8c4fc6e5bb2479ba2c0af1eaf..3aaea4a73e94b2ba40cbcd354b3ab02fd86b80b6 100644 (file)
@@ -2967,4 +2967,6 @@ void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t wor
        output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
        output->inst = R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1));
        output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+       output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
+       output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
index 9e31171c711a0eccfc5ae371c21b194043847e21..a9f5825c349f80439bd564f3bd9a9f6118b37ecf 100644 (file)
@@ -240,11 +240,43 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 {
        struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
        struct lp_build_context * base = &bld_base->base;
+       struct pipe_stream_output_info * so = ctx->stream_outputs;
        unsigned i;
        
        unsigned color_count = 0;
        boolean has_color = false;
 
+       if (ctx->type == TGSI_PROCESSOR_VERTEX && so->num_outputs) {
+               for (i = 0; i < so->num_outputs; i++) {
+                       unsigned register_index = so->output[i].register_index;
+                       unsigned start_component = so->output[i].start_component;
+                       unsigned num_components = so->output[i].num_components;
+                       unsigned dst_offset = so->output[i].dst_offset;
+                       unsigned chan;
+                       LLVMValueRef elements[4];
+                       if (dst_offset < start_component) {
+                               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+                                       elements[chan] = LLVMBuildLoad(base->gallivm->builder,
+                                               ctx->soa.outputs[register_index][(chan + start_component) % TGSI_NUM_CHANNELS], "");
+                               }
+                               start_component = 0;
+                       } else {
+                               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+                                       elements[chan] = LLVMBuildLoad(base->gallivm->builder,
+                                               ctx->soa.outputs[register_index][chan], "");
+                               }
+                       }
+                       LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
+                       LLVMValueRef args[4];
+                       args[0] = output;
+                       args[1] = lp_build_const_int32(base->gallivm, dst_offset - start_component);
+                       args[2] = lp_build_const_int32(base->gallivm, so->output[i].output_buffer);
+                       args[3] = lp_build_const_int32(base->gallivm, ((1 << num_components) - 1) << start_component);
+                       lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.stream.output",
+                               LLVMVoidTypeInContext(base->gallivm->context), args, 4);
+               }
+       }
+
        /* Add the necessary export instructions */
        for (i = 0; i < ctx->output_reg_count; i++) {
                unsigned chan;
index 410ffce28ba1ad16928260392de2466e3fdbcb6f..ad8b91fb08bffa07d143919d0051304b1d8eea6d 100644 (file)
@@ -1416,6 +1416,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
                radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
                radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
+               radeon_llvm_ctx.stream_outputs = &so;
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
                if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
                        dump = 1;
@@ -1572,7 +1573,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        }
 
        /* Add stream outputs. */
-       if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
+       if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs && !use_llvm) {
                unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
 
                /* Sanity checking. */
index b8dc771226a3c6cef5fb8bf7fb431f24150aaef6..c5932282901ec4094da6e0579045a348bceb0df2 100644 (file)
@@ -60,6 +60,7 @@ struct radeon_llvm_context {
        unsigned two_side;
        struct r600_shader_io * r600_inputs;
        struct r600_shader_io * r600_outputs;
+       struct pipe_stream_output_info *stream_outputs;
        unsigned color_buffer_count;
        unsigned fs_color_all;