r600g: make tgsi-to-llvm generate store.pixel* intrinsic for fs
author Vincent Lejeune <vljn@ovi.com>
Sat, 29 Sep 2012 14:49:13 +0000 (16:49 +0200)
committer Vincent Lejeune <vljn@ovi.com>
Fri, 2 Nov 2012 22:19:11 +0000 (23:19 +0100)
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/radeon/radeon_llvm.h

index 310d42425741f8fb3214955f470a7c0d53abdf4a..70dc94af2efe9ce9debed92071c3e7480aaaeda7 100644 (file)
@@ -145,3 +145,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
        }
        return 0;
 }
+
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{ /* Fills the fields of *output from the two export words word0 and word1; returns nothing. */
+       output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); /* these four fields come from word0 */
+       output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+       output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+       output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+       /* every remaining field is taken from word1 */
+       output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+       output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+       output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+       output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+       output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+       output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+       output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); /* unlike the other fields, the CF_INST value is passed through the EG_S_ macro before being stored */
+       output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
index f04a92062f62b1f8ffc9a556913330141cc38941..5f2548e31949cc5da41a828ac21385c6f90e1f8e 100644 (file)
@@ -2947,3 +2947,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3
                        G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
        }
 }
+
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
+{ /* Fills the fields of *output from the two export words word0 and word1; returns nothing. */
+       output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); /* these four fields come from word0 */
+       output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
+       output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
+       output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
+       /* every remaining field is taken from word1 */
+       output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+       output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+       output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+       output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+       output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+       output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+       output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); /* NOTE(review): this r600_ reader uses the EG_S_ macro, exactly as eg_bytecode_export_read does - confirm an R600-specific encoding was not intended */
+       output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+}
index 2c7db2cefd792cda604d3205ee3047147acc3591..f3b036d08c837f8dc4cc321972de43d42361a228 100644 (file)
@@ -247,5 +247,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
 void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf);
 int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id);
 void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
+void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
+void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 
 #endif
index 3dec8ae25372b329161dd451be09a5c865a97bd0..b3d4e6bab6886793f2368252460be221d9ad2d22 100644 (file)
@@ -229,6 +229,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
        struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
        struct lp_build_context * base = &bld_base->base;
        unsigned i;
+       
+       unsigned color_count = 0;
+       boolean has_color = false;
 
        /* Add the necessary export instructions */
        for (i = 0; i < ctx->output_reg_count; i++) {
@@ -237,20 +240,72 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
                        LLVMValueRef output;
                        unsigned adjusted_reg_idx = i +
                                        ctx->reserved_reg_count;
-                       LLVMValueRef reg_index = lp_build_const_int32(
-                               base->gallivm,
-                               radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
 
                        output = LLVMBuildLoad(base->gallivm->builder,
                                ctx->soa.outputs[i][chan], "");
 
-                       lp_build_intrinsic_binary(
-                               base->gallivm->builder,
-                               "llvm.AMDGPU.store.output",
-                               LLVMVoidTypeInContext(base->gallivm->context),
-                               output, reg_index);
+                       if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                               LLVMValueRef reg_index = lp_build_const_int32(
+                                       base->gallivm,
+                                       radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
+                               lp_build_intrinsic_binary(
+                                       base->gallivm->builder,
+                                       "llvm.AMDGPU.store.output",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       output, reg_index);
+                       } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                               switch (ctx->r600_outputs[i].name) {
+                               case TGSI_SEMANTIC_COLOR:
+                                       has_color = true;
+                                       if ( color_count/4 < ctx->color_buffer_count) {
+                                               if (ctx->fs_color_all) {
+                                                       for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
+                                                               LLVMValueRef reg_index = lp_build_const_int32(
+                                                                       base->gallivm,
+                                                                       (j * 4) + chan);
+                                                               lp_build_intrinsic_binary(
+                                                                       base->gallivm->builder,
+                                                                       "llvm.R600.store.pixel.color",
+                                                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                                                       output, reg_index);
+                                                       }
+                                               } else {
+                                                       LLVMValueRef reg_index = lp_build_const_int32(
+                                                               base->gallivm,
+                                                               (color_count++/4) * 4 + chan);
+                                                       lp_build_intrinsic_binary(
+                                                               base->gallivm->builder,
+                                                               "llvm.R600.store.pixel.color",
+                                                               LLVMVoidTypeInContext(base->gallivm->context),
+                                                               output, reg_index);
+                                               }
+                                       }
+                                       break;
+                               case TGSI_SEMANTIC_POSITION:
+                                       if (chan != 2)
+                                               continue;
+                                       lp_build_intrinsic_unary(
+                                               base->gallivm->builder,
+                                               "llvm.R600.store.pixel.depth",
+                                               LLVMVoidTypeInContext(base->gallivm->context),
+                                               output);
+                                       break;
+                               case TGSI_SEMANTIC_STENCIL:
+                                       if (chan != 1)
+                                               continue;
+                                       lp_build_intrinsic_unary(
+                                               base->gallivm->builder,
+                                               "llvm.R600.store.pixel.stencil",
+                                               LLVMVoidTypeInContext(base->gallivm->context),
+                                               output);
+                                       break;
+                               }
+                       }
                }
        }
+
+       if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT)
+               lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0);
 }
 
 static void llvm_emit_tex(
index a4d94bb4af502fc1ff77a1562fde0f14e7aad63c..4634c30b6bce0820fc173c0d095a339dc348d493 100644 (file)
@@ -526,6 +526,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
        return bytes_read;
 }
 
+static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
+       unsigned char * bytes, unsigned bytes_read)
+{ /* Reads two 32-bit words from bytes, decodes them into an export output and adds it to ctx->bc. */
+       struct r600_bytecode_output output;
+       memset(&output, 0, sizeof(struct r600_bytecode_output)); /* start from an all-zero output so fields the readers do not set stay 0 */
+       uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); /* bytes_read is passed by address; presumably advanced by each read - confirm in i32_from_byte_stream */
+       uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read);
+       if (ctx->bc->chip_class >= EVERGREEN) /* choose the decoder by chip class */
+               eg_bytecode_export_read(&output, word0,word1);
+       else
+               r600_bytecode_export_read(&output, word0,word1);
+       r600_bytecode_add_output(ctx->bc, &output); /* NOTE(review): the return value is discarded here, while r600_shader_from_tgsi checks it and bails out on non-zero - confirm ignoring it is intended */
+       return bytes_read; /* value of bytes_read as left by the two i32_from_byte_stream calls */
+}
+
 static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                                unsigned char * bytes,  unsigned num_bytes)
 {
@@ -560,6 +575,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                        bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
                                                                bytes_read);
                        break;
+               case 5:
+            bytes_read = r600_export_from_byte_stream(ctx, bytes,
+                                bytes_read);
+            break;
                default:
                        /* XXX: Error here */
                        break;
@@ -1360,7 +1379,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                radeon_llvm_ctx.two_side = shader->two_side;
                radeon_llvm_ctx.face_input = ctx.face_gpr;
                radeon_llvm_ctx.r600_inputs = ctx.shader->input;
+               radeon_llvm_ctx.r600_outputs = ctx.shader->output;
+               radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
                radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
+               radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
                if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
                        dump = 1;
@@ -1730,10 +1752,12 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                }
        }
        /* add output to bytecode */
-       for (i = 0; i < noutput; i++) {
-               r = r600_bytecode_add_output(ctx.bc, &output[i]);
-               if (r)
-                       goto out_err;
+       if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
+               for (i = 0; i < noutput; i++) {
+                       r = r600_bytecode_add_output(ctx.bc, &output[i]);
+                       if (r)
+                               goto out_err;
+               }
        }
        /* add program end */
        if (ctx.bc->chip_class == CAYMAN)
index 6118b118bb8afe9a27f916cd9b51851eeb6817cf..61975c4de090de1b8d106812e7868855b70b875e 100644 (file)
@@ -59,6 +59,9 @@ struct radeon_llvm_context {
        unsigned face_input;
        unsigned two_side;
        struct r600_shader_io * r600_inputs;
+       struct r600_shader_io * r600_outputs;
+       unsigned color_buffer_count;
+       unsigned fs_color_all;
 
        /*=== Front end configuration ===*/