r600g/llvm: tgsi to llvm emits store.swizzle intrinsic for vs/fs output
author Vincent Lejeune <vljn@ovi.com>
Fri, 11 Jan 2013 18:48:29 +0000 (19:48 +0100)
committer Tom Stellard <thomas.stellard@amd.com>
Fri, 18 Jan 2013 20:34:26 +0000 (20:34 +0000)
Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/radeon/radeon_llvm.h

index a9f5825c349f80439bd564f3bd9a9f6118b37ecf..0f0eb84557e754dd951c4055a8fe194d8cf2df5a 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "r600.h"
 #include "r600_asm.h"
+#include "r600_sq.h"
 #include "r600_opcodes.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
@@ -242,7 +243,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
        struct lp_build_context * base = &bld_base->base;
        struct pipe_stream_output_info * so = ctx->stream_outputs;
        unsigned i;
-       
+       unsigned next_pos = 60;
+       unsigned next_param = 0;
+
        unsigned color_count = 0;
        boolean has_color = false;
 
@@ -280,70 +283,152 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
        /* Add the necessary export instructions */
        for (i = 0; i < ctx->output_reg_count; i++) {
                unsigned chan;
+               LLVMValueRef elements[4];
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                       LLVMValueRef output;
-                       unsigned adjusted_reg_idx = i +
-                                       ctx->reserved_reg_count;
-
-                       output = LLVMBuildLoad(base->gallivm->builder,
+                       elements[chan] = LLVMBuildLoad(base->gallivm->builder,
                                ctx->soa.outputs[i][chan], "");
-
-                       if (ctx->type == TGSI_PROCESSOR_VERTEX) {
-                               LLVMValueRef reg_index = lp_build_const_int32(
-                                       base->gallivm,
-                                       radeon_llvm_reg_index_soa(adjusted_reg_idx, chan));
-                               lp_build_intrinsic_binary(
+               }
+               LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4);
+
+               if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                       switch (ctx->r600_outputs[i].name) {
+                       case TGSI_SEMANTIC_POSITION:
+                       case TGSI_SEMANTIC_PSIZE: {
+                               LLVMValueRef args[3];
+                               args[0] = output;
+                               args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+                               build_intrinsic(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.swizzle",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       args, 3, 0);
+                               break;
+                       }
+                       case TGSI_SEMANTIC_CLIPVERTEX: {
+                               LLVMValueRef args[3];
+                               unsigned reg_index;
+                               unsigned base_vector_chan;
+                               LLVMValueRef adjusted_elements[4];
+                               for (reg_index = 0; reg_index < 2; reg_index ++) {
+                                       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+                                               LLVMValueRef offset[2] = {
+                                                       LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false),
+                                                       lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan)
+                                               };
+                                               LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024), 9);
+                                               LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, "");
+                                               LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, "");
+                                               LLVMValueRef base_vector = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, "");
+                                               args[0] = output;
+                                               args[1] = base_vector;
+                                               adjusted_elements[chan] = build_intrinsic(base->gallivm->builder,
+                                                       "llvm.AMDGPU.dp4", bld_base->base.elem_type,
+                                                       args, 2, LLVMReadNoneAttribute);
+                                       }
+                                       args[0] = lp_build_gather_values(base->gallivm,
+                                               adjusted_elements, 4);
+                                       args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+                                       args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+                                       build_intrinsic(
+                                               base->gallivm->builder,
+                                               "llvm.R600.store.swizzle",
+                                               LLVMVoidTypeInContext(base->gallivm->context),
+                                               args, 3, 0);
+                               }
+                               break;
+                       }
+                       case TGSI_SEMANTIC_CLIPDIST : {
+                               LLVMValueRef args[3];
+                               args[0] = output;
+                               args[1] = lp_build_const_int32(base->gallivm, next_pos++);
+                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS);
+                               build_intrinsic(
                                        base->gallivm->builder,
-                                       "llvm.AMDGPU.store.output",
+                                       "llvm.R600.store.swizzle",
                                        LLVMVoidTypeInContext(base->gallivm->context),
-                                       output, reg_index);
-                       } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
-                               switch (ctx->r600_outputs[i].name) {
-                               case TGSI_SEMANTIC_COLOR:
-                                       has_color = true;
-                                       if ( color_count/4 < ctx->color_buffer_count) {
-                                               if (ctx->fs_color_all) {
-                                                       for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
-                                                               LLVMValueRef reg_index = lp_build_const_int32(
-                                                                       base->gallivm,
-                                                                       (j * 4) + chan);
-                                                               lp_build_intrinsic_binary(
-                                                                       base->gallivm->builder,
-                                                                       "llvm.R600.store.pixel.color",
-                                                                       LLVMVoidTypeInContext(base->gallivm->context),
-                                                                       output, reg_index);
-                                                       }
-                                               } else {
-                                                       LLVMValueRef reg_index = lp_build_const_int32(
-                                                               base->gallivm,
-                                                               (color_count++/4) * 4 + chan);
-                                                       lp_build_intrinsic_binary(
+                                       args, 3, 0);
+                               args[1] = lp_build_const_int32(base->gallivm, next_param++);
+                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+                               build_intrinsic(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.swizzle",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       args, 3, 0);
+                               break;
+                       }
+                       case TGSI_SEMANTIC_FOG: {
+                               elements[0] = LLVMBuildLoad(base->gallivm->builder,
+                                       ctx->soa.outputs[i][0], "");
+                               elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f);
+                               elements[3] = lp_build_const_float(base->gallivm, 1.0f);
+
+                               LLVMValueRef args[3];
+                               args[0] = lp_build_gather_values(base->gallivm, elements, 4);
+                               args[1] = lp_build_const_int32(base->gallivm, next_param++);
+                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+                               build_intrinsic(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.swizzle",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       args, 3, 0);
+                               break;
+                       }
+                       default: {
+                               LLVMValueRef args[3];
+                               args[0] = output;
+                               args[1] = lp_build_const_int32(base->gallivm, next_param++);
+                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM);
+                               build_intrinsic(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.swizzle",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       args, 3, 0);
+                               break;
+                       }
+                       }
+               } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                       switch (ctx->r600_outputs[i].name) {
+                       case TGSI_SEMANTIC_COLOR:
+                               has_color = true;
+                               if ( color_count < ctx->color_buffer_count) {
+                                       LLVMValueRef args[3];
+                                       args[0] = output;
+                                       if (ctx->fs_color_all) {
+                                               for (unsigned j = 0; j < ctx->color_buffer_count; j++) {
+                                                       args[1] = lp_build_const_int32(base->gallivm, j);
+                                                       args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
+                                                       build_intrinsic(
                                                                base->gallivm->builder,
-                                                               "llvm.R600.store.pixel.color",
+                                                               "llvm.R600.store.swizzle",
                                                                LLVMVoidTypeInContext(base->gallivm->context),
-                                                               output, reg_index);
+                                                               args, 3, 0);
                                                }
+                                       } else {
+                                               args[1] = lp_build_const_int32(base->gallivm, color_count++);
+                                               args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL);
+                                               build_intrinsic(
+                                                       base->gallivm->builder,
+                                                       "llvm.R600.store.swizzle",
+                                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                                       args, 3, 0);
                                        }
-                                       break;
-                               case TGSI_SEMANTIC_POSITION:
-                                       if (chan != 2)
-                                               continue;
-                                       lp_build_intrinsic_unary(
-                                               base->gallivm->builder,
-                                               "llvm.R600.store.pixel.depth",
-                                               LLVMVoidTypeInContext(base->gallivm->context),
-                                               output);
-                                       break;
-                               case TGSI_SEMANTIC_STENCIL:
-                                       if (chan != 1)
-                                               continue;
-                                       lp_build_intrinsic_unary(
-                                               base->gallivm->builder,
-                                               "llvm.R600.store.pixel.stencil",
-                                               LLVMVoidTypeInContext(base->gallivm->context),
-                                               output);
-                                       break;
                                }
+                               break;
+                       case TGSI_SEMANTIC_POSITION:
+                               lp_build_intrinsic_unary(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.pixel.depth",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], ""));
+                               break;
+                       case TGSI_SEMANTIC_STENCIL:
+                               lp_build_intrinsic_unary(
+                                       base->gallivm->builder,
+                                       "llvm.R600.store.pixel.stencil",
+                                       LLVMVoidTypeInContext(base->gallivm->context),
+                                       LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], ""));
+                               break;
                        }
                }
        }
index ad8b91fb08bffa07d143919d0051304b1d8eea6d..11802f0d2ed48c3a6c2aab4753f97e93c0c92f98 100644 (file)
@@ -1417,6 +1417,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
                radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN);
                radeon_llvm_ctx.stream_outputs = &so;
+               radeon_llvm_ctx.clip_vertex = ctx.cv_output;
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
                if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) {
                        dump = 1;
@@ -1565,7 +1566,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                alu.dst.write = (j == ochan);
                                if (j == 3)
                                        alu.last = 1;
-                               r = r600_bytecode_add_alu(ctx.bc, &alu);
+                               if (!use_llvm)
+                                       r = r600_bytecode_add_alu(ctx.bc, &alu);
                                if (r)
                                        return r;
                        }
@@ -1851,7 +1853,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                }
        }
        /* add output to bytecode */
-       if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) {
+       if (!use_llvm) {
                for (i = 0; i < noutput; i++) {
                        r = r600_bytecode_add_output(ctx.bc, &output[i]);
                        if (r)
index c5932282901ec4094da6e0579045a348bceb0df2..1edcbd46d721dc1f26ccab379682b77efe867a3d 100644 (file)
@@ -58,6 +58,7 @@ struct radeon_llvm_context {
        unsigned type;
        unsigned face_input;
        unsigned two_side;
+       unsigned clip_vertex;
        struct r600_shader_io * r600_inputs;
        struct r600_shader_io * r600_outputs;
        struct pipe_stream_output_info *stream_outputs;