From 3b14ce2cafea03de1b39e44cc8c37439b031e3eb Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Fri, 11 Jan 2013 19:48:29 +0100 Subject: [PATCH] r600g/llvm: tgsi to llvm emits store.swizzle intrinsic for vs/fs output Reviewed-by: Tom Stellard --- src/gallium/drivers/r600/r600_llvm.c | 197 ++++++++++++++++------- src/gallium/drivers/r600/r600_shader.c | 6 +- src/gallium/drivers/radeon/radeon_llvm.h | 1 + 3 files changed, 146 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index a9f5825c349..0f0eb84557e 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -9,6 +9,7 @@ #include "r600.h" #include "r600_asm.h" +#include "r600_sq.h" #include "r600_opcodes.h" #include "r600_shader.h" #include "r600_pipe.h" @@ -242,7 +243,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) struct lp_build_context * base = &bld_base->base; struct pipe_stream_output_info * so = ctx->stream_outputs; unsigned i; - + unsigned next_pos = 60; + unsigned next_param = 0; + unsigned color_count = 0; boolean has_color = false; @@ -280,70 +283,152 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) /* Add the necessary export instructions */ for (i = 0; i < ctx->output_reg_count; i++) { unsigned chan; + LLVMValueRef elements[4]; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef output; - unsigned adjusted_reg_idx = i + - ctx->reserved_reg_count; - - output = LLVMBuildLoad(base->gallivm->builder, + elements[chan] = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][chan], ""); - - if (ctx->type == TGSI_PROCESSOR_VERTEX) { - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); - lp_build_intrinsic_binary( + } + LLVMValueRef output = lp_build_gather_values(base->gallivm, elements, 4); + + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + switch (ctx->r600_outputs[i].name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: { + LLVMValueRef args[3]; + args[0] = output; + args[1] = lp_build_const_int32(base->gallivm, next_pos++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); + break; + } + case TGSI_SEMANTIC_CLIPVERTEX: { + LLVMValueRef args[3]; + unsigned reg_index; + unsigned base_vector_chan; + LLVMValueRef adjusted_elements[4]; + for (reg_index = 0; reg_index < 2; reg_index ++) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + LLVMValueRef offset[2] = { + LLVMConstInt(LLVMInt64TypeInContext(bld_base->base.gallivm->context), 0, false), + lp_build_const_int32(bld_base->base.gallivm, reg_index * 4 + chan) + }; + LLVMTypeRef const_ptr_type = LLVMPointerType(LLVMArrayType(LLVMVectorType(bld_base->base.elem_type, 4), 1024), 9); + LLVMValueRef const_ptr = LLVMBuildIntToPtr(bld_base->base.gallivm->builder, lp_build_const_int32(bld_base->base.gallivm, 0), const_ptr_type, ""); + LLVMValueRef ptr = LLVMBuildGEP(bld_base->base.gallivm->builder, const_ptr, offset, 2, ""); + LLVMValueRef base_vector = LLVMBuildLoad(bld_base->base.gallivm->builder, ptr, ""); + args[0] = output; + args[1] = base_vector; + adjusted_elements[chan] = build_intrinsic(base->gallivm->builder, + "llvm.AMDGPU.dp4", bld_base->base.elem_type, + args, 2, LLVMReadNoneAttribute); + } + args[0] = lp_build_gather_values(base->gallivm, + adjusted_elements, 4); + args[1] = lp_build_const_int32(base->gallivm, next_pos++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); + } + break; + } + case TGSI_SEMANTIC_CLIPDIST : { + LLVMValueRef args[3]; + args[0] = output; + args[1] = lp_build_const_int32(base->gallivm, next_pos++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS); + build_intrinsic( base->gallivm->builder, - "llvm.AMDGPU.store.output", + "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), - output, reg_index); - } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { - switch (ctx->r600_outputs[i].name) { - case TGSI_SEMANTIC_COLOR: - has_color = true; - if ( color_count/4 < ctx->color_buffer_count) { - if (ctx->fs_color_all) { - for (unsigned j = 0; j < ctx->color_buffer_count; j++) { - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - (j * 4) + chan); - lp_build_intrinsic_binary( - base->gallivm->builder, - "llvm.R600.store.pixel.color", - LLVMVoidTypeInContext(base->gallivm->context), - output, reg_index); - } - } else { - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - (color_count++/4) * 4 + chan); - lp_build_intrinsic_binary( + args, 3, 0); + args[1] = lp_build_const_int32(base->gallivm, next_param++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); + break; + } + case TGSI_SEMANTIC_FOG: { + elements[0] = LLVMBuildLoad(base->gallivm->builder, + ctx->soa.outputs[i][0], ""); + elements[1] = elements[2] = lp_build_const_float(base->gallivm, 0.0f); + elements[3] = lp_build_const_float(base->gallivm, 1.0f); + + LLVMValueRef args[3]; + args[0] = lp_build_gather_values(base->gallivm, elements, 4); + args[1] = lp_build_const_int32(base->gallivm, next_param++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); + break; + } + default: { + LLVMValueRef args[3]; + args[0] = output; + args[1] = lp_build_const_int32(base->gallivm, next_param++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); + break; + } + } + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + switch (ctx->r600_outputs[i].name) { + case TGSI_SEMANTIC_COLOR: + has_color = true; + if ( color_count < ctx->color_buffer_count) { + LLVMValueRef args[3]; + args[0] = output; + if (ctx->fs_color_all) { + for (unsigned j = 0; j < ctx->color_buffer_count; j++) { + args[1] = lp_build_const_int32(base->gallivm, j); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); + build_intrinsic( base->gallivm->builder, - "llvm.R600.store.pixel.color", + "llvm.R600.store.swizzle", LLVMVoidTypeInContext(base->gallivm->context), - output, reg_index); + args, 3, 0); } + } else { + args[1] = lp_build_const_int32(base->gallivm, color_count++); + args[2] = lp_build_const_int32(base->gallivm, V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL); + build_intrinsic( + base->gallivm->builder, + "llvm.R600.store.swizzle", + LLVMVoidTypeInContext(base->gallivm->context), + args, 3, 0); } - break; - case TGSI_SEMANTIC_POSITION: - if (chan != 2) - continue; - lp_build_intrinsic_unary( - base->gallivm->builder, - "llvm.R600.store.pixel.depth", - LLVMVoidTypeInContext(base->gallivm->context), - output); - break; - case TGSI_SEMANTIC_STENCIL: - if (chan != 1) - continue; - lp_build_intrinsic_unary( - base->gallivm->builder, - "llvm.R600.store.pixel.stencil", - LLVMVoidTypeInContext(base->gallivm->context), - output); - break; } + break; + case TGSI_SEMANTIC_POSITION: + lp_build_intrinsic_unary( + base->gallivm->builder, + "llvm.R600.store.pixel.depth", + LLVMVoidTypeInContext(base->gallivm->context), + LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][2], "")); + break; + case TGSI_SEMANTIC_STENCIL: + lp_build_intrinsic_unary( + base->gallivm->builder, + "llvm.R600.store.pixel.stencil", + LLVMVoidTypeInContext(base->gallivm->context), + LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][1], "")); + break; } } } diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index ad8b91fb08b..11802f0d2ed 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -1417,6 +1417,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, radeon_llvm_ctx.chip_class = ctx.bc->chip_class; radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); radeon_llvm_ctx.stream_outputs = &so; + radeon_llvm_ctx.clip_vertex = ctx.cv_output; mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { dump = 1; @@ -1565,7 +1566,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, alu.dst.write = (j == ochan); if (j == 3) alu.last = 1; - r = r600_bytecode_add_alu(ctx.bc, &alu); + if (!use_llvm) + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -1851,7 +1853,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } } /* add output to bytecode */ - if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) { + if (!use_llvm) { for (i = 0; i < noutput; i++) { r = r600_bytecode_add_output(ctx.bc, &output[i]); if (r) diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index c5932282901..1edcbd46d72 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -58,6 +58,7 @@ struct radeon_llvm_context { unsigned type; unsigned face_input; unsigned two_side; + unsigned clip_vertex; struct r600_shader_io * r600_inputs; struct r600_shader_io * r600_outputs; struct pipe_stream_output_info *stream_outputs; -- 2.30.2