From: Vincent Lejeune Date: Sat, 29 Sep 2012 14:49:13 +0000 (+0200) Subject: r600g: make tgsi-to-llvm generates store.pixel* intrinsic for fs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=84b437213294ff4e1a3bcae2f9cbb36a9b4955c4;p=mesa.git r600g: make tgsi-to-llvm generates store.pixel* intrinsic for fs Reviewed-by: Tom Stellard --- diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 310d4242574..70dc94af2ef 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -145,3 +145,20 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) } return 0; } + +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) +{ + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); + + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); + output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); + output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); +} diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index f04a92062f6..5f2548e3194 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2947,3 +2947,20 @@ void r600_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint3 G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1); } } + +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) +{ + output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0); + output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0); + output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0); + output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0); + + output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); + output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1); + output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1); + output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1); + output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); + output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1); + output->inst = EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1)); + output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1); +} diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 2c7db2cefd7..f3b036d08c8 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -247,5 +247,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf); int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id); void r700_bytecode_alu_read(struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); +void r600_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); +void eg_bytecode_export_read(struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); #endif diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 3dec8ae2537..b3d4e6bab68 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -229,6 +229,9 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); struct lp_build_context * base = &bld_base->base; unsigned i; + + unsigned color_count = 0; + boolean has_color = false; /* Add the necessary export instructions */ for (i = 0; i < ctx->output_reg_count; i++) { @@ -237,20 +240,72 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) LLVMValueRef output; unsigned adjusted_reg_idx = i + ctx->reserved_reg_count; - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); output = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][chan], ""); - lp_build_intrinsic_binary( - base->gallivm->builder, - "llvm.AMDGPU.store.output", - LLVMVoidTypeInContext(base->gallivm->context), - output, reg_index); + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, + radeon_llvm_reg_index_soa(adjusted_reg_idx, chan)); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.AMDGPU.store.output", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + switch (ctx->r600_outputs[i].name) { + case TGSI_SEMANTIC_COLOR: + has_color = true; + if ( color_count/4 < ctx->color_buffer_count) { + if (ctx->fs_color_all) { + for (unsigned j = 0; j < ctx->color_buffer_count; j++) { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, + (j * 4) + chan); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.pixel.color", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + } + } else { + LLVMValueRef reg_index = lp_build_const_int32( + base->gallivm, + (color_count++/4) * 4 + chan); + lp_build_intrinsic_binary( + base->gallivm->builder, + "llvm.R600.store.pixel.color", + LLVMVoidTypeInContext(base->gallivm->context), + output, reg_index); + } + } + break; + case TGSI_SEMANTIC_POSITION: + if (chan != 2) + continue; + lp_build_intrinsic_unary( + base->gallivm->builder, + "llvm.R600.store.pixel.depth", + LLVMVoidTypeInContext(base->gallivm->context), + output); + break; + case TGSI_SEMANTIC_STENCIL: + if (chan != 1) + continue; + lp_build_intrinsic_unary( + base->gallivm->builder, + "llvm.R600.store.pixel.stencil", + LLVMVoidTypeInContext(base->gallivm->context), + output); + break; + } + } } } + + if (!has_color && ctx->type == TGSI_PROCESSOR_FRAGMENT) + lp_build_intrinsic(base->gallivm->builder, "llvm.R600.store.pixel.dummy", LLVMVoidTypeInContext(base->gallivm->context), 0, 0); } static void llvm_emit_tex( diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a4d94bb4af5..4634c30b6bc 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -526,6 +526,21 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, return bytes_read; } +static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx, + unsigned char * bytes, unsigned bytes_read) +{ + struct r600_bytecode_output output; + memset(&output, 0, sizeof(struct r600_bytecode_output)); + uint32_t word0 = i32_from_byte_stream(bytes, &bytes_read); + uint32_t word1 = i32_from_byte_stream(bytes, &bytes_read); + if (ctx->bc->chip_class >= EVERGREEN) + eg_bytecode_export_read(&output, word0,word1); + else + r600_bytecode_export_read(&output, word0,word1); + r600_bytecode_add_output(ctx->bc, &output); + return bytes_read; +} + static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned num_bytes) { @@ -560,6 +575,10 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_vtx_from_byte_stream(ctx, bytes, bytes_read); break; + case 5: + bytes_read = r600_export_from_byte_stream(ctx, bytes, + bytes_read); + break; default: /* XXX: Error here */ break; @@ -1360,7 +1379,10 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, radeon_llvm_ctx.two_side = shader->two_side; radeon_llvm_ctx.face_input = ctx.face_gpr; radeon_llvm_ctx.r600_inputs = ctx.shader->input; + radeon_llvm_ctx.r600_outputs = ctx.shader->output; + radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); radeon_llvm_ctx.chip_class = ctx.bc->chip_class; + radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->chip_class >= EVERGREEN); mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE)) { dump = 1; @@ -1730,10 +1752,12 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, } } /* add output to bytecode */ - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; + if (!use_llvm || ctx.type != TGSI_PROCESSOR_FRAGMENT) { + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; + } } /* add program end */ if (ctx.bc->chip_class == CAYMAN) diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 6118b118bb8..61975c4de09 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -59,6 +59,9 @@ struct radeon_llvm_context { unsigned face_input; unsigned two_side; struct r600_shader_io * r600_inputs; + struct r600_shader_io * r600_outputs; + unsigned color_buffer_count; + unsigned fs_color_all; /*=== Front end configuration ===*/