From 0b6a9b2dbe7bedbc6c34370cd07dcaa93b4c5fda Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 11 Aug 2009 13:19:16 +0100 Subject: [PATCH] llvmpipe: Replace lp_fs_llvm.c. Based on lp_fs_exec.c/lp_fs_sse.c and tgsi_exec.c. --- src/gallium/drivers/llvmpipe/lp_fs_llvm.c | 449 ++++++++++++++++------ 1 file changed, 327 insertions(+), 122 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c index 8c186badb44..8999b8de09b 100644 --- a/src/gallium/drivers/llvmpipe/lp_fs_llvm.c +++ b/src/gallium/drivers/llvmpipe/lp_fs_llvm.c @@ -1,5 +1,6 @@ /************************************************************************** * + * Copyright 2009 VMware, Inc. * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * @@ -27,20 +28,30 @@ /** * Execute fragment shader using LLVM code generation. - * Authors: - * Zack Rusin */ -#include "lp_context.h" -#include "lp_state.h" -#include "lp_fs.h" #include "pipe/p_state.h" #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "tgsi/tgsi_sse2.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "lp_bld_type.h" +#include "lp_bld_tgsi.h" +#include "lp_screen.h" +#include "lp_context.h" +#include "lp_state.h" +#include "lp_fs.h" +#include "lp_quad.h" + + +typedef void +(*lp_shader_fs_func)(void *inputs, + void *consts, + void *outputs, + struct tgsi_sampler **samplers); -#if 0 /** * Subclass of lp_fragment_shader @@ -48,158 +59,352 @@ struct lp_llvm_fragment_shader { struct lp_fragment_shader base; - struct gallivm_prog *llvm_prog; + + struct llvmpipe_screen *screen; + + LLVMValueRef function; + + lp_shader_fs_func jit_function; }; +/** cast wrapper */ +static INLINE struct lp_llvm_fragment_shader * +lp_llvm_fragment_shader(const struct lp_fragment_shader *base) +{ + return (struct lp_llvm_fragment_shader *) base; +} + + static void -shade_quad_llvm(struct quad_stage *qs, - 
struct quad_header *quad) +shader_generate(struct llvmpipe_screen *screen, + struct lp_llvm_fragment_shader *shader) { - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct llvmpipe_context *llvmpipe = qs->llvmpipe; - float dests[4][16][4] ALIGN16_ATTRIB; - float inputs[4][16][4] ALIGN16_ATTRIB; - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; - struct gallivm_prog *llvm = qss->llvm_prog; - - inputs[0][0][0] = fx; - inputs[1][0][0] = fx + 1.0f; - inputs[2][0][0] = fx; - inputs[3][0][0] = fx + 1.0f; - - inputs[0][0][1] = fy; - inputs[1][0][1] = fy; - inputs[2][0][1] = fy + 1.0f; - inputs[3][0][1] = fy + 1.0f; - - - gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); - -#if DLLVM - debug_printf("MASK = %d\n", quad->mask); - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 2; ++j) { - debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, - inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); - } - } -#endif + const struct tgsi_token *tokens = shader->base.shader.tokens; + union lp_type type; + LLVMTypeRef elem_type; + LLVMTypeRef vec_type; + LLVMTypeRef args[4]; + LLVMValueRef inputs_ptr; + LLVMValueRef consts_ptr; + LLVMValueRef outputs_ptr; + LLVMValueRef samplers_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][4]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][4]; + char name[32]; + unsigned i, j; - quad->mask &= - gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, - llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT], - qss->samplers); -#if DLLVM - debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", - dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], - dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); -#endif + type.value = 0; + type.floating = TRUE; + type.sign = FALSE; + type.norm = TRUE; + type.width = 32; + type.length = 4; - /* store result color */ - if (qss->colorOutSlot >= 0) { - unsigned i; - /* XXX need 
to handle multiple color outputs someday */ - allvmrt(qss->stage.llvmpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - for (i = 0; i < QUAD_SIZE; ++i) { - quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; - quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; - quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; - quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; - } - } -#if DLLVM - for (int i = 0; i < QUAD_SIZE; ++i) { - debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, - quad->outputs.color[0][0][i], - quad->outputs.color[0][1][i], - quad->outputs.color[0][2][i], - quad->outputs.color[0][3][i]); - } -#endif + elem_type = lp_build_elem_type(type); + vec_type = lp_build_vec_type(type); + + args[0] = LLVMPointerType(vec_type, 0); + args[1] = LLVMPointerType(elem_type, 0); + args[2] = LLVMPointerType(vec_type, 0); + args[3] = LLVMPointerType(LLVMInt8Type(), 0); + shader->function = LLVMAddFunction(screen->module, "shader", LLVMFunctionType(LLVMVoidType(), args, 4, 0)); + LLVMSetFunctionCallConv(shader->function, LLVMCCallConv); + + inputs_ptr = LLVMGetParam(shader->function, 0); + consts_ptr = LLVMGetParam(shader->function, 1); + outputs_ptr = LLVMGetParam(shader->function, 2); + samplers_ptr = LLVMGetParam(shader->function, 3); + + LLVMSetValueName(inputs_ptr, "inputs"); + LLVMSetValueName(consts_ptr, "consts"); + LLVMSetValueName(outputs_ptr, "outputs"); + LLVMSetValueName(samplers_ptr, "samplers"); - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = dests[i][0][2]; + block = LLVMAppendBasicBlock(shader->function, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + for(i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i) { + for(j = 0; j < 4; ++j) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0); + util_snprintf(name, sizeof 
name, "input%u.%c", i, "xyzw"[j]); + inputs[i][j] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, inputs_ptr, &index, 1, ""), name); } } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = inputs[i][0][2]; + + memset(outputs, 0, sizeof outputs); + + lp_build_tgsi_soa(builder, tokens, type, inputs, consts_ptr, outputs, samplers_ptr); + + for(i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) { + for(j = 0; j < 4; ++j) { + if(outputs[i][j]) { + LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i*4 + j, 0); + util_snprintf(name, sizeof name, "output%u.%c", i, "xyzw"[j]); + LLVMBuildStore(builder, outputs[i][j], LLVMBuildGEP(builder, outputs_ptr, &index, 1, name)); + } } } -#if DLLVM - debug_printf("D [%f, %f, %f, %f] mask = %d\n", - quad->outputs.depth[0], - quad->outputs.depth[1], - quad->outputs.depth[2], - quad->outputs.depth[3], quad->mask); -#endif - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); +} + + + +static void +fs_llvm_prepare( const struct lp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler **samplers ) +{ + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. + */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); } } -unsigned -run_llvm_fs( const struct lp_fragment_shader *base, - struct foo *machine ) + + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad.
+ */ +static void +eval_constant_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) { + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } } +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} -void -delete_llvm_fs( struct lp_fragment_shader *base ) +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. 
+ */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) { - FREE(base); + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; } -struct lp_fragment_shader * -llvmpipe_create_fs_llvm(struct llvmpipe_context *llvmpipe, - const struct pipe_shader_state *templ) +typedef void +(*eval_coef_func)(struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) { - struct lp_llvm_fragment_shader *shader = NULL; + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + eval_coef_func eval; - /* LLVM fragment shaders currently disabled: - */ - state = CALLOC_STRUCT(lp_llvm_shader_state); - if (!state) - return NULL; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; - state->llvm_prog = 0; + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; - if (!gallivm_global_cpu_engine()) { - gallivm_cpu_engine_create(state->llvm_prog); - } - else - gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); - - if (shader) { - shader->base.run = run_llvm_fs; - shader->base.delete = delete_llvm_fs; + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; 
+ + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + eval = NULL; + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + eval( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + eval( mach, i, j ); + } + } + } + } + } } +} + +/* TODO: codegenerate the whole run function, skip this wrapper. + * TODO: break dependency on tgsi_exec_machine struct + * TODO: push Position calculation into the generated shader + * TODO: process >1 quad at a time + */ +static unsigned +fs_llvm_run( const struct lp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) +{ + struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base); + unsigned i; + unsigned mask; + + /* Compute X, Y, Z, W vals for this quad */ + lp_setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + &machine->QuadPos); + + /* init kill mask */ + tgsi_set_kill_mask(machine, 0x0); + tgsi_set_exec_mask(machine, 1, 1, 1, 1); + + /* execute declarations (interpolants) */ + for (i = 0; i < machine->NumDeclarations; i++) + exec_declaration( machine, &machine->Declarations[i] ); + + memset(machine->Outputs, 0, sizeof machine->Outputs); + + shader->jit_function( machine->Inputs, + machine->Consts, + machine->Outputs, + machine->Samplers); - return shader; + /* FIXME */ + mask = ~0; + + return mask; } -#else +static void +fs_llvm_delete( struct lp_fragment_shader *base ) +{ + struct lp_llvm_fragment_shader *shader = lp_llvm_fragment_shader(base); + struct llvmpipe_screen *screen = shader->screen; + + if(shader->function) { + if(shader->jit_function) + LLVMFreeMachineCodeForFunction(screen->engine, shader->function); + LLVMDeleteFunction(shader->function); + } + + FREE((void *) shader->base.shader.tokens); + 
FREE(shader); +} + struct lp_fragment_shader * llvmpipe_create_fs_llvm(struct llvmpipe_context *llvmpipe, - const struct pipe_shader_state *templ) + const struct pipe_shader_state *templ) { - return NULL; -} + struct llvmpipe_screen *screen = llvmpipe_screen(llvmpipe->pipe.screen); + struct lp_llvm_fragment_shader *shader; + LLVMValueRef fetch_texel; + + shader = CALLOC_STRUCT(lp_llvm_fragment_shader); + if (!shader) + return NULL; + + /* we need to keep a local copy of the tokens */ + shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); + shader->base.prepare = fs_llvm_prepare; + shader->base.run = fs_llvm_run; + shader->base.delete = fs_llvm_delete; + + shader->screen = screen; + tgsi_dump(templ->tokens, 0); + + shader_generate(screen, shader); + + LLVMRunFunctionPassManager(screen->pass, shader->function); + +#if 1 + LLVMDumpValue(shader->function); + debug_printf("\n"); #endif + + if(LLVMVerifyFunction(shader->function, LLVMPrintMessageAction)) { + LLVMDumpValue(shader->function); + abort(); + } + + fetch_texel = LLVMGetNamedFunction(screen->module, "fetch_texel"); + if(fetch_texel) { + static boolean first_time = TRUE; + if(first_time) { + LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_build_tgsi_fetch_texel_soa); + first_time = FALSE; + } + } + + shader->jit_function = (lp_shader_fs_func)LLVMGetPointerToGlobal(screen->engine, shader->function); + + return &shader->base; +} + -- 2.30.2