From 058b978a5ae2a56e09fed6335b686c654444f4ac Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 15 Aug 2007 18:16:11 +0100 Subject: [PATCH] Add UsageMask to DECLARATION in TGSI. Interpolate FS attributes in the shader. Do not copy WPOS in FS. --- src/mesa/pipe/softpipe/sp_headers.h | 21 +-- src/mesa/pipe/softpipe/sp_prim_setup.c | 2 +- src/mesa/pipe/softpipe/sp_quad_fs.c | 172 ++++------------------ src/mesa/pipe/softpipe/sp_state_derived.c | 4 +- src/mesa/pipe/tgsi/core/tgsi_build.c | 4 + src/mesa/pipe/tgsi/core/tgsi_build.h | 1 + src/mesa/pipe/tgsi/core/tgsi_dump.c | 31 ++++ src/mesa/pipe/tgsi/core/tgsi_exec.c | 127 +++++++++++++++- src/mesa/pipe/tgsi/core/tgsi_exec.h | 52 ++++--- src/mesa/pipe/tgsi/core/tgsi_token.h | 37 ++--- src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c | 76 +++++----- 11 files changed, 276 insertions(+), 251 deletions(-) diff --git a/src/mesa/pipe/softpipe/sp_headers.h b/src/mesa/pipe/softpipe/sp_headers.h index b7f46cb6583..cc8294b18e8 100644 --- a/src/mesa/pipe/softpipe/sp_headers.h +++ b/src/mesa/pipe/softpipe/sp_headers.h @@ -31,6 +31,7 @@ #ifndef SP_HEADERS_H #define SP_HEADERS_H +#include "../tgsi/core/tgsi_core.h" #define PRIM_POINT 1 #define PRIM_LINE 2 @@ -44,7 +45,6 @@ #define QUAD_BOTTOM_RIGHT 1 #define QUAD_TOP_LEFT 2 #define QUAD_TOP_RIGHT 3 -#define QUAD_SIZE (2*2) #define MASK_BOTTOM_LEFT 0x1 #define MASK_BOTTOM_RIGHT 0x2 @@ -53,17 +53,6 @@ #define MASK_ALL 0xf -#define NUM_CHANNELS 4 /* avoid confusion between 4 pixels and 4 channels */ - - -struct setup_coefficient { - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - - /** * Encodes everything we need to know about a 2x2 pixel block. Uses * "Channel-Serial" or "SoA" layout. @@ -76,17 +65,13 @@ struct quad_header { unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ struct { - float color[4][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ + float color[NUM_CHANNELS][QUAD_SIZE]; /* rrrr, gggg, bbbb, aaaa */ float depth[QUAD_SIZE]; } outputs; float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ - const struct setup_coefficient *coef; - - const enum interp_mode *interp; /* XXX: this information should be - * encoded in fragment program DECL - * statements. */ + const struct tgsi_interp_coef *coef; unsigned nr_attrs; }; diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c index 45d09860c3c..71ef798cbbb 100644 --- a/src/mesa/pipe/softpipe/sp_prim_setup.c +++ b/src/mesa/pipe/softpipe/sp_prim_setup.c @@ -80,7 +80,7 @@ struct setup_stage { float oneoverarea; - struct setup_coefficient coef[FRAG_ATTRIB_MAX]; + struct tgsi_interp_coef coef[FRAG_ATTRIB_MAX]; struct quad_header quad; struct { diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c index bffb03a6bcc..0a345bbf118 100644 --- a/src/mesa/pipe/softpipe/sp_quad_fs.c +++ b/src/mesa/pipe/softpipe/sp_quad_fs.c @@ -33,15 +33,12 @@ */ #include "pipe/p_util.h" -#include "tgsi/core/tgsi_core.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" #include "sp_tex_sample.h" -#include "main/mtypes.h" - #if defined __GNUC__ #define USE_ALIGNED_ATTRIBS 1 @@ -66,157 +63,33 @@ quad_shade_stage(struct quad_stage *qs) return (struct quad_shade_stage *) qs; } - - -struct exec_machine { - const struct setup_coefficient *coef; /**< will point to quad->coef */ - float attr[PIPE_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] ALIGN16_SUFFIX; -}; - - -/** - * Compute quad's attributes values, as constants (GL_FLAT shading). - */ -static INLINE void cinterp( struct exec_machine *exec, - unsigned attrib, - unsigned i ) -{ - unsigned j; - - for (j = 0; j < QUAD_SIZE; j++) { - exec->attr[attrib][i][j] = exec->coef[attrib].a0[i]; - } -} - - -/** - * Compute quad's attribute values by linear interpolation. - * - * Push into the fp: - * - * INPUT[attr] = MAD COEF_A0[attr], COEF_DADX[attr], INPUT_WPOS.xxxx - * INPUT[attr] = MAD INPUT[attr], COEF_DADY[attr], INPUT_WPOS.yyyy - */ -static INLINE void linterp( struct exec_machine *exec, - unsigned attrib, - unsigned i ) -{ - unsigned j; - - for (j = 0; j < QUAD_SIZE; j++) { - const float x = exec->attr[FRAG_ATTRIB_WPOS][0][j]; - const float y = exec->attr[FRAG_ATTRIB_WPOS][1][j]; - exec->attr[attrib][i][j] = (exec->coef[attrib].a0[i] + - exec->coef[attrib].dadx[i] * x + - exec->coef[attrib].dady[i] * y); - } -} - - -/** - * Compute quad's attribute values by linear interpolation with - * perspective correction. - * - * Push into the fp: - * - * INPUT[attr] = MAD COEF_DADX[attr], INPUT_WPOS.xxxx, COEF_A0[attr] - * INPUT[attr] = MAD COEF_DADY[attr], INPUT_WPOS.yyyy, INPUT[attr] - * TMP = RCP INPUT_WPOS.w - * INPUT[attr] = MUL INPUT[attr], TMP.xxxx - * - */ -static INLINE void pinterp( struct exec_machine *exec, - unsigned attrib, - unsigned i ) -{ - unsigned j; - - for (j = 0; j < QUAD_SIZE; j++) { - const float x = exec->attr[FRAG_ATTRIB_WPOS][0][j]; - const float y = exec->attr[FRAG_ATTRIB_WPOS][1][j]; - /* FRAG_ATTRIB_WPOS.w here is really 1/w */ - const float w = 1.0 / exec->attr[FRAG_ATTRIB_WPOS][3][j]; - exec->attr[attrib][i][j] = ((exec->coef[attrib].a0[i] + - exec->coef[attrib].dadx[i] * x + - exec->coef[attrib].dady[i] * y) * w); - } -} - - /* This should be done by the fragment shader execution unit (code * generated from the decl instructions). Do it here for now. */ static void -shade_quad( struct quad_stage *qs, struct quad_header *quad ) +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) { - struct quad_shade_stage *qss = quad_shade_stage(qs); + struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; - struct exec_machine exec; const float fx = quad->x0; const float fy = quad->y0; unsigned attr, i; struct tgsi_exec_machine machine; #if USE_ALIGNED_ATTRIBS - struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX] ALIGN16_SUFFIX; + struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; + struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; #else - struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1]; - struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1]; + struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX + 1]; + struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX + 1]; #endif - exec.coef = quad->coef; - - /* Position: - */ - exec.attr[FRAG_ATTRIB_WPOS][0][0] = fx; - exec.attr[FRAG_ATTRIB_WPOS][0][1] = fx + 1.0; - exec.attr[FRAG_ATTRIB_WPOS][0][2] = fx; - exec.attr[FRAG_ATTRIB_WPOS][0][3] = fx + 1.0; - - exec.attr[FRAG_ATTRIB_WPOS][1][0] = fy; - exec.attr[FRAG_ATTRIB_WPOS][1][1] = fy; - exec.attr[FRAG_ATTRIB_WPOS][1][2] = fy + 1.0; - exec.attr[FRAG_ATTRIB_WPOS][1][3] = fy + 1.0; - - /* Z and W are done by linear interpolation */ - if (softpipe->need_z) { - linterp(&exec, 0, 2); /* attr[0].z */ - } - - if (softpipe->need_w) { - linterp(&exec, 0, 3); /* attr[0].w */ - /*invert(&exec, 0, 3);*/ - } - - /* Interpolate all the remaining attributes. This will get pushed - * into the fragment program's responsibilities at some point. - * Start at 1 to skip fragment position attribute (computed above). - */ - for (attr = 1; attr < quad->nr_attrs; attr++) { - switch (softpipe->interp[attr]) { - case INTERP_CONSTANT: - for (i = 0; i < NUM_CHANNELS; i++) - cinterp(&exec, attr, i); - break; - - case INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - linterp(&exec, attr, i); - break; - - case INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - pinterp(&exec, attr, i); - break; - } - } - #ifdef DEBUG memset( &machine, 0, sizeof( machine ) ); #endif - assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) ); - /* init machine state */ tgsi_exec_machine_init( &machine, @@ -228,33 +101,40 @@ shade_quad( struct quad_stage *qs, struct quad_header *quad ) machine.Consts = softpipe->fs.constants->constant; #if USE_ALIGNED_ATTRIBS - machine.Inputs = (struct tgsi_exec_vector *) exec.attr; + machine.Inputs = inputs; machine.Outputs = outputs; #else machine.Inputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs ); machine.Outputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs ); - - memcpy( - machine.Inputs, - exec.attr, - softpipe->nr_attrs * sizeof( struct tgsi_exec_vector ) ); #endif + machine.InterpCoefs = quad->coef; + + machine.Inputs[0].xyzw[0].f[0] = fx; + machine.Inputs[0].xyzw[0].f[1] = fx + 1.0; + machine.Inputs[0].xyzw[0].f[2] = fx; + machine.Inputs[0].xyzw[0].f[3] = fx + 1.0; + + machine.Inputs[0].xyzw[1].f[0] = fy; + machine.Inputs[0].xyzw[1].f[1] = fy; + machine.Inputs[0].xyzw[1].f[2] = fy + 1.0; + machine.Inputs[0].xyzw[1].f[3] = fy + 1.0; + /* run shader */ tgsi_exec_machine_run( &machine ); /* store result color */ memcpy( quad->outputs.color, - &machine.Outputs[FRAG_ATTRIB_COL0].xyzw[0].f[0], + &machine.Outputs[1].xyzw[0].f[0], sizeof( quad->outputs.color ) ); if( softpipe->need_z ) { /* XXX temporary */ - quad->outputs.depth[0] = exec.attr[0][2][0]; - quad->outputs.depth[1] = exec.attr[0][2][1]; - quad->outputs.depth[2] = exec.attr[0][2][2]; - quad->outputs.depth[3] = exec.attr[0][2][3]; + memcpy( + quad->outputs.depth, + &machine.Outputs[0].xyzw[2], + sizeof( quad->outputs.depth ) ); } /* shader may cull fragments */ diff --git a/src/mesa/pipe/softpipe/sp_state_derived.c b/src/mesa/pipe/softpipe/sp_state_derived.c index 0a2cfbb7d1b..cd67d1c46fb 100644 --- a/src/mesa/pipe/softpipe/sp_state_derived.c +++ b/src/mesa/pipe/softpipe/sp_state_derived.c @@ -87,7 +87,7 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) * fragment position (XYZW). */ if (softpipe->depth_test.enabled || - (inputsRead & FRAG_ATTRIB_WPOS)) + (inputsRead & (1 << FRAG_ATTRIB_WPOS))) softpipe->need_z = TRUE; else softpipe->need_z = FALSE; @@ -95,7 +95,7 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) /* Need W if we do any perspective-corrected interpolation or the * fragment program uses the fragment position. */ - if (inputsRead & FRAG_ATTRIB_WPOS) + if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) softpipe->need_w = TRUE; else softpipe->need_w = FALSE; diff --git a/src/mesa/pipe/tgsi/core/tgsi_build.c b/src/mesa/pipe/tgsi/core/tgsi_build.c index d9c9a45532d..d6f8af656ac 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_build.c +++ b/src/mesa/pipe/tgsi/core/tgsi_build.c @@ -88,6 +88,7 @@ tgsi_default_declaration( void ) declaration.Size = 1; declaration.File = TGSI_FILE_NULL; declaration.Declare = TGSI_DECLARE_RANGE; + declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = 0; declaration.Semantic = 0; declaration.Padding = 0; @@ -100,6 +101,7 @@ struct tgsi_declaration tgsi_build_declaration( unsigned file, unsigned declare, + unsigned usage_mask, unsigned interpolate, unsigned semantic, struct tgsi_header *header ) @@ -112,6 +114,7 @@ tgsi_build_declaration( declaration = tgsi_default_declaration(); declaration.File = file; declaration.Declare = declare; + declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; declaration.Semantic = semantic; @@ -162,6 +165,7 @@ tgsi_build_full_declaration( *declaration = tgsi_build_declaration( full_decl->Declaration.File, full_decl->Declaration.Declare, + full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, full_decl->Declaration.Semantic, header ); diff --git a/src/mesa/pipe/tgsi/core/tgsi_build.h b/src/mesa/pipe/tgsi/core/tgsi_build.h index b3eb0715eef..116c78abf34 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_build.h +++ b/src/mesa/pipe/tgsi/core/tgsi_build.h @@ -38,6 +38,7 @@ struct tgsi_declaration tgsi_build_declaration( unsigned file, unsigned declare, + unsigned usage_mask, unsigned interpolate, unsigned semantic, struct tgsi_header *header ); diff --git a/src/mesa/pipe/tgsi/core/tgsi_dump.c b/src/mesa/pipe/tgsi/core/tgsi_dump.c index 5a9f92004ed..622d617e296 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_dump.c +++ b/src/mesa/pipe/tgsi/core/tgsi_dump.c @@ -633,6 +633,22 @@ dump_declaration_short( assert( 0 ); } + if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) { + CHR( '.' ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'x' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'w' ); + } + } + if( decl->Declaration.Interpolate ) { TXT( ", " ); ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); @@ -659,6 +675,21 @@ dump_declaration_verbose( ENM( decl->Declaration.File, TGSI_FILES ); TXT( "\nDeclare : " ); ENM( decl->Declaration.Declare, TGSI_DECLARES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { TXT( "\nInterpolate: " ); UID( decl->Declaration.Interpolate ); diff --git a/src/mesa/pipe/tgsi/core/tgsi_exec.c b/src/mesa/pipe/tgsi/core/tgsi_exec.c index 0a34754c40b..0ffed20066e 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_exec.c +++ b/src/mesa/pipe/tgsi/core/tgsi_exec.c @@ -62,7 +62,7 @@ void tgsi_exec_machine_init( struct tgsi_exec_machine *mach, - struct tgsi_token *tokens, + const struct tgsi_token *tokens, GLuint numSamplers, struct tgsi_sampler *samplers) { @@ -1063,7 +1063,131 @@ fetch_texel( struct tgsi_sampler *sampler, } } +static void +constant_interpolation( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = mach->Inputs[0].xyzw[0].f[i]; + const float y = mach->Inputs[0].xyzw[1].f[i]; + + mach->Inputs[attrib].xyzw[chan].f[i] = + mach->InterpCoefs[attrib].a0[chan] + + mach->InterpCoefs[attrib].dadx[chan] * x + + mach->InterpCoefs[attrib].dady[chan] * y; + } +} + +static void +perspective_interpolation( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = mach->Inputs[0].xyzw[0].f[i]; + const float y = mach->Inputs[0].xyzw[1].f[i]; + // WPOS.w here is really 1/w + const float w = 1.0f / mach->Inputs[0].xyzw[3].f[i]; + + mach->Inputs[attrib].xyzw[chan].f[i] = + (mach->InterpCoefs[attrib].a0[chan] + + mach->InterpCoefs[attrib].dadx[chan] * x + + mach->InterpCoefs[attrib].dady[chan] * y) * w; + } +} + +typedef void (* interpolation_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask, i, j; + interpolation_func interp; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if( first == 0 ) { + mask &= ~TGSI_WRITEMASK_XY; + if( mask == TGSI_WRITEMASK_NONE ) { + first++; + if( first > last ) { + return; + } + } + } + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} static void exec_instruction( @@ -2161,6 +2285,7 @@ tgsi_exec_machine_run2( tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: + exec_declaration( mach, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_IMMEDIATE: break; diff --git a/src/mesa/pipe/tgsi/core/tgsi_exec.h b/src/mesa/pipe/tgsi/core/tgsi_exec.h index eed2207d7d3..e5e8c3608ed 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_exec.h +++ b/src/mesa/pipe/tgsi/core/tgsi_exec.h @@ -11,23 +11,27 @@ extern "C" { #endif // defined __cplusplus +#define NUM_CHANNELS 4 /* R,G,B,A */ +#define QUAD_SIZE 4 /* 4 pixel/quad */ + union tgsi_exec_channel { - float f[4]; - int i[4]; - unsigned u[4]; + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; }; struct tgsi_exec_vector { - union tgsi_exec_channel xyzw[4]; + union tgsi_exec_channel xyzw[NUM_CHANNELS]; }; - -#define NUM_CHANNELS 4 /* R,G,B,A */ -#ifndef QUAD_SIZE -#define QUAD_SIZE 4 /* 4 pixel/quad */ -#endif +struct tgsi_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; #define TEX_CACHE_TILE_SIZE 8 #define TEX_CACHE_NUM_ENTRIES 8 @@ -55,8 +59,8 @@ struct tgsi_sampler struct tgsi_exec_labels { - unsigned labels[128][2]; - unsigned count; + unsigned labels[128][2]; + unsigned count; }; #define TGSI_EXEC_TEMP_00000000_I 32 @@ -109,15 +113,15 @@ struct tgsi_exec_cond_state { struct tgsi_exec_cond_regs IfPortion; struct tgsi_exec_cond_regs ElsePortion; - unsigned Condition; - boolean WasElse; + unsigned Condition; + boolean WasElse; }; /* XXX: This is temporary */ struct tgsi_exec_cond_stack { struct tgsi_exec_cond_state States[8]; - unsigned Index; /* into States[] */ + unsigned Index; /* into States[] */ }; struct tgsi_exec_machine @@ -138,15 +142,19 @@ struct tgsi_exec_machine struct tgsi_sampler *Samplers; - float Imms[256][4]; - unsigned ImmLimit; - float (*Consts)[4]; - const struct tgsi_exec_vector *Inputs; + float Imms[256][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct tgsi_exec_vector *Inputs; struct tgsi_exec_vector *Outputs; - struct tgsi_token *Tokens; - unsigned Processor; + const struct tgsi_token *Tokens; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; - unsigned *Primitives; + /* FRAGMENT processor only. */ + const struct tgsi_interp_coef *InterpCoefs; struct tgsi_exec_cond_stack CondStack; #if XXX_SSE @@ -157,7 +165,7 @@ struct tgsi_exec_machine void tgsi_exec_machine_init( struct tgsi_exec_machine *mach, - struct tgsi_token *tokens, + const struct tgsi_token *tokens, unsigned numSamplers, struct tgsi_sampler *samplers); diff --git a/src/mesa/pipe/tgsi/core/tgsi_token.h b/src/mesa/pipe/tgsi/core/tgsi_token.h index f72c7dc4be4..3ed341fb7b1 100644 --- a/src/mesa/pipe/tgsi/core/tgsi_token.h +++ b/src/mesa/pipe/tgsi/core/tgsi_token.h @@ -52,15 +52,33 @@ struct tgsi_token #define TGSI_DECLARE_RANGE 0 #define TGSI_DECLARE_MASK 1 +#define TGSI_WRITEMASK_NONE 0x00 +#define TGSI_WRITEMASK_X 0x01 +#define TGSI_WRITEMASK_Y 0x02 +#define TGSI_WRITEMASK_XY 0x03 +#define TGSI_WRITEMASK_Z 0x04 +#define TGSI_WRITEMASK_XZ 0x05 +#define TGSI_WRITEMASK_YZ 0x06 +#define TGSI_WRITEMASK_XYZ 0x07 +#define TGSI_WRITEMASK_W 0x08 +#define TGSI_WRITEMASK_XW 0x09 +#define TGSI_WRITEMASK_YW 0x0A +#define TGSI_WRITEMASK_XYW 0x0B +#define TGSI_WRITEMASK_ZW 0x0C +#define TGSI_WRITEMASK_XZW 0x0D +#define TGSI_WRITEMASK_YZW 0x0E +#define TGSI_WRITEMASK_XYZW 0x0F + struct tgsi_declaration { unsigned Type : 4; /* TGSI_TOKEN_TYPE_DECLARATION */ unsigned Size : 8; /* UINT */ unsigned File : 4; /* TGSI_FILE_ */ unsigned Declare : 4; /* TGSI_DECLARE_ */ + unsigned UsageMask : 4; /* TGSI_WRITEMASK_ */ unsigned Interpolate : 1; /* BOOL */ unsigned Semantic : 1; /* BOOL */ - unsigned Padding : 9; + unsigned Padding : 5; unsigned Extended : 1; /* BOOL */ }; @@ -1226,23 +1244,6 @@ struct tgsi_instruction_ext_texture unsigned Extended : 1; /* BOOL */ }; -#define TGSI_WRITEMASK_NONE 0x00 -#define TGSI_WRITEMASK_X 0x01 -#define TGSI_WRITEMASK_Y 0x02 -#define TGSI_WRITEMASK_XY 0x03 -#define TGSI_WRITEMASK_Z 0x04 -#define TGSI_WRITEMASK_XZ 0x05 -#define TGSI_WRITEMASK_YZ 0x06 -#define TGSI_WRITEMASK_XYZ 0x07 -#define TGSI_WRITEMASK_W 0x08 -#define TGSI_WRITEMASK_XW 0x09 -#define TGSI_WRITEMASK_YW 0x0A -#define TGSI_WRITEMASK_XYW 0x0B -#define TGSI_WRITEMASK_ZW 0x0C -#define TGSI_WRITEMASK_XZW 0x0D -#define TGSI_WRITEMASK_YZW 0x0E -#define TGSI_WRITEMASK_XYZW 0x0F - struct tgsi_instruction_ext_predicate { unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_PREDICATE */ diff --git a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c index 609d3292428..2ac04e8d56f 100644 --- a/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c +++ b/src/mesa/pipe/tgsi/mesa/mesa_to_tgsi.c @@ -467,13 +467,15 @@ static struct tgsi_full_declaration make_frag_input_decl( GLuint first, GLuint last, - GLuint interpolate ) + GLuint interpolate, + GLuint usage_mask ) { struct tgsi_full_declaration decl; decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; decl.Declaration.Declare = TGSI_DECLARE_RANGE; + decl.Declaration.UsageMask = usage_mask; decl.Declaration.Interpolate = 1; decl.u.DeclarationRange.First = first; decl.u.DeclarationRange.Last = last; @@ -485,13 +487,15 @@ make_frag_input_decl( static struct tgsi_full_declaration make_frag_output_decl( GLuint index, - GLuint semantic_name ) + GLuint semantic_name, + GLuint usage_mask ) { struct tgsi_full_declaration decl; decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Declare = TGSI_DECLARE_RANGE; + decl.Declaration.UsageMask = usage_mask; decl.Declaration.Semantic = 1; decl.u.DeclarationRange.First = index; decl.u.DeclarationRange.Last = index; @@ -514,6 +518,7 @@ tgsi_mesa_compile_fp_program( struct tgsi_full_dst_register *fulldst; struct tgsi_full_src_register *fullsrc; GLuint inputs_read; + GLboolean reads_wpos; GLuint preamble_size = 0; *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); @@ -523,19 +528,33 @@ tgsi_mesa_compile_fp_program( ti = 2; - /* - * Input 0 is always read, at least implicitly by the MOV instruction generated - * below, so mark it as used. - */ - inputs_read = program->Base.InputsRead | 1; + reads_wpos = program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS); + inputs_read = program->Base.InputsRead | (1 << FRAG_ATTRIB_WPOS); /* * Declare input attributes. Note that we do not interpolate fragment position. */ + + /* Fragment position. */ + if( reads_wpos ) { + fulldecl = make_frag_input_decl( + 0, + 0, + TGSI_INTERPOLATE_CONSTANT, + TGSI_WRITEMASK_XY ); + ti += tgsi_build_full_declaration( + &fulldecl, + &tokens[ti], + header, + maxTokens - ti ); + } + + /* Fragment zw. */ fulldecl = make_frag_input_decl( 0, 0, - TGSI_INTERPOLATE_CONSTANT ); + TGSI_INTERPOLATE_LINEAR, + reads_wpos ? TGSI_WRITEMASK_ZW : TGSI_WRITEMASK_Z ); ti += tgsi_build_full_declaration( &fulldecl, &tokens[ti], @@ -552,7 +571,8 @@ tgsi_mesa_compile_fp_program( fulldecl = make_frag_input_decl( 1, 1 + count - 1, - TGSI_INTERPOLATE_LINEAR ); + TGSI_INTERPOLATE_LINEAR, + TGSI_WRITEMASK_XYZW ); ti += tgsi_build_full_declaration( &fulldecl, &tokens[ti], @@ -569,7 +589,8 @@ tgsi_mesa_compile_fp_program( fulldecl = make_frag_output_decl( 0, - TGSI_SEMANTIC_DEPTH ); + TGSI_SEMANTIC_DEPTH, + TGSI_WRITEMASK_Z ); ti += tgsi_build_full_declaration( &fulldecl, &tokens[ti], @@ -579,7 +600,8 @@ tgsi_mesa_compile_fp_program( if( program->Base.OutputsWritten & (1 << FRAG_RESULT_COLR) ) { fulldecl = make_frag_output_decl( 1, - TGSI_SEMANTIC_COLOR ); + TGSI_SEMANTIC_COLOR, + TGSI_WRITEMASK_XYZW ); ti += tgsi_build_full_declaration( &fulldecl, &tokens[ti], @@ -587,38 +609,6 @@ tgsi_mesa_compile_fp_program( maxTokens - ti ); } - /* - * Copy input fragment xyz to output xyz. - * If the shader writes depth, do not copy the z component. - */ - - fullinst = tgsi_default_full_instruction(); - - fullinst.Instruction.Opcode = TGSI_OPCODE_MOV; - fullinst.Instruction.NumDstRegs = 1; - fullinst.Instruction.NumSrcRegs = 1; - - fulldst = &fullinst.FullDstRegisters[0]; - fulldst->DstRegister.File = TGSI_FILE_OUTPUT; - fulldst->DstRegister.Index = 0; - if( program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR) ) { - fulldst->DstRegister.WriteMask = TGSI_WRITEMASK_XY; - } - else { - fulldst->DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; - } - - fullsrc = &fullinst.FullSrcRegisters[0]; - fullsrc->SrcRegister.File = TGSI_FILE_INPUT; - fullsrc->SrcRegister.Index = 0; - - ti += tgsi_build_full_instruction( - &fullinst, - &tokens[ti], - header, - maxTokens - ti ); - preamble_size++; - for( i = 0; i < program->Base.NumInstructions; i++ ) { if( compile_instruction( &program->Base.Instructions[i], -- 2.30.2