From e785f190f0d49f0367f7468c22b77962d0f14ea0 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 14 Dec 2007 11:00:46 -0700 Subject: [PATCH] Don't always declare frag shader INPUT[0] as fragment position. We were doing this for the sake of softpipe and the tgsi interpreter since we always need the fragment position and W-coordinate information in order to compute fragment interpolants. But that's not appropriate for hardware drivers. The tgsi interpreter now gets x,y,w information from a separate tgsi_exec_vector variable set up by softpipe. The new pipe_shader_state->input_map[] defines how vert shader outputs map to frag shader inputs. It may go away though, since one can also examine the semantic label on frag shader input[0] to figure things out. --- src/mesa/pipe/p_state.h | 1 + src/mesa/pipe/softpipe/sp_context.h | 2 - src/mesa/pipe/softpipe/sp_headers.h | 1 + src/mesa/pipe/softpipe/sp_prim_setup.c | 339 ++++++++++++++-------- src/mesa/pipe/softpipe/sp_quad_earlyz.c | 6 +- src/mesa/pipe/softpipe/sp_quad_fs.c | 68 +++-- src/mesa/pipe/softpipe/sp_state_derived.c | 12 +- src/mesa/pipe/tgsi/exec/tgsi_exec.c | 27 +- src/mesa/pipe/tgsi/exec/tgsi_exec.h | 2 +- src/mesa/state_tracker/st_atom_shader.c | 25 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 46 +-- src/mesa/state_tracker/st_program.c | 25 +- 12 files changed, 324 insertions(+), 230 deletions(-) diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h index 4e42838f1db..43b710ff3b6 100644 --- a/src/mesa/pipe/p_state.h +++ b/src/mesa/pipe/p_state.h @@ -139,6 +139,7 @@ struct pipe_shader_state { const struct tgsi_token *tokens; ubyte num_inputs; ubyte num_outputs; + ubyte input_map[PIPE_MAX_SHADER_INPUTS]; /* XXX this may be temporary */ ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ diff --git a/src/mesa/pipe/softpipe/sp_context.h 
b/src/mesa/pipe/softpipe/sp_context.h index 2c038de5f7b..8fd44933f26 100644 --- a/src/mesa/pipe/softpipe/sp_context.h +++ b/src/mesa/pipe/softpipe/sp_context.h @@ -110,8 +110,6 @@ struct softpipe_context { struct vertex_info vertex_info; unsigned attr_mask; unsigned nr_frag_attrs; /**< number of active fragment attribs */ - boolean need_z; /**< produce quad/fragment Z values? */ - boolean need_w; /**< produce quad/fragment W values? */ int psize_slot; #if 0 diff --git a/src/mesa/pipe/softpipe/sp_headers.h b/src/mesa/pipe/softpipe/sp_headers.h index b9f2b2205a8..0ae31d87961 100644 --- a/src/mesa/pipe/softpipe/sp_headers.h +++ b/src/mesa/pipe/softpipe/sp_headers.h @@ -73,6 +73,7 @@ struct quad_header { float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; unsigned nr_attrs; }; diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c index fc96f92af1e..8d8dceadc5c 100644 --- a/src/mesa/pipe/softpipe/sp_prim_setup.c +++ b/src/mesa/pipe/softpipe/sp_prim_setup.c @@ -36,10 +36,12 @@ #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" +#include "sp_state.h" #include "sp_prim_setup.h" #include "pipe/draw/draw_private.h" #include "pipe/draw/draw_vertex.h" #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" #define DEBUG_VERTS 0 @@ -80,8 +82,11 @@ struct setup_stage { float oneoverarea; struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ struct quad_header quad; + uint firstFpInput; /** Semantic type of first frag input */ + struct { int left[2]; /**< [0] = row0, [1] = row1 */ int right[2]; @@ -365,18 +370,17 @@ static boolean setup_sort_vertices( struct setup_stage *setup, * \param i which component of the slot (0..3) */ static void const_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i ) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - 
assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = 0; - setup->coef[slot].dady[i] = 0; + coef->dadx[i] = 0; + coef->dady[i] = 0; /* need provoking vertex info! */ - setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i]; + coef->a0[i] = setup->vprovoke->data[vertSlot][i]; } @@ -385,19 +389,20 @@ static void const_coeff( struct setup_stage *setup, * for a triangle. */ static void tri_linear_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i]; - float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i]; + float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; + float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - - assert(slot < PIPE_MAX_SHADER_INPUTS); + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + assert(i <= 3); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at @@ -411,9 +416,9 @@ static void tri_linear_coeff( struct setup_stage *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. 
*/ - setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] - - (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); /* _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", @@ -434,39 +439,68 @@ static void tri_linear_coeff( struct setup_stage *setup, * divide the interpolated value by the interpolated W at that fragment. */ static void tri_persp_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i ) + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - /* premultiply by 1/w: + /* premultiply by 1/w (v->data[0][3] is always W): */ - float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3]; - + float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; + float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; float botda = mida - mina; float majda = maxa - mina; float a = setup->ebot.dy * majda - botda * setup->emaj.dy; float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup->vmin->data[slot][i], - setup->vmid->data[slot][i], - setup->vmax->data[slot][i] + printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin->data[vertSlot][i], + setup->vmid->data[vertSlot][i], + setup->vmax->data[vertSlot][i] ); */ - - assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; - setup->coef[slot].a0[i] = (mina - - (setup->coef[slot].dadx[i] * 
(setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_stage *setup) +{ + const int winHeight = setup->softpipe->framebuffer.cbufs[0]->height; + /*X*/ + setup->coef[0].a0[0] = 0; + setup->coef[0].dadx[0] = 1.0; + setup->coef[0].dady[0] = 0.0; + /*Y*/ + setup->coef[0].a0[1] = winHeight - 1; + setup->coef[0].dadx[1] = 0.0; + setup->coef[0].dady[1] = -1.0; + /*Z*/ + setup->coef[0].a0[2] = setup->posCoef.a0[2]; + setup->coef[0].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[0].dady[2] = setup->posCoef.dady[2]; + /*w*/ + setup->coef[0].a0[3] = setup->posCoef.a0[3]; + setup->coef[0].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[0].dady[3] = setup->posCoef.dady[3]; } + /** * Compute the setup->coef[] array dadx, dady, a0 values. * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 
@@ -474,36 +508,67 @@ static void tri_persp_coeff( struct setup_stage *setup, static void setup_tri_coefficients( struct setup_stage *setup ) { const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode; - unsigned slot, j; +#define USE_INPUT_MAP 0 +#if USE_INPUT_MAP + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; +#endif + uint fragSlot; /* z and w are done by linear interpolation: */ - tri_linear_coeff(setup, 0, 2); - tri_linear_coeff(setup, 0, 3); + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, slot, j); - break; - - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, slot, j); - break; - - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, slot, j); - break; - - default: - /* invalid interp mode */ - assert(0); + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ +#if !USE_INPUT_MAP + uint vertSlot; + if (setup->firstFpInput == TGSI_SEMANTIC_POSITION) { + if (fragSlot == 0) { + setup_fragcoord_coeff(setup); + continue; + } + vertSlot = fragSlot; + } + else { + vertSlot = fragSlot + 1; } + +#else + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + /* XXX with a new INTERP_POSITION token, we could just add a + * new case to the switch below. 
+ */ + setup_fragcoord_coeff(setup); + } + else { +#endif + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + default: + /* invalid interp mode */ + assert(0); + } +#if USE_INPUT_MAP + } +#endif } } @@ -660,17 +725,18 @@ static void setup_tri( struct draw_stage *stage, * for a line. */ static void -line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i) +line_linear_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - const float da = setup->vmax->data[slot][i] - setup->vmin->data[slot][i]; + const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef[slot].dadx[i] = dadx; - setup->coef[slot].dady[i] = dady; - setup->coef[slot].a0[i] - = (setup->vmin->data[slot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); } @@ -679,21 +745,21 @@ line_linear_coeff(struct setup_stage *setup, unsigned slot, unsigned i) * for a line. 
*/ static void -line_persp_coeff(struct setup_stage *setup, unsigned slot, unsigned i) +line_persp_coeff(struct setup_stage *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin->data[slot][i] * setup->vmin->data[0][3]; - const float a1 = setup->vmax->data[slot][i] * setup->vmin->data[0][3]; + const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; + const float a1 = setup->vmax->data[vertSlot][i] * setup->vmin->data[0][3]; const float da = a1 - a0; const float dadx = da * setup->emaj.dx * setup->oneoverarea; const float dady = da * setup->emaj.dy * setup->oneoverarea; - setup->coef[slot].dadx[i] = dadx; - setup->coef[slot].dady[i] = dady; - setup->coef[slot].a0[i] - = (setup->vmin->data[slot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); - + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin->data[vertSlot][i] - + (dadx * (setup->vmin->data[0][0] - 0.5f) + + dady * (setup->vmin->data[0][1] - 0.5f))); } @@ -705,7 +771,8 @@ static INLINE void setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) { const enum interp_mode *interp = setup->softpipe->vertex_info.interp_mode; - unsigned slot, j; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; + unsigned fragSlot; /* use setup->vmin, vmax to point to vertices */ setup->vprovoke = prim->v[1]; @@ -720,31 +787,39 @@ setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) /* z and w are done by linear interpolation: */ - line_linear_coeff(setup, 0, 2); - line_linear_coeff(setup, 0, 3); + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); /* setup interpolation for all the remaining attributes: */ - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; 
j++) - const_coeff(setup, slot, j); - break; - - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, slot, j); - break; - - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, slot, j); - break; - - default: - /* invalid interp mode */ - assert(0); + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + setup_fragcoord_coeff(setup); + } + else { + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + + default: + /* invalid interp mode */ + assert(0); + } } } } @@ -910,14 +985,15 @@ setup_line(struct draw_stage *stage, struct prim_header *prim) static void -point_persp_coeff(struct setup_stage *setup, const struct vertex_header *vert, - uint slot, uint i) +point_persp_coeff(struct setup_stage *setup, + const struct vertex_header *vert, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { - assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = 0.0F; - setup->coef[slot].dady[i] = 0.0F; - setup->coef[slot].a0[i] = vert->data[slot][i] * vert->data[0][3]; + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; } @@ -930,6 +1006,7 @@ static void setup_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; const enum interp_mode 
*interp = setup->softpipe->vertex_info.interp_mode; const struct vertex_header *v0 = prim->v[0]; const int sizeAttr = setup->softpipe->psize_slot; @@ -940,7 +1017,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; const float x = v0->data[0][0]; /* Note: data[0] is always position */ const float y = v0->data[0][1]; - unsigned slot, j; + uint fragSlot; /* For points, all interpolants are constant-valued. * However, for point sprites, we'll need to setup texcoords appropriately. @@ -959,22 +1036,36 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) * probably should be ruled out on that basis. */ setup->vprovoke = prim->v[0]; - const_coeff(setup, 0, 2); - const_coeff(setup, 0, 3); - for (slot = 1; slot < setup->quad.nr_attrs; slot++) { - switch (interp[slot]) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, slot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - point_persp_coeff(setup, v0, slot, j); - break; - default: - assert(0); + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < setup->quad.nr_attrs; fragSlot++) { + /* which vertex output maps to this fragment input: */ + uint vertSlot = fs->input_map[fragSlot]; + + if (vertSlot == 0) { + /* special case: shader is reading gl_FragCoord */ + setup_fragcoord_coeff(setup); + } + else { + uint j; + switch (interp[vertSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + default: + assert(0); + } } } @@ -1108,9 +1199,12 @@ static 
void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); struct softpipe_context *sp = setup->softpipe; + const struct pipe_shader_state *fs = &setup->softpipe->fs->shader; setup->quad.nr_attrs = setup->softpipe->nr_frag_attrs; + setup->firstFpInput = fs->input_semantic_name[0]; + sp->quad.first->begin(sp->quad.first); } @@ -1151,6 +1245,7 @@ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) setup->stage.destroy = render_destroy; setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; return &setup->stage; } diff --git a/src/mesa/pipe/softpipe/sp_quad_earlyz.c b/src/mesa/pipe/softpipe/sp_quad_earlyz.c index 3abd1f1fb98..22ea99049fe 100644 --- a/src/mesa/pipe/softpipe/sp_quad_earlyz.c +++ b/src/mesa/pipe/softpipe/sp_quad_earlyz.c @@ -47,9 +47,9 @@ earlyz_quad( { const float fx = (float) quad->x0; const float fy = (float) quad->y0; - const float dzdx = quad->coef[0].dadx[2]; - const float dzdy = quad->coef[0].dady[2]; - const float z0 = quad->coef[0].a0[2] + dzdx * fx + dzdy * fy; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; quad->outputs.depth[0] = z0; quad->outputs.depth[1] = z0 + dzdx; diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c index da590b256aa..6e7e7eb074e 100644 --- a/src/mesa/pipe/softpipe/sp_quad_fs.c +++ b/src/mesa/pipe/softpipe/sp_quad_fs.c @@ -74,15 +74,49 @@ quad_shade_stage(struct quad_stage *qs) } +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * Note that we only need to "compute" X and Y for the upper-left fragment. + * We could do less work if we're not depth testing, or there's no + * perspective-corrected attributes, but that's seldom. 
+ */ +static void +setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + /* do Y */ + quadpos->xyzw[1].f[0] = y; + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + typedef void (XSTDCALL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, - const struct tgsi_interp_coef *coef ); + const struct tgsi_interp_coef *coef +#if 0 + ,const struct tgsi_exec_vector *quadPos +#endif + ); + -/* This should be done by the fragment shader execution unit (code - * generated from the decl instructions). Do it here for now. +/** + * Execute fragment shader for the four fragments in the quad. */ static void shade_quad( @@ -91,33 +125,15 @@ shade_quad( { struct quad_shade_stage *qss = quad_shade_stage( qs ); struct softpipe_context *softpipe = qs->softpipe; - const float fx = (float) quad->x0; - const float fy = (float) quad->y0; struct tgsi_exec_machine *machine = &qss->machine; - /* Consts does not require 16 byte alignment. */ + /* Consts do not require 16 byte alignment. */ machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; machine->InterpCoefs = quad->coef; -#if 1 /* XXX only do this if the fp really reads fragment.position */ - machine->Inputs[0].xyzw[0].f[0] = fx; - machine->Inputs[0].xyzw[0].f[1] = fx + 1.0f; - machine->Inputs[0].xyzw[0].f[2] = fx; - machine->Inputs[0].xyzw[0].f[3] = fx + 1.0f; - - /* XXX for OpenGL we need to invert the Y pos here (y=0=top). 
- * but that'll mess up linear/perspective interpolation of other - * attributes... - */ - machine->Inputs[0].xyzw[1].f[0] = fy; - machine->Inputs[0].xyzw[1].f[1] = fy; - machine->Inputs[0].xyzw[1].f[2] = fy + 1.0f; - machine->Inputs[0].xyzw[1].f[3] = fy + 1.0f; -#endif - - machine->QuadX = quad->x0; - machine->QuadY = quad->y0; + /* Compute X, Y, Z, W vals for this quad */ + setup_pos_vector(quad->posCoef, quad->x0, quad->y0, &machine->QuadPos); /* run shader */ #if defined(__i386__) || defined(__386__) @@ -130,9 +146,9 @@ shade_quad( machine->Temps, machine->InterpCoefs #if 0 - ,quad->x0, quad->y0 + ,machine->QuadPos #endif - ); + ); quad->mask &= ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); } else diff --git a/src/mesa/pipe/softpipe/sp_state_derived.c b/src/mesa/pipe/softpipe/sp_state_derived.c index c4f1a0a01a9..736ac1c33b2 100644 --- a/src/mesa/pipe/softpipe/sp_state_derived.c +++ b/src/mesa/pipe/softpipe/sp_state_derived.c @@ -51,18 +51,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) memset(vinfo, 0, sizeof(*vinfo)); - if (softpipe->depth_stencil->depth.enabled) - softpipe->need_z = TRUE; - else - softpipe->need_z = FALSE; - softpipe->need_w = FALSE; if (fs->input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { /* Need Z if depth test is enabled or the fragment program uses the * fragment position (XYZW). 
*/ - softpipe->need_z = TRUE; - softpipe->need_w = TRUE; } softpipe->psize_slot = -1; @@ -121,7 +114,6 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ draw_emit_vertex_attr(vinfo, FORMAT_4F, INTERP_PERSPECTIVE); - softpipe->need_w = TRUE; break; default: @@ -129,7 +121,11 @@ static void calculate_vertex_layout( struct softpipe_context *softpipe ) } } +#if 00 softpipe->nr_frag_attrs = vinfo->num_attribs; +#else + softpipe->nr_frag_attrs = fs->num_inputs; +#endif /* We want these after all other attribs since they won't get passed * to the fragment shader. All prior vertex output attribs should match diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c index 03997f90996..1f43f3643ef 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c @@ -1352,7 +1352,8 @@ linear_interpolation( unsigned attrib, unsigned chan ) { - const float x = mach->QuadX, y = mach->QuadY; + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; @@ -1368,14 +1369,17 @@ perspective_interpolation( unsigned attrib, unsigned chan ) { - const float x = mach->QuadX, y = mach->QuadY; + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; const float dadx = mach->InterpCoefs[attrib].dadx[chan]; const float dady = mach->InterpCoefs[attrib].dady[chan]; const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / mach->Inputs[0].xyzw[3].f[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / mach->Inputs[0].xyzw[3].f[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / mach->Inputs[0].xyzw[3].f[2]; - 
mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / mach->Inputs[0].xyzw[3].f[3]; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; } @@ -1400,17 +1404,6 @@ exec_declaration( last = decl->u.DeclarationRange.Last; mask = decl->Declaration.UsageMask; - /* Do not touch WPOS.xy */ - if( first == 0 ) { - mask &= ~TGSI_WRITEMASK_XY; - if( mask == TGSI_WRITEMASK_NONE ) { - first++; - if( first > last ) { - return; - } - } - } - switch( decl->Interpolation.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: interp = constant_interpolation; diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.h b/src/mesa/pipe/tgsi/exec/tgsi_exec.h index 8d166bb5f42..db92e282dfb 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.h +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.h @@ -170,7 +170,6 @@ struct tgsi_exec_machine struct tgsi_exec_vector *Inputs; struct tgsi_exec_vector *Outputs; const struct tgsi_token *Tokens; - float QuadX, QuadY; /**< for frag progs only */ unsigned Processor; /* GEOMETRY processor only. */ @@ -178,6 +177,7 @@ struct tgsi_exec_machine /* FRAGMENT processor only. 
*/ const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; /* Conditional execution masks */ uint CondMask; /**< For IF/ELSE/ENDIF */ diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 4ec10badad3..33372b0f399 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -151,8 +151,7 @@ find_translated_vp(struct st_context *st, { static const GLuint UNUSED = ~0; struct translated_vertex_program *xvp; - const GLbitfield fragInputsRead - = stfp->Base.Base.InputsRead | FRAG_BIT_WPOS; + const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead; /* * Translate fragment program if needed. @@ -206,6 +205,7 @@ find_translated_vp(struct st_context *st, if (xvp->serialNo != stvp->serialNo) { GLuint outAttr, dummySlot; const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten; + GLuint numVpOuts = 0; /* Compute mapping of vertex program outputs to slots, which depends * on the fragment program's input->slot mapping. 
@@ -214,11 +214,24 @@ find_translated_vp(struct st_context *st, /* set default: */ xvp->output_to_slot[outAttr] = UNUSED; - if (outputsWritten & (1 << outAttr)) { + if (outAttr == VERT_RESULT_HPOS) { + /* always put xformed position into slot zero */ + xvp->output_to_slot[VERT_RESULT_HPOS] = 0; + numVpOuts++; + } + else if (outputsWritten & (1 << outAttr)) { /* see if the frag prog wants this vert output */ - GLint fpIn = vp_out_to_fp_in(outAttr); - if (fpIn >= 0) { - xvp->output_to_slot[outAttr] = stfp->input_to_slot[fpIn]; + GLint fpInAttrib = vp_out_to_fp_in(outAttr); + if (fpInAttrib >= 0) { + GLuint fpInSlot = stfp->input_to_slot[fpInAttrib]; + GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot]; + xvp->output_to_slot[outAttr] = vpOutSlot; + numVpOuts++; + } + else if (outAttr == VERT_RESULT_BFC0 || + outAttr == VERT_RESULT_BFC1) { + /* backface colors go into last slots */ + xvp->output_to_slot[outAttr] = numVpOuts++; } } } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index b392edf16d2..27dab5b9c06 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -675,44 +675,22 @@ tgsi_translate_mesa_program( if (procType == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < numInputs; i++) { struct tgsi_full_declaration fulldecl; - switch (inputSemanticName[i]) { - case TGSI_SEMANTIC_POSITION: - /* Fragment XY pos */ - fulldecl = make_input_decl(i, - GL_TRUE, TGSI_INTERPOLATE_CONSTANT, - TGSI_WRITEMASK_XY, - GL_TRUE, TGSI_SEMANTIC_POSITION, 0 ); - ti += tgsi_build_full_declaration( - &fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - /* Fragment ZW pos */ - fulldecl = make_input_decl(i, - GL_TRUE, TGSI_INTERPOLATE_LINEAR, - TGSI_WRITEMASK_ZW, - GL_TRUE, TGSI_SEMANTIC_POSITION, 0 ); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - break; - default: - fulldecl = make_input_decl(i, - GL_TRUE, interpMode[i], - 
TGSI_WRITEMASK_XYZW, - GL_TRUE, inputSemanticName[i], - inputSemanticIndex[i]); - ti += tgsi_build_full_declaration(&fulldecl, - &tokens[ti], - header, - maxTokens - ti ); - break; - } + fulldecl = make_input_decl(i, + GL_TRUE, interpMode[i], + TGSI_WRITEMASK_XYZW, + GL_TRUE, inputSemanticName[i], + inputSemanticIndex[i]); + ti += tgsi_build_full_declaration(&fulldecl, + &tokens[ti], + header, + maxTokens - ti ); } } else { /* vertex prog */ + /* XXX: this could probaby be merged with the clause above. + * the only difference is the semantic tags. + */ for (i = 0; i < numInputs; i++) { struct tgsi_full_declaration fulldecl; fulldecl = make_input_decl(i, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index e64bf14d56a..fe22233c937 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -47,7 +47,7 @@ #include "st_mesa_to_tgsi.h" -#define TGSI_DEBUG 0 +#define TGSI_DEBUG 01 /** @@ -283,16 +283,17 @@ st_translate_fragment_program(struct st_context *st, const struct cso_fragment_shader *cso; GLuint interpMode[16]; /* XXX size? */ GLuint attr; - GLbitfield inputsRead = stfp->Base.Base.InputsRead; - - /* For software rendering, we always need the fragment input position - * in order to calculate interpolated values. - * For i915, we always want to emit the semantic info for position. - */ - inputsRead |= FRAG_BIT_WPOS; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLuint vslot = 0; memset(&fs, 0, sizeof(fs)); + /* which vertex output goes to the first fragment input: */ + if (inputsRead & FRAG_BIT_WPOS) + vslot = 0; + else + vslot = 1; + /* * Convert Mesa program inputs to TGSI input register semantics. 
*/ @@ -300,15 +301,17 @@ st_translate_fragment_program(struct st_context *st, if (inputsRead & (1 << attr)) { const GLuint slot = fs.num_inputs; - fs.num_inputs++; - defaultInputMapping[attr] = slot; + fs.input_map[slot] = vslot++; + + fs.num_inputs++; + switch (attr) { case FRAG_ATTRIB_WPOS: fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; fs.input_semantic_index[slot] = 0; - interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case FRAG_ATTRIB_COL0: fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; -- 2.30.2