From 8e2badfc269082f4b52a82ac1c5b4350bef0d01b Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Sun, 5 Sep 2010 05:42:59 +0200 Subject: [PATCH] nvfx: add rewritten swtnl support The old swtnl code was broken by the new shader linkage support for GLSL. This is a rewrite of swtnl support, which should work properly, be faster, and be closer to the much better-tested hardware pipeline. --- src/gallium/drivers/nvfx/nvfx_context.h | 12 +- src/gallium/drivers/nvfx/nvfx_draw.c | 236 +++++---------------- src/gallium/drivers/nvfx/nvfx_fragprog.c | 2 +- src/gallium/drivers/nvfx/nvfx_screen.c | 2 +- src/gallium/drivers/nvfx/nvfx_state.h | 19 +- src/gallium/drivers/nvfx/nvfx_state_emit.c | 31 +-- src/gallium/drivers/nvfx/nvfx_vbo.c | 38 ++++ src/gallium/drivers/nvfx/nvfx_vertprog.c | 161 +++++++++----- 8 files changed, 238 insertions(+), 263 deletions(-) diff --git a/src/gallium/drivers/nvfx/nvfx_context.h b/src/gallium/drivers/nvfx/nvfx_context.h index b837437c58f..369c2163882 100644 --- a/src/gallium/drivers/nvfx/nvfx_context.h +++ b/src/gallium/drivers/nvfx/nvfx_context.h @@ -151,14 +151,6 @@ struct nvfx_context { /* HW state derived from pipe states */ struct nvfx_state state; - struct { - struct nvfx_vertex_program *vertprog; - - unsigned nr_attribs; - unsigned hw[PIPE_MAX_SHADER_INPUTS]; - unsigned draw[PIPE_MAX_SHADER_INPUTS]; - unsigned emit[PIPE_MAX_SHADER_INPUTS]; - } swtnl; enum { HW, SWTNL, SWRAST @@ -170,7 +162,7 @@ struct nvfx_context { struct pipe_scissor_state scissor; unsigned stipple[32]; struct pipe_clip_state clip; - struct nvfx_vertex_program *vertprog; + struct nvfx_pipe_vertex_program *vertprog; struct nvfx_pipe_fragment_program *fragprog; struct pipe_resource *constbuf[PIPE_SHADER_TYPES]; unsigned constbuf_nr[PIPE_SHADER_TYPES]; @@ -208,6 +200,7 @@ struct nvfx_context { int hw_pointsprite_control; int hw_vp_output; struct nvfx_fragment_program* hw_fragprog; + struct nvfx_vertex_program* hw_vertprog; unsigned relocs_needed; }; @@ -326,6 +319,7 
@@ extern void nvfx_init_transfer_functions(struct pipe_context *pipe); /* nvfx_vbo.c */ extern boolean nvfx_vbo_validate(struct nvfx_context *nvfx); +extern void nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx); extern void nvfx_vbo_relocate(struct nvfx_context *nvfx); extern void nvfx_idxbuf_validate(struct nvfx_context* nvfx); extern void nvfx_idxbuf_relocate(struct nvfx_context* nvfx); diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c index 2601d5b8e2e..4bf38a9c181 100644 --- a/src/gallium/drivers/nvfx/nvfx_draw.c +++ b/src/gallium/drivers/nvfx/nvfx_draw.c @@ -1,6 +1,5 @@ #include "pipe/p_shader_tokens.h" #include "util/u_inlines.h" -#include "tgsi/tgsi_ureg.h" #include "util/u_pack_color.h" @@ -11,11 +10,6 @@ #include "nvfx_context.h" #include "nvfx_resource.h" -/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very - * often at all. Uses "quadro style" vertex submission + a fixed vertex - * layout to avoid the need to generate a vertex program or vtxfmt. 
- */ - struct nvfx_render_stage { struct draw_stage stage; struct nvfx_context *nvfx; @@ -28,58 +22,18 @@ nvfx_render_stage(struct draw_stage *stage) return (struct nvfx_render_stage *)stage; } -static INLINE void -nvfx_render_vertex(struct nvfx_context *nvfx, const struct vertex_header *v) +static void +nvfx_render_flush(struct draw_stage *stage, unsigned flags) { - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; + struct nvfx_render_stage *rs = nvfx_render_stage(stage); + struct nvfx_context *nvfx = rs->nvfx; + struct nouveau_channel *chan = nvfx->screen->base.channel; - for (i = 0; i < nvfx->swtnl.nr_attribs; i++) { - unsigned idx = nvfx->swtnl.draw[i]; - unsigned hw = nvfx->swtnl.hw[i]; - - WAIT_RING(chan, 5); - switch (nvfx->swtnl.emit[i]) { - case EMIT_OMIT: - break; - case EMIT_1F: - nvfx_emit_vtx_attr(chan, hw, v->data[idx], 1); - break; - case EMIT_2F: - nvfx_emit_vtx_attr(chan, hw, v->data[idx], 2); - break; - case EMIT_3F: - nvfx_emit_vtx_attr(chan, hw, v->data[idx], 3); - break; - case EMIT_4F: - nvfx_emit_vtx_attr(chan, hw, v->data[idx], 4); - break; - case 0xff: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4F_X(hw), 4); - OUT_RING (chan, fui(v->data[idx][0] / v->data[idx][3])); - OUT_RING (chan, fui(v->data[idx][1] / v->data[idx][3])); - OUT_RING (chan, fui(v->data[idx][2] / v->data[idx][3])); - OUT_RING (chan, fui(1.0f / v->data[idx][3])); - break; - case EMIT_4UB: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][3]))); - case EMIT_4UB_BGRA: - BEGIN_RING(chan, eng3d, NV34TCL_VTX_ATTR_4UB(hw), 1); - OUT_RING (chan, pack_ub4(float_to_ubyte(v->data[idx][2]), - float_to_ubyte(v->data[idx][1]), - float_to_ubyte(v->data[idx][0]), - float_to_ubyte(v->data[idx][3]))); - break; - default: 
- assert(0); - break; - } + if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { + assert(AVAIL_RING(chan) >= 2); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP); + rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; } } @@ -92,42 +46,61 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim, struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - unsigned i; + boolean no_elements = nvfx->vertprog->draw_no_elements; + unsigned num_attribs = nvfx->vertprog->draw_elements; - /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - NOUVEAU_ERR("AIII, missed flush\n"); - assert(0); - } + /* we need to account for the flush here as well, even though it is done after this + * function + */ + if (AVAIL_RING(chan) < ((1 + count * num_attribs * 4) + 6 + 64)) { + nvfx_render_flush(stage, 0); FIRE_RING(chan); nvfx_state_emit(nvfx); + + assert(AVAIL_RING(chan) >= ((1 + count * num_attribs * 4) + 6 + 64)); } /* Switch primitive modes if necessary */ if (rs->prim != mode) { if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); + OUT_RING(chan, NV34TCL_VERTEX_BEGIN_END_STOP); + } + + /* XXX: emitting any command many times seems to (mostly) fix corruption that would otherwise happen */ + /* this seems to cause issues on nv3x, and also be unneeded there */ + if(nvfx->is_nv4x) + { + int i; + for(i = 0; i < 32; ++i) + { + OUT_RING(chan, RING_3D(0x1dac, 1)); + OUT_RING(chan, 0); + } } - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING(chan, RING_3D(NV34TCL_VERTEX_BEGIN_END, 1)); OUT_RING (chan, mode); rs->prim = mode; } - /* Emit vertex data */ - for (i = 0; i < count; i++) - 
nvfx_render_vertex(nvfx, prim->v[i]); - - /* If it's likely we'll need to empty the push buffer soon, finish - * off the primitive now. - */ - if (AVAIL_RING(chan) < ((count * 20) + 6)) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; + OUT_RING(chan, RING_3D_NI(NV34TCL_VERTEX_DATA, num_attribs * 4 * count)); + if(no_elements) { + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + OUT_RING(chan, 0); + } else { + for (unsigned i = 0; i < count; ++i) + { + struct vertex_header* v = prim->v[i]; + /* TODO: disable divide where it's causing the problem, and remove this hack */ + OUT_RING(chan, fui(v->data[0][0] / v->data[0][3])); + OUT_RING(chan, fui(v->data[0][1] / v->data[0][3])); + OUT_RING(chan, fui(v->data[0][2] / v->data[0][3])); + OUT_RING(chan, fui(1.0f / v->data[0][3])); + OUT_RINGp(chan, &v->data[1][0], 4 * (num_attribs - 1)); + } } } @@ -149,25 +122,11 @@ nvfx_render_tri(struct draw_stage *draw, struct prim_header *prim) nvfx_render_prim(draw, prim, NV34TCL_VERTEX_BEGIN_END_TRIANGLES, 3); } -static void -nvfx_render_flush(struct draw_stage *draw, unsigned flags) -{ - struct nvfx_render_stage *rs = nvfx_render_stage(draw); - struct nvfx_context *nvfx = rs->nvfx; - struct nvfx_screen *screen = nvfx->screen; - struct nouveau_channel *chan = screen->base.channel; - struct nouveau_grobj *eng3d = screen->eng3d; - - if (rs->prim != NV34TCL_VERTEX_BEGIN_END_STOP) { - BEGIN_RING(chan, eng3d, NV34TCL_VERTEX_BEGIN_END, 1); - OUT_RING (chan, NV34TCL_VERTEX_BEGIN_END_STOP); - rs->prim = NV34TCL_VERTEX_BEGIN_END_STOP; - } -} - static void nvfx_render_reset_stipple_counter(struct draw_stage *draw) { + /* this doesn't really seem to work, but it matters rather little */ + nvfx_render_flush(draw, 0); } static void @@ -176,40 +135,11 @@ nvfx_render_destroy(struct draw_stage *draw) FREE(draw); } -static struct nvfx_vertex_program * -nvfx_create_drawvp(struct 
nvfx_context *nvfx) -{ - struct ureg_program *ureg; - uint i; - - ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); - if (ureg == NULL) - return NULL; - - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0), ureg_DECL_vs_input(ureg, 3)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1), ureg_DECL_vs_input(ureg, 4)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 0), ureg_DECL_vs_input(ureg, 3)); - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_BCOLOR, 1), ureg_DECL_vs_input(ureg, 4)); - ureg_MOV(ureg, - ureg_writemask(ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 1), TGSI_WRITEMASK_X), - ureg_DECL_vs_input(ureg, 5)); - for (i = 0; i < 8; ++i) - ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, i), ureg_DECL_vs_input(ureg, 8 + i)); - - ureg_END( ureg ); - - return ureg_create_shader_and_destroy( ureg, &nvfx->pipe ); -} - struct draw_stage * nvfx_draw_render_stage(struct nvfx_context *nvfx) { struct nvfx_render_stage *render = CALLOC_STRUCT(nvfx_render_stage); - if (!nvfx->swtnl.vertprog) - nvfx->swtnl.vertprog = nvfx_create_drawvp(nvfx); - render->nvfx = nvfx; render->stage.draw = nvfx->draw; render->stage.point = nvfx_render_point; @@ -231,6 +161,7 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info if (!nvfx_state_validate_swtnl(nvfx)) return; + nvfx_state_emit(nvfx); /* these must be passed without adding the offsets */ @@ -256,62 +187,3 @@ nvfx_draw_vbo_swtnl(struct pipe_context *pipe, const struct pipe_draw_info* info draw_flush(nvfx->draw); } - -static INLINE void -emit_attrib(struct nvfx_context *nvfx, unsigned hw, unsigned emit, - unsigned semantic, unsigned index) -{ - unsigned draw_out = draw_find_shader_output(nvfx->draw, semantic, index); - unsigned a = nvfx->swtnl.nr_attribs++; - - nvfx->swtnl.hw[a] = hw; - nvfx->swtnl.emit[a] = emit; - nvfx->swtnl.draw[a] = draw_out; -} - -void 
-nvfx_vtxfmt_validate(struct nvfx_context *nvfx) -{ - struct nvfx_pipe_fragment_program *pfp = nvfx->fragprog; - unsigned colour = 0, texcoords = 0, fog = 0, i; - - /* Determine needed fragprog inputs */ - for (i = 0; i < pfp->info.num_inputs; i++) { - switch (pfp->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - break; - case TGSI_SEMANTIC_COLOR: - colour |= (1 << pfp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_GENERIC: - texcoords |= (1 << pfp->info.input_semantic_index[i]); - break; - case TGSI_SEMANTIC_FOG: - fog = 1; - break; - default: - assert(0); - } - } - - nvfx->swtnl.nr_attribs = 0; - - /* Map draw vtxprog output to hw attribute IDs */ - for (i = 0; i < 2; i++) { - if (!(colour & (1 << i))) - continue; - emit_attrib(nvfx, 3 + i, EMIT_4F, TGSI_SEMANTIC_COLOR, i); - } - - for (i = 0; i < 8; i++) { - if (!(texcoords & (1 << i))) - continue; - emit_attrib(nvfx, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); - } - - if (fog) { - emit_attrib(nvfx, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); - } - - emit_attrib(nvfx, 0, 0xff, TGSI_SEMANTIC_POSITION, 0); -} diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c index 93ba5382419..86df7f00496 100644 --- a/src/gallium/drivers/nvfx/nvfx_fragprog.c +++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c @@ -1263,7 +1263,7 @@ nvfx_fragprog_validate(struct nvfx_context *nvfx) pfp->fps[key] = fp; } - vp = nvfx->render_mode == HW ? 
nvfx->vertprog : nvfx->swtnl.vertprog; + vp = nvfx->hw_vertprog; if (fp->last_vp_id != vp->id || fp->last_sprite_coord_enable != sprite_coord_enable) { int sprite_real_input = -1; diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index ac8053f26b3..3900821de48 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -432,7 +432,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - screen->force_swtnl = debug_get_bool_option("NOUVEAU_SWTNL", FALSE); + screen->force_swtnl = debug_get_bool_option("NVFX_SWTNL", FALSE); screen->trace_draw = debug_get_bool_option("NVFX_TRACE_DRAW", FALSE); screen->buffer_allocation_cost = debug_get_num_option("NVFX_BUFFER_ALLOCATION_COST", 16384); diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h index 9200f78a545..8fafca1950c 100644 --- a/src/gallium/drivers/nvfx/nvfx_state.h +++ b/src/gallium/drivers/nvfx/nvfx_state.h @@ -17,13 +17,8 @@ struct nvfx_vertex_program_data { }; struct nvfx_vertex_program { - struct pipe_shader_state pipe; unsigned long long id; - struct draw_vertex_shader *draw; - - boolean translated; - struct nvfx_vertex_program_exec *insns; unsigned nr_insns; struct nvfx_vertex_program_data *consts; @@ -46,6 +41,20 @@ struct nvfx_vertex_program { struct util_dynarray const_relocs; }; +#define NVFX_VP_FAILED ((struct nvfx_vertex_program*)-1) + +struct nvfx_pipe_vertex_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + unsigned draw_elements; + boolean draw_no_elements; + struct draw_vertex_shader *draw_vs; + struct nvfx_vertex_program* draw_vp; + + struct nvfx_vertex_program* vp; +}; + struct nvfx_fragment_program_data { unsigned offset; unsigned index; diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c index 308c25fbe1b..30ef12a95b6 100644 --- 
a/src/gallium/drivers/nvfx/nvfx_state_emit.c +++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c @@ -145,7 +145,7 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx) { struct nouveau_channel* chan = nvfx->screen->base.channel; unsigned i; - struct nvfx_vertex_program* vp = nvfx->vertprog; + struct nvfx_vertex_program* vp = nvfx->hw_vertprog; if(nvfx->clip.nr != vp->clip_nr) { unsigned idx; @@ -230,7 +230,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(nvfx->render_mode == HW) { - if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST | NVFX_NEW_UCP)) + if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_VERTCONST)) { if(!nvfx_vertprog_validate(nvfx)) return FALSE; @@ -252,12 +252,10 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) } else { - /* TODO: this looks a bit misdesigned */ - if(dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) - nvfx_vertprog_validate(nvfx); - - if(dirty & (NVFX_NEW_ARRAYS | NVFX_NEW_INDEX | NVFX_NEW_FRAGPROG)) - nvfx_vtxfmt_validate(nvfx); + if(dirty & NVFX_NEW_VERTPROG) { + assert(nvfx_vertprog_validate(nvfx)); + nvfx_vbo_swtnl_validate(nvfx); + } } if(dirty & NVFX_NEW_RAST) @@ -284,7 +282,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx) if(nvfx->is_nv4x) { - unsigned vp_output = nvfx->vertprog->or | nvfx->hw_fragprog->or; + unsigned vp_output = nvfx->hw_vertprog->or | nvfx->hw_fragprog->or; vp_output |= (1 << (nvfx->clip.nr + 6)) - (1 << 6); if(vp_output != nvfx->hw_vp_output) @@ -399,8 +397,6 @@ nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs) boolean nvfx_state_validate(struct nvfx_context *nvfx) { - boolean was_sw = nvfx->fallback_swtnl ? TRUE : FALSE; - if (nvfx->render_mode != HW) { /* Don't even bother trying to go back to hw if none * of the states that caused swtnl previously have changed. 
@@ -429,7 +425,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) /* Setup for swtnl */ if (nvfx->render_mode == HW) { - NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + static boolean warned = FALSE; + if(!warned) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nvfx->fallback_swtnl); + warned = TRUE; + } nvfx->pipe.flush(&nvfx->pipe, 0, NULL); nvfx->dirty |= (NVFX_NEW_VIEWPORT | NVFX_NEW_VERTPROG | @@ -437,8 +437,11 @@ nvfx_state_validate_swtnl(struct nvfx_context *nvfx) nvfx->render_mode = SWTNL; } - if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) - draw_bind_vertex_shader(draw, nvfx->vertprog->draw); + if (nvfx->draw_dirty & NVFX_NEW_VERTPROG) { + if(!nvfx->vertprog->draw_vs) + nvfx->vertprog->draw_vs = draw_create_vertex_shader(draw, &nvfx->vertprog->pipe); + draw_bind_vertex_shader(draw, nvfx->vertprog->draw_vs); + } if (nvfx->draw_dirty & NVFX_NEW_RAST) draw_set_rasterizer_state(draw, &nvfx->rasterizer->pipe, diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c index 611de808af5..c35e926a7a1 100644 --- a/src/gallium/drivers/nvfx/nvfx_vbo.c +++ b/src/gallium/drivers/nvfx/nvfx_vbo.c @@ -338,6 +338,44 @@ nvfx_vbo_validate(struct nvfx_context *nvfx) return TRUE; } +void +nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx) +{ + struct nouveau_channel* chan = nvfx->screen->base.channel; + unsigned num_outputs = nvfx->vertprog->draw_elements; + int elements = MAX2(num_outputs, nvfx->hw_vtxelt_nr); + + if (!elements) + return; + + WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2); + + OUT_RING(chan, RING_3D(NV34TCL_VTXFMT(0), elements)); + for(unsigned i = 0; i < num_outputs; ++i) + OUT_RING(chan, (4 << NV34TCL_VTXFMT_SIZE_SHIFT) | NV34TCL_VTXFMT_TYPE_32_FLOAT); + for(unsigned i = num_outputs; i < elements; ++i) + OUT_RING(chan, NV34TCL_VTXFMT_TYPE_32_FLOAT); + + if(nvfx->is_nv4x) { + unsigned i; + /* seems to be some kind of cache flushing */ + for(i = 0; i < 3; ++i) { + OUT_RING(chan, RING_3D(0x1718, 1)); + OUT_RING(chan, 0); + } + 
} + + OUT_RING(chan, RING_3D(NV34TCL_VTXBUF_ADDRESS(0), elements)); + for (unsigned i = 0; i < elements; i++) + OUT_RING(chan, 0); + + OUT_RING(chan, RING_3D(0x1710, 1)); + OUT_RING(chan, 0); + + nvfx->hw_vtxelt_nr = num_outputs; + nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF; +} + void nvfx_vbo_relocate(struct nvfx_context *nvfx) { diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index bc78ed400a9..30385b26f79 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -8,6 +8,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_ureg.h" #include "draw/draw_context.h" @@ -37,6 +38,7 @@ struct nvfx_loop_entry struct nvfx_vpc { struct nvfx_context* nvfx; + struct pipe_shader_state pipe; struct nvfx_vertex_program *vp; struct nvfx_vertex_program_exec *vpi; @@ -813,7 +815,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) unsigned num_outputs; unsigned num_texcoords = nvfx->is_nv4x ? 
10 : 8; - num_outputs = util_semantic_set_from_program_file(&set, vpc->vp->pipe.tokens, TGSI_FILE_OUTPUT); + num_outputs = util_semantic_set_from_program_file(&set, vpc->pipe.tokens, TGSI_FILE_OUTPUT); if(num_outputs > num_texcoords) { NOUVEAU_ERR("too many vertex program outputs: %i\n", num_outputs); @@ -840,7 +842,7 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) } } - tgsi_parse_init(&p, vpc->vp->pipe.tokens); + tgsi_parse_init(&p, vpc->pipe.tokens); while (!tgsi_parse_end_of_tokens(&p)) { const union tgsi_full_token *tok = &p.FullToken; @@ -917,21 +919,35 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) DEBUG_GET_ONCE_BOOL_OPTION(nvfx_dump_vp, "NVFX_DUMP_VP", FALSE) -static void -nvfx_vertprog_translate(struct nvfx_context *nvfx, - struct nvfx_vertex_program *vp) +static struct nvfx_vertex_program* +nvfx_vertprog_translate(struct nvfx_context *nvfx, const struct pipe_shader_state* vps) { struct tgsi_parse_context parse; + struct nvfx_vertex_program* vp = NULL; struct nvfx_vpc *vpc = NULL; struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0)); struct util_dynarray insns; int i; - vpc = CALLOC(1, sizeof(struct nvfx_vpc)); + tgsi_parse_init(&parse, vps->tokens); + + vp = CALLOC_STRUCT(nvfx_vertex_program); + if(!vp) + goto out_err; + + vpc = CALLOC_STRUCT(nvfx_vpc); if (!vpc) - return; + goto out_err; + vpc->nvfx = nvfx; vpc->vp = vp; + vpc->pipe = *vps; + + { + // TODO: use a 64-bit atomic here! 
+ static unsigned long long id = 0; + vp->id = ++id; + } /* reserve space for ucps */ if(nvfx->use_vp_clipping) @@ -942,7 +958,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, if (!nvfx_vertprog_prepare(nvfx, vpc)) { FREE(vpc); - return; + return NULL; } /* Redirect post-transform vertex position to a temp if user clip @@ -955,8 +971,6 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, vpc->r_temps_discard = 0; } - tgsi_parse_init(&parse, vp->pipe.tokens); - util_dynarray_init(&insns); while (!tgsi_parse_end_of_tokens(&parse)) { tgsi_parse_token(&parse); @@ -1058,7 +1072,7 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, if(debug_get_option_nvfx_dump_vp()) { debug_printf("\n"); - tgsi_dump(vp->pipe.tokens, 0); + tgsi_dump(vpc->pipe.tokens, 0); debug_printf("\n%s vertex program:\n", nvfx->is_nv4x ? "nv4x" : "nv3x"); for (i = 0; i < vp->nr_insns; i++) @@ -1068,20 +1082,49 @@ nvfx_vertprog_translate(struct nvfx_context *nvfx, vp->clip_nr = -1; vp->exec_start = -1; - vp->translated = TRUE; -out_err: + +out: tgsi_parse_free(&parse); - util_dynarray_fini(&vpc->label_relocs); - util_dynarray_fini(&vpc->loop_stack); - if (vpc->r_temp) + if(vpc) { + util_dynarray_fini(&vpc->label_relocs); + util_dynarray_fini(&vpc->loop_stack); FREE(vpc->r_temp); - if (vpc->r_address) FREE(vpc->r_address); - if (vpc->r_const) FREE(vpc->r_const); - if (vpc->imm) FREE(vpc->imm); - FREE(vpc); + FREE(vpc); + } + return vp; + +out_err: + FREE(vp); + vp = NULL; + goto out; +} + +static struct nvfx_vertex_program* +nvfx_vertprog_translate_draw_vp(struct nvfx_context *nvfx, struct nvfx_pipe_vertex_program* pvp) +{ + struct nvfx_vertex_program* vp = NULL; + struct pipe_shader_state vps; + struct ureg_program *ureg = NULL; + unsigned num_outputs = MIN2(pvp->info.num_outputs, 16); + + ureg = ureg_create( TGSI_PROCESSOR_VERTEX ); + if(ureg == NULL) + return 0; + + for (unsigned i = 0; i < num_outputs; i++) + ureg_MOV(ureg, ureg_DECL_output(ureg, pvp->info.output_semantic_name[i], 
pvp->info.output_semantic_index[i]), ureg_DECL_vs_input(ureg, i)); + + ureg_END( ureg ); + + vps.tokens = ureg_get_tokens(ureg, 0); + vp = nvfx_vertprog_translate(nvfx, &vps); + ureg_free_tokens(vps.tokens); + ureg_destroy(ureg); + + return vp; } boolean @@ -1090,30 +1133,44 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; struct nouveau_grobj *eng3d = screen->eng3d; - struct nvfx_vertex_program *vp; + struct nvfx_pipe_vertex_program *pvp = nvfx->vertprog; + struct nvfx_vertex_program* vp; struct pipe_resource *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; if (nvfx->render_mode == HW) { - vp = nvfx->vertprog; - constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; - } else { - vp = nvfx->swtnl.vertprog; - constbuf = NULL; - } - - /* Translate TGSI shader into hw bytecode */ - if (!vp->translated) - { nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; - nvfx_vertprog_translate(nvfx, vp); - if (!vp->translated) { + vp = pvp->vp; + + if(!vp) { + vp = nvfx_vertprog_translate(nvfx, &pvp->pipe); + if(!vp) + vp = NVFX_VP_FAILED; + pvp->vp = vp; + } + + if(vp == NVFX_VP_FAILED) { nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; return FALSE; } + + constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + } else { + vp = pvp->draw_vp; + if(!vp) + { + pvp->draw_vp = vp = nvfx_vertprog_translate_draw_vp(nvfx, pvp); + if(!vp) { + _debug_printf("Error: unable to create a swtnl passthrough vertex shader: aborting."); + abort(); + } + } + constbuf = NULL; } + nvfx->hw_vertprog = vp; + /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; @@ -1294,24 +1351,22 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) util_dynarray_fini(&vp->branch_relocs); util_dynarray_fini(&vp->const_relocs); + FREE(vp); } static void * -nvfx_vp_state_create(struct pipe_context *pipe, - const struct pipe_shader_state 
*cso) +nvfx_vp_state_create(struct pipe_context *pipe, const struct pipe_shader_state *cso) { struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp; - - // TODO: use a 64-bit atomic here! - static unsigned long long id = 0; + struct nvfx_pipe_vertex_program *pvp; - vp = CALLOC(1, sizeof(struct nvfx_vertex_program)); - vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); - vp->draw = draw_create_vertex_shader(nvfx->draw, &vp->pipe); - vp->id = ++id; + pvp = CALLOC(1, sizeof(struct nvfx_pipe_vertex_program)); + pvp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + tgsi_scan_shader(pvp->pipe.tokens, &pvp->info); + pvp->draw_elements = MAX2(1, MIN2(pvp->info.num_outputs, 16)); + pvp->draw_no_elements = pvp->info.num_outputs == 0; - return (void *)vp; + return (void *)pvp; } static void @@ -1327,13 +1382,17 @@ nvfx_vp_state_bind(struct pipe_context *pipe, void *hwcso) static void nvfx_vp_state_delete(struct pipe_context *pipe, void *hwcso) { - struct nvfx_context *nvfx = nvfx_context(pipe); - struct nvfx_vertex_program *vp = hwcso; - - draw_delete_vertex_shader(nvfx->draw, vp->draw); - nvfx_vertprog_destroy(nvfx, vp); - FREE((void*)vp->pipe.tokens); - FREE(vp); + struct nvfx_context *nvfx = nvfx_context(pipe); + struct nvfx_pipe_vertex_program *pvp = hwcso; + + if(pvp->draw_vs) + draw_delete_vertex_shader(nvfx->draw, pvp->draw_vs); + if(pvp->vp && pvp->vp != NVFX_VP_FAILED) + nvfx_vertprog_destroy(nvfx, pvp->vp); + if(pvp->draw_vp) + nvfx_vertprog_destroy(nvfx, pvp->draw_vp); + FREE((void*)pvp->pipe.tokens); + FREE(pvp); } void -- 2.30.2