X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_render.c;h=dd9da80fd01d8fda334203b2ae6890c99dd38678;hb=72cd2c8c0c863873d280a0e49dfa381e5c3236c8;hp=20b33024266c91d0e61aac9faeb496c827e0a621;hpb=c6abbbfd0296ca0f8e4eecb29fce20cd7125f98a;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 20b33024266..dd9da80fd01 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -25,27 +25,45 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/* - * Authors: - * Nicolai Haehnle +/** + * \file + * + * \brief R300 Render (Vertex Buffer Implementation) + * + * The immediate implementation has been removed from CVS in favor of the vertex + * buffer implementation. + * + * The render functions are called by the pipeline manager to render a batch of + * primitives. They return TRUE to pass on to the next stage (i.e. software + * rasterization) or FALSE to indicate that the pipeline has finished after + * rendering something. + * + * When falling back to software TCL still attempt to use hardware + * rasterization. + * + * I am not sure that the cache related registers are setup correctly, but + * obviously this does work... Further investigation is needed. + * + * \author Nicolai Haehnle + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... */ -#include "glheader.h" -#include "state.h" -#include "imports.h" -#include "enums.h" -#include "macros.h" -#include "context.h" -#include "dd.h" -#include "simple_list.h" - -#include "api_arrayelt.h" +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" - #include "radeon_reg.h" #include "radeon_macros.h" #include "radeon_ioctl.h" @@ -54,293 +72,348 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" -#include "r300_program.h" #include "r300_tex.h" -#include "r300_maos.h" #include "r300_emit.h" - +#include "r300_fragprog.h" extern int future_hw_tcl_on; -/********************************************************************** -* Hardware rasterization -* -* When we fell back to software TCL, we still try to use the -* rasterization hardware for rendering. -**********************************************************************/ - -static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim) +/** + * \brief Convert a OpenGL primitive type into a R300 primitive type. + */ +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { - int type=-1; - switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - type=R300_VAP_VF_CNTL__PRIM_POINTS; - break; + return R300_VAP_VF_CNTL__PRIM_POINTS; + break; case GL_LINES: - type=R300_VAP_VF_CNTL__PRIM_LINES; - break; + return R300_VAP_VF_CNTL__PRIM_LINES; + break; case GL_LINE_STRIP: - type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - break; + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; case GL_LINE_LOOP: - type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP; - break; - case GL_TRIANGLES: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLES; - break; - case GL_TRIANGLE_STRIP: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - break; - case GL_TRIANGLE_FAN: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - break; + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case GL_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case GL_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case GL_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; case GL_QUADS: - type=R300_VAP_VF_CNTL__PRIM_QUADS; - break; + return R300_VAP_VF_CNTL__PRIM_QUADS; + break; case GL_QUAD_STRIP: - type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - break; + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; case GL_POLYGON: - type=R300_VAP_VF_CNTL__PRIM_POLYGON; + return R300_VAP_VF_CNTL__PRIM_POLYGON; break; - default: - fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", - __FILE__, __FUNCTION__, - prim & PRIM_MODE_MASK); + default: + assert(0); return -1; - break; - } - return type; + break; + } } -int r300_get_num_verts(r300ContextPtr rmesa, int num_verts, int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { - int verts_off=0; - char *name="UNKNOWN"; + int verts_off = 0; switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - name="P"; verts_off = 0; - break; + break; case GL_LINES: - name="L"; verts_off = num_verts % 2; - break; + break; case GL_LINE_STRIP: - name="LS"; - if(num_verts < 2) + if (num_verts < 2) verts_off = num_verts; - break; + break; case GL_LINE_LOOP: - name="LL"; - if(num_verts < 2) + if (num_verts < 2) verts_off = num_verts; - break; - case GL_TRIANGLES: - name="T"; + break; + case GL_TRIANGLES: verts_off = num_verts % 3; - break; - case GL_TRIANGLE_STRIP: - name="TS"; - if(num_verts < 3) + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) verts_off = num_verts; - break; - case GL_TRIANGLE_FAN: - name="TF"; - if(num_verts < 3) + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) verts_off = num_verts; - break; + break; case GL_QUADS: - name="Q"; verts_off = num_verts % 4; - break; + break; case GL_QUAD_STRIP: - name="QS"; - if(num_verts < 4) + if (num_verts < 4) verts_off = num_verts; else verts_off = num_verts % 2; - break; + break; case GL_POLYGON: - name="P"; - if(num_verts < 3) + if (num_verts < 3) verts_off = num_verts; break; - default: - fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", - __FILE__, __FUNCTION__, - prim & PRIM_MODE_MASK); + default: + assert(0); return -1; - break; - } - - if (RADEON_DEBUG & DEBUG_VERTS) { - if (num_verts - verts_off == 0) { - WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name); - return 0; - } - - if (verts_off > 0) { - WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name); - } + break; } return num_verts - verts_off; } -/* Immediate implementation has been removed from CVS. */ - -/* vertex buffer implementation */ +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + void *out; + + rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, + 0, n_elts * 4, 4, + RADEON_GEM_DOMAIN_GTT, 0); + rmesa->state.elt_dma_offset = 0; + radeon_bo_map(rmesa->state.elt_dma_bo, 1); + out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset; + memcpy(out, elts, n_elts * 4); + radeon_bo_unmap(rmesa->state.elt_dma_bo); +} -static void inline fire_EB(r300ContextPtr rmesa, unsigned long addr, int vertex_count, int type, int elt_size) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; - unsigned long t_addr; - unsigned long magic_1, magic_2; - GLcontext *ctx; - ctx = rmesa->radeon.glCtx; - - assert(elt_size == 2 || elt_size == 4); - - if(addr & (elt_size-1)){ - WARN_ONCE("Badly aligned buffer\n"); - return ; - } -#ifdef OPTIMIZE_ELTS - magic_1 = (addr % 32) / 4; - t_addr = addr & (~0x1d); - magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; + BATCH_LOCALS(rmesa); + + if (vertex_count > 0) { + BEGIN_BATCH(8); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | + type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + OUT_BATCH_RELOC(rmesa->state.elt_dma_offset, + rmesa->state.elt_dma_bo, + rmesa->state.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(vertex_count); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + OUT_BATCH(rmesa->state.elt_dma_offset); + OUT_BATCH(vertex_count); + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.elt_dma_bo, + 0, + rmesa->state.elt_dma_bo->size, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); + } +} - check_space(6); +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + BATCH_LOCALS(rmesa); + uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if(elt_size == 4){ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + BEGIN_BATCH(sz+2); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->state.aos[i].components << 0) | + (rmesa->state.aos[i].stride << 8) | + (rmesa->state.aos[i + 1].components << 16) | + (rmesa->state.aos[i + 1].stride << 24)); + + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); - if(elt_size == 4){ - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr /*& 0xffffffe3*/); - } else { - e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); - e32(t_addr); + if (nr & 1) { + OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | + (rmesa->state.aos[nr - 1].stride << 8)); + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - - if(elt_size == 4){ - e32(vertex_count); /* Total number of dwords needed? */ - } else { - e32(magic_2); /* Total number of dwords needed? */ + } else { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->state.aos[i].components << 0) | + (rmesa->state.aos[i].stride << 8) | + (rmesa->state.aos[i + 1].components << 16) | + (rmesa->state.aos[i + 1].stride << 24)); + + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + OUT_BATCH(voffset); } - //cp_delay(rmesa, 1); -#else - (void)magic_2, (void)magic_1, (void)t_addr; - check_space(6); - - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if(elt_size == 4){ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + if (nr & 1) { + OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | + (rmesa->state.aos[nr - 1].stride << 8)); + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + OUT_BATCH(voffset); } - - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr /*& 0xffffffe3*/); - - if(elt_size == 4){ - e32(vertex_count); /* Total number of dwords needed? */ - } else { - e32((vertex_count+1)/2); /* Total number of dwords needed? */ + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[i+0].bo, + voffset, + rmesa->state.aos[i+0].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[i+1].bo, + voffset, + rmesa->state.aos[i+1].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[nr-1].bo, + voffset, + rmesa->state.aos[nr-1].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - //cp_delay(rmesa, 1); -#endif + } + END_BATCH(); } -static void r300_render_vb_primitive(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { - int type, num_verts; + BATCH_LOCALS(rmesa); - type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, end-start, prim); + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); +} + +static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + int start, int end, int prim) +{ + BATCH_LOCALS(rmesa); + int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; - if(type<0 || num_verts <= 0)return; + type = r300PrimitiveType(rmesa, prim); + num_verts = r300NumVerts(rmesa, end - start, prim); - if(rmesa->state.VB.Elts){ - r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); - if(num_verts > 65535){ /* not implemented yet */ - WARN_ONCE("Too many elts\n"); + if (type < 0 || num_verts <= 0) return; + + /* Make space for at least 64 dwords. + * This is supposed to ensure that we can get all rendering + * commands into a single command buffer. + */ + r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__); + + if (vb->Elts) { + if (num_verts > 65535) { + /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + /* Note: The following is incorrect, but it's the best I can do + * without a major refactoring of how DMA memory is handled. + * The problem: Ensuring that both vertex arrays *and* index + * arrays are at the right position, and then ensuring that + * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted + * at once. + * + * So why is the following incorrect? Well, it seems like + * allocating the index array might actually evict the vertex + * arrays. *sigh* + */ + r300EmitElts(ctx, vb->Elts, num_verts); + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireEB(rmesa, num_verts, type); + } else { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); } - r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); - fire_EB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); - }else{ - r300EmitAOS(rmesa, rmesa->state.aos_count, start); - fire_AOS(rmesa, num_verts, type); - } + COMMIT_BATCH(); } -GLboolean r300_run_vb_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct radeon_vertex_buffer *VB = &rmesa->state.VB; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; - + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (stage) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - radeon_vb_to_rvb(rmesa, VB, &tnl->vb); - } - r300UpdateShaders(rmesa); if (r300EmitArrays(ctx)) return GL_TRUE; r300UpdateShaderStates(rmesa); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + r300RunRenderPrimitive(rmesa, ctx, start, end, prim); } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A /*R300_RB3D_DSTCACHE_UNKNOWN_02*/); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT,0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03 /*R300_RB3D_ZCACHE_UNKNOWN_01*/); + r300EmitCacheFlush(rmesa); -#ifdef USER_BUFFERS - r300UseArrays(ctx); -#endif r300ReleaseArrays(ctx); + return GL_FALSE; } @@ -354,97 +427,83 @@ GLboolean r300_run_vb_render(GLcontext *ctx, } \ } while(0) -int r300Fallback(GLcontext *ctx) +static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *rp = - (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - - if (rp) { - if (!rp->translated) - r300_translate_fragment_shader(r300, rp); - - FALLBACK_IF(!rp->translated); + /* Do we need to use new-style shaders? + * Also is there a better way to do this? */ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } } - /* We do not do SELECT or FEEDBACK (yet ?) - * Is it worth doing them ? - */ FALLBACK_IF(ctx->RenderMode != GL_RENDER); - FALLBACK_IF(ctx->Stencil._TestTwoSide && - (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] || - ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[1] || - ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1])); - - if(!r300->disable_lowimpact_fallback){ - /* GL_POLYGON_OFFSET_POINT */ - FALLBACK_IF(ctx->Polygon.OffsetPoint); - /* GL_POLYGON_OFFSET_LINE */ - FALLBACK_IF(ctx->Polygon.OffsetLine); - /* GL_POLYGON_STIPPLE */ + FALLBACK_IF(ctx->Stencil._TestTwoSide + && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] + || ctx->Stencil.ValueMask[0] != + ctx->Stencil.ValueMask[1] + || ctx->Stencil.WriteMask[0] != + ctx->Stencil.WriteMask[1])); + + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); + + if (!r300->disable_lowimpact_fallback) { FALLBACK_IF(ctx->Polygon.StippleFlag); - /* GL_MULTISAMPLE_ARB */ - FALLBACK_IF(ctx->Multisample.Enabled); - /* blender ? */ + FALLBACK_IF(ctx->Multisample._Enabled); FALLBACK_IF(ctx->Line.StippleFlag); - /* GL_LINE_SMOOTH */ FALLBACK_IF(ctx->Line.SmoothFlag); - /* GL_POINT_SMOOTH */ FALLBACK_IF(ctx->Point.SmoothFlag); } - /* Fallback for LOGICOP */ - FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); - - /* Rest could be done with vertex fragments */ - if (ctx->Extensions.NV_point_sprite || - ctx->Extensions.ARB_point_sprite) - /* GL_POINT_SPRITE_NV */ - FALLBACK_IF(ctx->Point.PointSprite); - return R300_FALLBACK_NONE; } -/** - * Called by the pipeline manager to render a batch of primitives. - * We can return true to pass on to the next stage (i.e. software - * rasterization) or false to indicate that the pipeline has finished - * after we render something. - */ -static GLboolean r300_run_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunNonTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (r300Fallback(ctx) >= R300_FALLBACK_RAST) return GL_TRUE; - return r300_run_vb_render(ctx, stage); -} + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 hw rasterize", - NULL, - NULL, - NULL, - NULL, - r300_run_render /* run */ -}; + return r300RunRender(ctx, stage); +} -static GLboolean r300_run_tcl_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program *vp; - hw_tcl_on=future_hw_tcl_on; + hw_tcl_on = future_hw_tcl_on; if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if(hw_tcl_on == GL_FALSE) + + if (hw_tcl_on == GL_FALSE) return GL_TRUE; if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { @@ -460,17 +519,23 @@ static GLboolean r300_run_tcl_render(GLcontext *ctx, return GL_TRUE; } - //r300UpdateShaderStates(rmesa); - - return r300_run_vb_render(ctx, stage); + return r300RunRender(ctx, stage); } -const struct tnl_pipeline_stage _r300_tcl_stage = { - "r300 tcl", +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 Hardware Rasterization", NULL, NULL, NULL, NULL, - r300_run_tcl_render /* run */ + r300RunNonTCLRender }; +const struct tnl_pipeline_stage _r300_tcl_stage = { + "r300 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, + NULL, + r300RunTCLRender +};