X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_render.c;h=fc07105c5604d319dbe90821cd27846e42f14acb;hb=568d369d7747c6cc2a421a816c85d888ccfc9957;hp=1599f0f6af432ac0eca8a090f5669126caa3d670;hpb=d240b29b95573e37baa627650661a2ced109c68d;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 1599f0f6af4..fc07105c560 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -25,9 +25,29 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/* - * Authors: - * Nicolai Haehnle +/** + * \file + * + * \brief R300 Render (Vertex Buffer Implementation) + * + * The immediate implementation has been removed from CVS in favor of the vertex + * buffer implementation. + * + * The render functions are called by the pipeline manager to render a batch of + * primitives. They return TRUE to pass on to the next stage (i.e. software + * rasterization) or FALSE to indicate that the pipeline has finished after + * rendering something. + * + * When falling back to software TCL still attempt to use hardware + * rasterization. + * + * I am not sure that the cache related registers are setup correctly, but + * obviously this does work... Further investigation is needed. + * + * \author Nicolai Haehnle + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... */ #include "glheader.h" @@ -38,14 +58,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "context.h" #include "dd.h" #include "simple_list.h" - #include "api_arrayelt.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" -#include "array_cache/acache.h" +#include "vbo/vbo.h" #include "tnl/tnl.h" #include "tnl/t_vp_build.h" - #include "radeon_reg.h" #include "radeon_macros.h" #include "radeon_ioctl.h" @@ -54,866 +72,376 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" -#include "r300_program.h" #include "r300_tex.h" -#include "r300_maos.h" #include "r300_emit.h" - extern int future_hw_tcl_on; -/********************************************************************** -* Hardware rasterization -* -* When we fell back to software TCL, we still try to use the -* rasterization hardware for rendering. -**********************************************************************/ - -static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim) +/** + * \brief Convert a OpenGL primitive type into a R300 primitive type. + */ +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { - int type=-1; - switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - type=R300_VAP_VF_CNTL__PRIM_POINTS; - break; + return R300_VAP_VF_CNTL__PRIM_POINTS; + break; case GL_LINES: - type=R300_VAP_VF_CNTL__PRIM_LINES; - break; + return R300_VAP_VF_CNTL__PRIM_LINES; + break; case GL_LINE_STRIP: - type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - break; + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; case GL_LINE_LOOP: - type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP; - break; - case GL_TRIANGLES: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLES; - break; - case GL_TRIANGLE_STRIP: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - break; - case GL_TRIANGLE_FAN: - type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - break; + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case GL_TRIANGLES: + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case GL_TRIANGLE_STRIP: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case GL_TRIANGLE_FAN: + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; case GL_QUADS: - type=R300_VAP_VF_CNTL__PRIM_QUADS; - break; + return R300_VAP_VF_CNTL__PRIM_QUADS; + break; case GL_QUAD_STRIP: - type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - break; + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; case GL_POLYGON: - type=R300_VAP_VF_CNTL__PRIM_POLYGON; + return R300_VAP_VF_CNTL__PRIM_POLYGON; break; - default: - fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", - __FILE__, __FUNCTION__, - prim & PRIM_MODE_MASK); + default: + assert(0); return -1; - break; - } - return type; + break; + } } -static int r300_get_num_verts(r300ContextPtr rmesa, - GLcontext *ctx, - int num_verts, - int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { - int verts_off=0; - char *name="UNKNOWN"; + int verts_off = 0; switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - name="P"; verts_off = 0; - break; + break; case GL_LINES: - name="L"; verts_off = num_verts % 2; - break; + break; case GL_LINE_STRIP: - name="LS"; - if(num_verts < 2) + if (num_verts < 2) verts_off = num_verts; - break; + break; case GL_LINE_LOOP: - name="LL"; - if(num_verts < 2) + if (num_verts < 2) verts_off = num_verts; - break; - case GL_TRIANGLES: - name="T"; + break; + case GL_TRIANGLES: verts_off = num_verts % 3; - break; - case GL_TRIANGLE_STRIP: - name="TS"; - if(num_verts < 3) + break; + case GL_TRIANGLE_STRIP: + if (num_verts < 3) verts_off = num_verts; - break; - case GL_TRIANGLE_FAN: - name="TF"; - if(num_verts < 3) + break; + case GL_TRIANGLE_FAN: + if (num_verts < 3) verts_off = num_verts; - break; + break; case GL_QUADS: - name="Q"; verts_off = num_verts % 4; - break; + break; case GL_QUAD_STRIP: - name="QS"; - if(num_verts < 4) + if (num_verts < 4) verts_off = num_verts; else verts_off = num_verts % 2; - break; + break; case GL_POLYGON: - name="P"; - if(num_verts < 3) + if (num_verts < 3) verts_off = num_verts; break; - default: - fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", - __FILE__, __FUNCTION__, - prim & PRIM_MODE_MASK); + default: + assert(0); return -1; - break; - } - - if (RADEON_DEBUG & DEBUG_VERTS) { - if (num_verts - verts_off == 0) { - WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name); - return 0; - } - - if (verts_off > 0) { - WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name); - } + break; } return num_verts - verts_off; } -/* This function compiles GL context into state registers that - describe data routing inside of R300 pipeline. - - In particular, it programs input_route, output_vtx_fmt, texture - unit configuration and gb_output_vtx_fmt - - This function encompasses setup_AOS() from r300_lib.c -*/ - - - - -/* Immediate implementation - vertex data is sent via command stream */ - -static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0}; - -#define output_vector(v, i) { \ - int _i; \ - for(_i=0;_isize;_i++){ \ - if(VB->Elts){ \ - efloat(VEC_ELT(v, GLfloat, VB->Elts[i])[_i]); \ - }else{ \ - efloat(VEC_ELT(v, GLfloat, i)[_i]); \ - } \ - } \ - for(_i=v->size;_i<4;_i++){ \ - efloat(default_vector[_i]); \ - } \ -} - -/* Immediate implementation - vertex data is sent via command stream */ - -static void r300_render_immediate_primitive(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i, render_inputs; - int k, type, num_verts; - LOCAL_VARS - - type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); - -#if 0 - fprintf(stderr,"ObjPtr: size=%d stride=%d\n", - VB->ObjPtr->size, VB->ObjPtr->stride); - fprintf(stderr,"ColorPtr[0]: size=%d stride=%d\n", - VB->ColorPtr[0]->size, VB->ColorPtr[0]->stride); - fprintf(stderr,"TexCoordPtr[0]: size=%d stride=%d\n", - VB->TexCoordPtr[0]->size, VB->TexCoordPtr[0]->stride); -#endif - - if(type<0 || num_verts <= 0)return; - - if(!VB->ObjPtr){ - WARN_ONCE("FIXME: Don't know how to handle GL_ARB_vertex_buffer_object correctly\n"); - return; - } - /* A packet cannot have more than 16383 data words.. */ - if((num_verts*4*rmesa->state.aos_count)>16380){ - WARN_ONCE("Too many vertices to paint. Fix me !\n"); - return; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_dma_region *rvb = &rmesa->state.elt_dma; + void *out; + + if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { + rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; + rvb->start = ((char *)elts) - rvb->address; + rvb->aos_offset = + rmesa->radeon.radeonScreen->gart_texture_offset + + rvb->start; + return; + } else if (r300IsGartMemory(rmesa, elts, 1)) { + WARN_ONCE("Pointer not within GART memory!\n"); + _mesa_exit(-1); } - //fprintf(stderr, "aos_count=%d start=%d end=%d\n", rmesa->state.aos_count, start, end); - - if(rmesa->state.aos_count==0){ - WARN_ONCE("Aeiee ! aos_count==0, while it shouldn't. Skipping rendering\n"); - return; - } - - render_inputs = rmesa->state.render_inputs; - - if(!render_inputs){ - WARN_ONCE("Aeiee ! render_inputs==0. Skipping rendering.\n"); - return; - } - - - start_immediate_packet(num_verts, type, 4*rmesa->state.aos_count); - - for(i=start;iObjPtr, GLfloat, i)[0], - VEC_ELT(VB->ObjPtr, GLfloat, i)[1], - VEC_ELT(VB->ObjPtr, GLfloat, i)[2], - VEC_ELT(VB->ObjPtr, GLfloat, i)[3], - - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3] - ); -#endif - - - /* coordinates */ - if(render_inputs & _TNL_BIT_POS) - output_vector(VB->ObjPtr, i); - if(render_inputs & _TNL_BIT_NORMAL) - output_vector(VB->NormalPtr, i); - - /* color components */ - if(render_inputs & _TNL_BIT_COLOR0) - output_vector(VB->ColorPtr[0], i); - if(render_inputs & _TNL_BIT_COLOR1) - output_vector(VB->SecondaryColorPtr[0], i); - -/* if(render_inputs & _TNL_BIT_FOG) // Causes lock ups when immediate mode is on - output_vector(VB->FogCoordPtr, i);*/ - - /* texture coordinates */ - for(k=0;k < ctx->Const.MaxTextureUnits;k++) - if(render_inputs & (_TNL_BIT_TEX0<TexCoordPtr[k], i); - - if(render_inputs & _TNL_BIT_INDEX) - output_vector(VB->IndexPtr[0], i); - if(render_inputs & _TNL_BIT_POINTSIZE) - output_vector(VB->PointSizePtr, i); - } + r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); + rvb->aos_offset = GET_START(rvb); + out = rvb->address + rvb->start; + memcpy(out, elts, n_elts * 4); } - -static GLboolean r300_run_immediate_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, + int vertex_count, int type) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - LOCAL_VARS - + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; - /* Update texture state - needs to be done only when actually changed.. - All the time for now.. */ - - - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - -#if 1 /* we need this, somehow */ - /* Flush state - make sure command buffer is nice and large */ - r300Flush(ctx); - /* Make sure we have enough space */ -#else - /* Count is very imprecize, but should be good upper bound */ - r300EnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size + 4+2+30 - +VB->PrimitiveCount*(1+8)+VB->Count*4*rmesa->state.texture.tc_count+4, __FUNCTION__); -#endif - - /* needed before starting 3d operation .. */ - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - -#if 0 /* looks like the Z offset issue got fixed */ - rmesa->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA - | R300_VPORT_X_OFFSET_ENA - | R300_VPORT_Y_SCALE_ENA - | R300_VPORT_Y_OFFSET_ENA - | R300_VTX_W0_FMT; - R300_STATECHANGE(rmesa, vte); -#endif + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr); + e32(vertex_count); +} +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; - /* Magic register - note it is right after 20b0 */ + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); + e32(nr); - if(rmesa->state.texture.tc_count>0){ - reg_start(0x20b4,0); - e32(0x0000000c); + for (i = 0; i + 1 < nr; i += 2) { + e32((rmesa->state.aos[i].aos_size << 0) | + (rmesa->state.aos[i].aos_stride << 8) | + (rmesa->state.aos[i + 1].aos_size << 16) | + (rmesa->state.aos[i + 1].aos_stride << 24)); + e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); + e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); } - r300EmitState(rmesa); - -/* Setup INPUT_ROUTE and INPUT_CNTL */ - r300EmitArrays(ctx, GL_TRUE); - -/* Why do we need this for immediate mode?? Vertex processor needs it to know proper regs */ -// r300EmitLOAD_VBPNTR(rmesa, 0); -/* Okay, it seems I misunderstood something, EmitAOS does the same thing */ - r300EmitAOS(rmesa, rmesa->state.aos_count, 0); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - r300_render_immediate_primitive(rmesa, ctx, start, start + length, prim); - } - - /* This sequence is required after any 3d drawing packet - I suspect it work arounds a bug (or deficiency) in hardware */ - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - return GL_FALSE; + if (nr & 1) { + e32((rmesa->state.aos[nr - 1].aos_size << 0) | + (rmesa->state.aos[nr - 1].aos_stride << 8)); + e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + } } - -/* vertex buffer implementation */ - -static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type, int elt_size) +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { - LOCAL_VARS - unsigned long addr_a; - unsigned long t_addr; - unsigned long magic_1, magic_2; - GLcontext *ctx; - ctx = rmesa->radeon.glCtx; - - assert(elt_size == 2 || elt_size == 4); - - if(addr & (elt_size-1)){ - WARN_ONCE("Badly aligned buffer\n"); - return ; - } -#ifdef OPTIMIZE_ELTS - addr_a = 0; - - magic_1 = (addr % 32) / 4; - t_addr = addr & (~0x1d); - magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; - - check_space(6); - - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if(elt_size == 4){ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); - } + int cmd_reserved = 0; + int cmd_written = 0; + drm_radeon_cmd_header_t *cmd = NULL; - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); - if(elt_size == 4){ - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr /*& 0xffffffe3*/); - } else { - e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); - e32(t_addr); - } - - if(elt_size == 4){ - e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ - } else { - e32(magic_2); /* Total number of dwords needed? */ - } - //cp_delay(PASS_PREFIX 1); -#if 0 - fprintf(stderr, "magic_1 %d\n", magic_1); - fprintf(stderr, "t_addr %x\n", t_addr); - fprintf(stderr, "magic_2 %d\n", magic_2); - exit(1); -#endif -#else - (void)magic_2, (void)magic_1, (void)t_addr; - - addr_a = 0; - - check_space(6); - - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if(elt_size == 4){ - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); - } - - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr /*& 0xffffffe3*/); - - if(elt_size == 4){ - e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ - } else { - e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ - } - //cp_delay(PASS_PREFIX 1); -#endif + start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); + e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); } -static void r300_render_vb_primitive(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) +static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, + int start, int end, int prim) { - int type, num_verts; - LOCAL_VARS - - type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); + int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; - if(type<0 || num_verts <= 0)return; + type = r300PrimitiveType(rmesa, prim); + num_verts = r300NumVerts(rmesa, end - start, prim); - if(rmesa->state.Elts){ - r300EmitAOS(rmesa, rmesa->state.aos_count, 0); -#if 0 - int i; - start_index32_packet(num_verts, type); - for(i=0; i < num_verts; i++) - e32(rmesa->state.Elts[start+i]); /* start ? */ -#else - WARN_ONCE("Rendering with elt buffers\n"); - if(num_verts == 1){ - start_index32_packet(num_verts, type); - e32(rmesa->state.Elts[start]); - return; - } - - if(num_verts > 65535){ /* not implemented yet */ - WARN_ONCE("Too many elts\n"); + if (type < 0 || num_verts <= 0) return; + + if (vb->Elts) { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + if (num_verts > 65535) { + /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + r300EmitElts(ctx, vb->Elts, num_verts); + r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); + } else { + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); } - r300EmitElts(ctx, rmesa->state.Elts+start, num_verts, 4); - fire_EB(PASS_PREFIX GET_START(&(rmesa->state.elt_dma)), num_verts, type, 4); -#endif - }else{ - r300EmitAOS(rmesa, rmesa->state.aos_count, start); - fire_AOS(PASS_PREFIX num_verts, type); - } } -static GLboolean r300_run_vb_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; int i; - LOCAL_VARS - + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; + + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - - r300ReleaseArrays(ctx); - r300EmitArrays(ctx, GL_FALSE); - -// LOCK_HARDWARE(&(rmesa->radeon)); + r300UpdateShaders(rmesa); + if (r300EmitArrays(ctx)) + return GL_TRUE; - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + r300UpdateShaderStates(rmesa); - reg_start(0x4f18,0); - e32(0x00000003); + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); - - rmesa->state.Elts = VB->Elts; - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); - } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; + r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + } - reg_start(0x4f18,0); - e32(0x00000003); + r300EmitCacheFlush(rmesa); #ifdef USER_BUFFERS r300UseArrays(ctx); #endif -// end_3d(PASS_PREFIX_VOID); - /* Flush state - we are done drawing.. */ -// r300FlushCmdBufLocked(rmesa, __FUNCTION__); -// radeonWaitForIdleLocked(&(rmesa->radeon)); + r300ReleaseArrays(ctx); -// UNLOCK_HARDWARE(&(rmesa->radeon)); return GL_FALSE; } -#ifdef RADEON_VTXFMT_A - -static void r300_render_vb_primitive_vtxfmt_a(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) -{ - int type, num_verts; - radeonScreenPtr rsp=rmesa->radeon.radeonScreen; - LOCAL_VARS - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int i; - - type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); - - if(type<0 || num_verts <= 0)return; - - if(rmesa->state.VB.Elts){ - r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); -#if 0 - start_index32_packet(num_verts, type); - for(i=0; i < num_verts; i++) - e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */ -#else - WARN_ONCE("Rendering with elt buffers\n"); - if(num_verts == 1){ - //start_index32_packet(num_verts, type); - //e32(rmesa->state.Elts[start]); - return; - } - - if(num_verts > 65535){ /* not implemented yet */ - WARN_ONCE("Too many elts\n"); - return; - } - - r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); - fire_EB(PASS_PREFIX rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); -#endif - }else{ - r300EmitAOS(rmesa, rmesa->state.aos_count, start); - fire_AOS(PASS_PREFIX num_verts, type); - } -} - -void dump_array(struct r300_dma_region *rvb, int count) +#define FALLBACK_IF(expr) \ + do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", \ + #expr); \ + return R300_FALLBACK_RAST; \ + } \ + } while(0) + +static int r300Fallback(GLcontext * ctx) { - int *out = (int *)(rvb->address + rvb->start); - int i, ci; - - for (i=0; i < count; i++) { - fprintf(stderr, "{"); - if (rvb->aos_format == AOS_FORMAT_FLOAT) - for (ci=0; ci < rvb->aos_size; ci++) - fprintf(stderr, "%f ", ((float *)out)[ci]); - else - for (ci=0; ci < rvb->aos_size; ci++) - fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); - fprintf(stderr, "}"); - - out += rvb->aos_stride; - } - - fprintf(stderr, "\n"); -} - -void dump_dt(struct dt *dt, int count) -{ - int *out = dt->data; - int i, ci; - - fprintf(stderr, "base at %p ", out); - - for (i=0; i < count; i++){ - fprintf(stderr, "{"); - if (dt->type == GL_FLOAT) - for (ci=0; ci < dt->size; ci++) - fprintf(stderr, "%f ", ((float *)out)[ci]); - else - for (ci=0; ci < dt->size; ci++) - fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); - fprintf(stderr, "}"); - - out = (char *)out + dt->stride; - } - - fprintf(stderr, "\n"); -} - -/*static */GLboolean r300_run_vb_render_vtxfmt_a(GLcontext *ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - //TNLcontext *tnl = TNL_CONTEXT(ctx); - struct radeon_vertex_buffer *VB = &rmesa->state.VB; //&tnl->vb; - int i, j; - LOCAL_VARS - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->state.VB.LockCount == 0) { - r300ReleaseArrays(ctx); - r300EmitArraysVtx(ctx, GL_FALSE); - } else { - /* TODO: Figure out why do we need these. */ - R300_STATECHANGE(rmesa, vir[0]); - R300_STATECHANGE(rmesa, vir[1]); - R300_STATECHANGE(rmesa, vic); - R300_STATECHANGE(rmesa, vof); - -#if 0 - fprintf(stderr, "dt:\n"); - for(i=0; i < VERT_ATTRIB_MAX; i++){ - fprintf(stderr, "dt %d:", i); - dump_dt(&rmesa->state.VB.AttribPtr[i], VB->Count); + r300ContextPtr r300 = R300_CONTEXT(ctx); + /* Do we need to use new-style shaders? + * Also is there a better way to do this? */ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } } - - fprintf(stderr, "before:\n"); - for(i=0; i < rmesa->state.aos_count; i++){ - fprintf(stderr, "aos %d:", i); - dump_array(&rmesa->state.aos[i], VB->Count); - } -#endif -#if 0 - r300ReleaseArrays(ctx); - r300EmitArraysVtx(ctx, GL_FALSE); - - fprintf(stderr, "after:\n"); - for(i=0; i < rmesa->state.aos_count; i++){ - fprintf(stderr, "aos %d:", i); - dump_array(&rmesa->state.aos[i], VB->Count); + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } } -#endif } - -// LOCK_HARDWARE(&(rmesa->radeon)); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); + FALLBACK_IF(ctx->RenderMode != GL_RENDER); - reg_start(0x4f18,0); - e32(0x00000003); -#if 0 - reg_start(R300_VAP_PVS_WAITIDLE,0); - e32(0x00000000); -#endif - r300EmitState(rmesa); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - r300_render_vb_primitive_vtxfmt_a(rmesa, ctx, start, start + length, prim); - } + FALLBACK_IF(ctx->Stencil._TestTwoSide + && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[1] + || ctx->Stencil.ValueMask[0] != + ctx->Stencil.ValueMask[1] + || ctx->Stencil.WriteMask[0] != + ctx->Stencil.WriteMask[1])); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a/*0x2*/); + FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); - reg_start(0x4f18,0); - e32(0x00000003/*0x1*/); - -#ifdef USER_BUFFERS - r300UseArrays(ctx); -#endif -// end_3d(PASS_PREFIX_VOID); - - /* Flush state - we are done drawing.. */ -// r300FlushCmdBufLocked(rmesa, __FUNCTION__); -// radeonWaitForIdleLocked(&(rmesa->radeon)); + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); + + if (!r300->disable_lowimpact_fallback) { + FALLBACK_IF(ctx->Polygon.StippleFlag); + FALLBACK_IF(ctx->Multisample.Enabled); + FALLBACK_IF(ctx->Line.StippleFlag); + FALLBACK_IF(ctx->Line.SmoothFlag); + FALLBACK_IF(ctx->Point.SmoothFlag); + } -// UNLOCK_HARDWARE(&(rmesa->radeon)); - return GL_FALSE; + return R300_FALLBACK_NONE; } -#endif -/** - * Called by the pipeline manager to render a batch of primitives. - * We can return true to pass on to the next stage (i.e. software - * rasterization) or false to indicate that the pipeline has finished - * after we render something. - */ -static GLboolean r300_run_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunNonTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); -#if 0 - return r300_run_immediate_render(ctx, stage); -#else - return r300_run_vb_render(ctx, stage); -#endif -} - - -/** - * Called by the pipeline manager once before rendering. - * We check the GL state here to - * a) decide whether we can do the current state in hardware and - * b) update hardware registers - */ -#define FALLBACK_IF(expr) \ -do { \ - if (expr) { \ - if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ - WARN_ONCE("fallback:%s\n", #expr); \ - /*stage->active = GL_FALSE*/; \ - return; \ - } \ -} while(0) - -static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) -{ - - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* We only support rendering in hardware for now */ - if (ctx->RenderMode != GL_RENDER) { - //stage->active = GL_FALSE; - return; - } - - - /* I'm almost certain I forgot something here */ -#if 0 /* These should work now.. */ - FALLBACK_IF(ctx->Color.DitherFlag); - FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST - FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND - FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL -#endif - //FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT - //FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE - //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST - - //FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG disable as swtcl doesnt seem to support this - //FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH disabling to get blender going - FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE - FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB - - FALLBACK_IF(ctx->RenderMode != GL_RENDER); // We do not do SELECT or FEEDBACK (yet ?) - -#if 0 /* ut2k3 fails to start if this is on */ - /* One step at a time - let one texture pass.. */ - for (i = 1; i < ctx->Const.MaxTextureUnits; i++) - FALLBACK_IF(ctx->Texture.Unit[i].Enabled); -#endif - - /* Assumed factor reg is found but pattern is still missing */ - //FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE disabling to get blender going - - /* HW doesnt appear to directly support these */ - //FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH disabling to get blender going - FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH - /* Rest could be done with vertex fragments */ - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV - //GL_POINT_DISTANCE_ATTENUATION_ARB - //GL_POINT_FADE_THRESHOLD_SIZE_ARB - - /* let r300_run_render do its job */ -#if 0 - stage->active = GL_FALSE; -#endif -} + if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + return GL_TRUE; + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; -static void dtr(struct tnl_pipeline_stage *stage) -{ - (void)stage; + return r300RunRender(ctx, stage); } -static GLboolean r300_create_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +static GLboolean r300RunTCLRender(GLcontext * ctx, + struct tnl_pipeline_stage *stage) { - return GL_TRUE; -} - + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program *vp; -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 hw rasterize", - NULL, - r300_create_render, - dtr, /* destructor */ - r300_check_render, /* check */ - r300_run_render /* run */ -}; + hw_tcl_on = future_hw_tcl_on; -static GLboolean r300_run_tcl_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - hw_tcl_on=future_hw_tcl_on; - if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if(hw_tcl_on == GL_FALSE) + + if (hw_tcl_on == GL_FALSE) return GL_TRUE; - - r300UpdateShaderStates(rmesa); - - return r300_run_vb_render(ctx, stage); -} -static void r300_check_tcl_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) -{ + if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + r300UpdateShaders(rmesa); - /* We only support rendering in hardware for now */ - if (ctx->RenderMode != GL_RENDER) { - //stage->active = GL_FALSE; - return; + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); + if (vp->native == GL_FALSE) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; } + + return r300RunRender(ctx, stage); } +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 Hardware Rasterization", + NULL, + NULL, + NULL, + NULL, + r300RunNonTCLRender +}; + const struct tnl_pipeline_stage _r300_tcl_stage = { - "r300 tcl", + "r300 Hardware Transform, Clipping and Lighting", + NULL, + NULL, + NULL, NULL, - r300_create_render, - dtr, /* destructor */ - r300_check_tcl_render, /* check */ - r300_run_tcl_render /* run */ + r300RunTCLRender };