X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_render.c;h=e6ed06a69d6b6e2096637b0ab33ba3df4cccda22;hb=83a43aca233cfdf8f8cac26895ef4ea4105d96af;hp=c6d57a35517bf96af462aae12f7b6de7aab905fc;hpb=4885a9eb207655ac23b64274034e1d51d2c8e6ce;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index c6d57a35517..e6ed06a69d6 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -44,6 +44,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "array_cache/acache.h" #include "tnl/tnl.h" +#include "tnl/t_vp_build.h" #include "radeon_reg.h" #include "radeon_macros.h" @@ -55,66 +56,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_program.h" #include "r300_tex.h" - +#include "r300_maos.h" #include "r300_emit.h" -#if 0 -/* Turns out we might not need this after all... */ -static void update_zbias(GLcontext * ctx, int prim) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int enabled = 0; - uint32_t values[4]; - //return ; - switch(prim & PRIM_MODE_MASK) { - case GL_POINTS: - if(ctx->Polygon.OffsetPoint == GL_TRUE) - enabled=1; - break; - case GL_LINES: - case GL_LINE_STRIP: - case GL_LINE_LOOP: - if(ctx->Polygon.OffsetLine == GL_TRUE) - enabled=1; - break; - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - case GL_QUADS: - case GL_QUAD_STRIP: - case GL_POLYGON: - if(ctx->Polygon.OffsetFill == GL_TRUE) - enabled=1; - break; - default: - fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", - __FILE__, __FUNCTION__, - prim & PRIM_MODE_MASK); - - } - - if(enabled){ - values[0]=values[2]=r300PackFloat32(ctx->Polygon.OffsetFactor * 12.0); - values[1]=values[3]=r300PackFloat32(ctx->Polygon.OffsetUnits * 4.0); - }else{ - values[0]=values[2]=r300PackFloat32(0.0); - values[1]=values[3]=r300PackFloat32(0.0); - } - - if(values[0] != rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] || - values[1] != rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] || - values[2] != rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] || - values[3] != rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT]){ - - R300_STATECHANGE(rmesa, zbs); - rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = values[0]; - rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = values[1]; - rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = values[2]; - rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = values[3]; - - } -} -#endif +extern int future_hw_tcl_on; /********************************************************************** * Hardware rasterization @@ -125,18 +70,15 @@ static void update_zbias(GLcontext * ctx, int prim) static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int type=-1; - + int type=-1; + switch (prim & PRIM_MODE_MASK) { case GL_POINTS: type=R300_VAP_VF_CNTL__PRIM_POINTS; - break; + break; case GL_LINES: type=R300_VAP_VF_CNTL__PRIM_LINES; - break; + break; case GL_LINE_STRIP: type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP; break; @@ -171,33 +113,32 @@ static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int pri return type; } -static int r300_get_num_verts(r300ContextPtr rmesa, +static int r300_get_num_verts(r300ContextPtr rmesa, GLcontext *ctx, int num_verts, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int type=-1, verts_off=0; - char *name="UNKNOWN"; - + int verts_off=0; + char *name="UNKNOWN"; + switch (prim & PRIM_MODE_MASK) { case GL_POINTS: name="P"; verts_off = 0; - break; + break; case GL_LINES: name="L"; verts_off = num_verts % 2; - break; + break; case GL_LINE_STRIP: name="LS"; - verts_off = num_verts % 2; + if(num_verts < 2) + verts_off = num_verts; break; case GL_LINE_LOOP: name="LL"; - verts_off = num_verts % 2; + if(num_verts < 2) + verts_off = num_verts; break; case GL_TRIANGLES: name="T"; @@ -236,438 +177,323 @@ static int r300_get_num_verts(r300ContextPtr rmesa, return -1; break; } - - if(num_verts - verts_off == 0){ - WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name); - return 0; - } - - if(verts_off > 0){ - WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name); - } - - return num_verts - verts_off; -} - -void dump_inputs(GLcontext *ctx, int render_inputs) -{ - int k; - fprintf(stderr, "inputs:"); - if(render_inputs & _TNL_BIT_POS) - fprintf(stderr, "_TNL_BIT_POS "); - if(render_inputs & _TNL_BIT_NORMAL) - fprintf(stderr, "_TNL_BIT_NORMAL "); - - /* color components */ - if(render_inputs & _TNL_BIT_COLOR0) - fprintf(stderr, "_TNL_BIT_COLOR0 "); - if(render_inputs & _TNL_BIT_COLOR1) - fprintf(stderr, "_TNL_BIT_COLOR1 "); - - if(render_inputs & _TNL_BIT_FOG) - fprintf(stderr, "_TNL_BIT_FOG "); - - /* texture coordinates */ - for(k=0;k < ctx->Const.MaxTextureUnits;k++) - if(render_inputs & (_TNL_BIT_TEX0< 0) { + WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name); + } + } + return num_verts - verts_off; +} +/* Immediate implementation has been removed from CVS. */ -/* Immediate implementation - vertex data is sent via command stream */ +/* vertex buffer implementation */ -static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0}; +static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type, int elt_size) +{ + LOCAL_VARS + unsigned long addr_a; + unsigned long t_addr; + unsigned long magic_1, magic_2; + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; + + assert(elt_size == 2 || elt_size == 4); + + if(addr & (elt_size-1)){ + WARN_ONCE("Badly aligned buffer\n"); + return ; + } +#ifdef OPTIMIZE_ELTS + addr_a = 0; + + magic_1 = (addr % 32) / 4; + t_addr = addr & (~0x1d); + magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; + + check_space(6); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + } -#define output_vector(v, i) \ - { \ - int _i; \ - for(_i=0;_isize;_i++){ \ - efloat(VEC_ELT(v, GLfloat, i)[_i]); \ - } \ - for(_i=v->size;_i<4;_i++){ \ - efloat(default_vector[_i]); \ - } \ + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); + if(elt_size == 4){ + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr /*& 0xffffffe3*/); + } else { + e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); + e32(t_addr); + } + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32(magic_2); /* Total number of dwords needed? */ + } + //cp_delay(PASS_PREFIX 1); +#if 0 + fprintf(stderr, "magic_1 %d\n", magic_1); + fprintf(stderr, "t_addr %x\n", t_addr); + fprintf(stderr, "magic_2 %d\n", magic_2); + exit(1); +#endif +#else + (void)magic_2, (void)magic_1, (void)t_addr; + + addr_a = 0; + + check_space(6); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); } -/* Immediate implementation - vertex data is sent via command stream */ + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr /*& 0xffffffe3*/); + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ + } + //cp_delay(PASS_PREFIX 1); +#endif +} -static void r300_render_immediate_primitive(r300ContextPtr rmesa, +static void r300_render_vb_primitive(r300ContextPtr rmesa, GLcontext *ctx, int start, int end, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i, render_inputs; - int k, type, num_verts; - LOCAL_VARS - + int type, num_verts; + type=r300_get_primitive_type(rmesa, ctx, prim); num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); - - #if 0 - fprintf(stderr,"ObjPtr: size=%d stride=%d\n", - VB->ObjPtr->size, VB->ObjPtr->stride); - fprintf(stderr,"ColorPtr[0]: size=%d stride=%d\n", - VB->ColorPtr[0]->size, VB->ColorPtr[0]->stride); - fprintf(stderr,"TexCoordPtr[0]: size=%d stride=%d\n", - VB->TexCoordPtr[0]->size, VB->TexCoordPtr[0]->stride); - #endif - + if(type<0 || num_verts <= 0)return; - if(!VB->ObjPtr){ - WARN_ONCE("FIXME: Don't know how to handle GL_ARB_vertex_buffer_object correctly\n"); - return; - } - /* A packet cannot have more than 16383 data words.. */ - if((num_verts*4*rmesa->state.aos_count)>16380){ - WARN_ONCE("Too many vertices to paint. Fix me !\n"); - return; + if(rmesa->state.VB.Elts){ + r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); +#if 0 + LOCAL_VARS + int i; + start_index32_packet(num_verts, type); + for(i=0; i < num_verts; i++) + e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */ +#else + if(num_verts == 1){ + //start_index32_packet(num_verts, type); + //e32(rmesa->state.Elts[start]); + return; } - - //fprintf(stderr, "aos_count=%d start=%d end=%d\n", rmesa->state.aos_count, start, end); - - if(rmesa->state.aos_count==0){ - WARN_ONCE("Aeiee ! aos_count==0, while it shouldn't. Skipping rendering\n"); - return; - } - - render_inputs = rmesa->state.render_inputs; - - if(!render_inputs){ - WARN_ONCE("Aeiee ! render_inputs==0. Skipping rendering.\n"); - return; - } - - //dump_inputs(ctx, render_inputs); return ; - start_immediate_packet(num_verts, type, 4*rmesa->state.aos_count); - - for(i=start;iObjPtr, GLfloat, i)[0], - VEC_ELT(VB->ObjPtr, GLfloat, i)[1], - VEC_ELT(VB->ObjPtr, GLfloat, i)[2], - VEC_ELT(VB->ObjPtr, GLfloat, i)[3], - - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3] - ); - #endif - - - /* coordinates */ - if(render_inputs & _TNL_BIT_POS) - output_vector(VB->ObjPtr, i); - if(render_inputs & _TNL_BIT_NORMAL) - output_vector(VB->NormalPtr, i); - - /* color components */ - if(render_inputs & _TNL_BIT_COLOR0) - output_vector(VB->ColorPtr[0], i); - if(render_inputs & _TNL_BIT_COLOR1) - output_vector(VB->SecondaryColorPtr[0], i); - -/* if(render_inputs & _TNL_BIT_FOG) // Causes lock ups when immediate mode is on - output_vector(VB->FogCoordPtr, i);*/ - - /* texture coordinates */ - for(k=0;k < ctx->Const.MaxTextureUnits;k++) - if(render_inputs & (_TNL_BIT_TEX0<TexCoordPtr[k], i); - - if(render_inputs & _TNL_BIT_INDEX) - output_vector(VB->IndexPtr[0], i); - if(render_inputs & _TNL_BIT_POINTSIZE) - output_vector(VB->PointSizePtr, i); - } - + if(num_verts > 65535){ /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } + + r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); + fire_EB(PASS_PREFIX rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); +#endif + }else{ + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + fire_AOS(PASS_PREFIX num_verts, type); + } } - -static GLboolean r300_run_immediate_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +#if 0 +void dump_array(struct r300_dma_region *rvb, int count) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - /* Only do 2d textures */ - struct gl_texture_object *to=ctx->Texture.Unit[0].Current2D; - r300TexObjPtr t=to->DriverData; - LOCAL_VARS + int *out = (int *)(rvb->address + rvb->start); + int i, ci; - - /* Update texture state - needs to be done only when actually changed.. - All the time for now.. */ - + fprintf(stderr, "stride %d:", rvb->aos_stride); + for (i=0; i < count; i++) { + fprintf(stderr, "{"); + if (rvb->aos_format == AOS_FORMAT_FLOAT) + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); + + out += rvb->aos_stride; + } - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "\n"); +} - #if 1 /* we need this, somehow */ - /* Flush state - make sure command buffer is nice and large */ - r300Flush(ctx); - /* Make sure we have enough space */ - #else - /* Count is very imprecize, but should be good upper bound */ - r300EnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size + 4+2+30 - +VB->PrimitiveCount*(1+8)+VB->Count*4*rmesa->state.texture.tc_count+4, __FUNCTION__); - #endif - - /* needed before starting 3d operation .. */ - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); +void dump_dt(struct dt *dt, int count) +{ + int *out = dt->data; + int i, ci; - - #if 0 /* looks like the Z offset issue got fixed */ - rmesa->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA - | R300_VPORT_X_OFFSET_ENA - | R300_VPORT_Y_SCALE_ENA - | R300_VPORT_Y_OFFSET_ENA - | R300_VTX_W0_FMT; - R300_STATECHANGE(rmesa, vte); - #endif - + fprintf(stderr, "stride %d", dt->stride); + + for (i=0; i < count; i++){ + fprintf(stderr, "{"); + if (dt->type == GL_FLOAT) + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); + + out = (int *)((char *)out + dt->stride); + } - - /* Magic register - note it is right after 20b0 */ + fprintf(stderr, "\n"); +} +#endif +GLboolean r300_run_vb_render(GLcontext *ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + int i; + LOCAL_VARS - if(rmesa->state.texture.tc_count>0){ - reg_start(0x20b4,0); - e32(0x0000000c); - + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (stage) { + TNLcontext *tnl = TNL_CONTEXT(ctx); + radeon_vb_to_rvb(rmesa, VB, &tnl->vb); } + + r300UpdateShaders(rmesa); + if (rmesa->state.VB.LockCount == 0 || 1) { + r300EmitArrays(ctx, GL_FALSE); + + r300UpdateShaderStates(rmesa); + } else { + /* TODO: Figure out why do we need these. */ + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vic); + R300_STATECHANGE(rmesa, vof); - r300EmitState(rmesa); - - #if 0 - reg_start(R300_RB3D_COLORMASK, 0); - e32(0xf); - - vsf_start_fragment(0x406, 4); - efloat(0.0); - efloat(0.0); - efloat(0.0); - efloat(1.0); - - vsf_start_fragment(0x400, 4); - efloat(0.0); - efloat(0.0); - efloat(0.0); - efloat(1.0); - #endif - - /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */ - r300EmitLOAD_VBPNTR(rmesa, 0); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - r300_render_immediate_primitive(rmesa, ctx, start, start + length, prim); - } +#if 0 + fprintf(stderr, "dt:\n"); + for(i=0; i < VERT_ATTRIB_MAX; i++){ + fprintf(stderr, "dt %d:", i); + dump_dt(&rmesa->state.VB.AttribPtr[i], VB->Count); + } + + fprintf(stderr, "before:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif +#if 0 + r300ReleaseArrays(ctx); + r300EmitArrays(ctx, GL_FALSE); + + fprintf(stderr, "after:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif + } - /* This sequence is required after any 3d drawing packet - I suspect it work arounds a bug (or deficiency) in hardware */ - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); e32(0x0000000a); - - reg_start(0x4f18,0); + + reg_start(0x4f18,0); e32(0x00000003); - - return GL_FALSE; -} +#if 0 + reg_start(R300_VAP_PVS_WAITIDLE,0); + e32(0x00000000); +#endif + r300EmitState(rmesa); + + for(i=0; i < VB->PrimitiveCount; i++){ + GLuint prim = VB->Primitive[i].mode; + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); + } + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + e32(0x0000000a/*0x2*/); -/* vertex buffer implementation */ + reg_start(0x4f18,0); + e32(0x00000003/*0x1*/); -/* We use the start part of GART texture buffer for vertices */ +#ifdef USER_BUFFERS + r300UseArrays(ctx); +#endif + r300ReleaseArrays(ctx); + return GL_FALSE; +} +#define FALLBACK_IF(expr) \ +do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", #expr); \ + return R300_FALLBACK_RAST; \ + } \ +} while(0) -static void upload_vertex_buffer(r300ContextPtr rmesa, GLcontext *ctx) +int r300Fallback(GLcontext *ctx) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int idx=0; - int i,j,k; - radeonScreenPtr rsp=rmesa->radeon.radeonScreen; - GLuint render_inputs; - - /* A hack - we don't want to overwrite vertex buffers, so we - just use AGP space for them.. Fix me ! */ - static int offset=0; - if(offset>2*1024*1024){ - //fprintf(stderr, "Wrapping agp vertex buffer offset\n"); - offset=0; - } - /* Not the most efficient implementation, but, for now, I just want something that - works */ - /* to do - make single memcpy per column (is it possible ?) */ - /* to do - use dirty flags to avoid redundant copies */ - #define UPLOAD_VECTOR(v)\ - { \ - /* Is the data dirty ? */ \ - if (v->flags & ((1<size)-1)) { \ - /* fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); */ \ - if(v->size*4==v->stride){\ - /* fast path */ \ - memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \ - } else { \ - for(i=0;iCount;i++){ \ - /* copy one vertex at a time*/ \ - memcpy(rsp->gartTextures.map+offset+i*v->size*4, VEC_ELT(v, GLfloat, i), v->size*4); \ - } \ - } \ - /* v->flags &= ~((1<size)-1);*/ \ - } \ - rmesa->state.aos[idx].offset=rsp->gartTextures.handle+offset; \ - offset+=v->size*4*VB->Count; \ - idx++; \ - } - - render_inputs = rmesa->state.render_inputs; + int i; - if(!render_inputs){ - WARN_ONCE("Aeiee ! render_inputs==0. Skipping rendering.\n"); - return; - } - /* coordinates */ - if(render_inputs & _TNL_BIT_POS) - UPLOAD_VECTOR(VB->ObjPtr); - if(render_inputs & _TNL_BIT_NORMAL) - UPLOAD_VECTOR(VB->NormalPtr); + //FALLBACK_IF(ctx->RenderMode != GL_RENDER); // We do not do SELECT or FEEDBACK (yet ?) - /* color components */ - if(render_inputs & _TNL_BIT_COLOR0) - UPLOAD_VECTOR(VB->ColorPtr[0]); - if(render_inputs & _TNL_BIT_COLOR1) - UPLOAD_VECTOR(VB->SecondaryColorPtr[0]); - - if(render_inputs & _TNL_BIT_FOG) - UPLOAD_VECTOR(VB->FogCoordPtr); +#if 0 /* These should work now.. */ + FALLBACK_IF(ctx->Color.DitherFlag); + FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST + FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND + FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL +#endif + FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT + FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE + //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST - /* texture coordinates */ - for(k=0;k < ctx->Const.MaxTextureUnits;k++) - if(render_inputs & (_TNL_BIT_TEX0<TexCoordPtr[k]); + //FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG disable as swtcl doesnt seem to support this + //FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH disabling to get blender going + FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE + FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB - if(render_inputs & _TNL_BIT_INDEX) - UPLOAD_VECTOR(VB->IndexPtr[0]); - if(render_inputs & _TNL_BIT_POINTSIZE) - UPLOAD_VECTOR(VB->PointSizePtr); - - if(idx>=R300_MAX_AOS_ARRAYS){ - fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n"); - exit(-1); - } - //dump_inputs(ctx, render_inputs); return ; -} - -static void r300_render_vb_primitive(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) -{ - int type, num_verts; - LOCAL_VARS - - type=r300_get_primitive_type(rmesa, ctx, prim); - num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); - - if(type<0 || num_verts <= 0)return; - - - fire_AOS(PASS_PREFIX num_verts, type); -} - -static GLboolean r300_run_vb_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int i, j; - LOCAL_VARS - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - r300_setup_routing(ctx, GL_FALSE); + FALLBACK_IF(ctx->Line.StippleFlag); - r300EmitState(rmesa); - - /* setup array of structures data */ - LOCK_HARDWARE(&(rmesa->radeon)); + /* HW doesnt appear to directly support these */ + FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH + FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH + /* Rest could be done with vertex fragments */ + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV - upload_vertex_buffer(rmesa, ctx); - //fprintf(stderr, "Using %d AOS arrays\n", n_arrays); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. */ - r300EmitLOAD_VBPNTR(rmesa, start); - - r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); - } - - /* This sequence is required after any 3d drawing packet - I suspect it works around a bug (or deficiency) in hardware */ - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - end_3d(PASS_PREFIX_VOID); - - /* Flush state - we are done drawing.. */ - r300FlushCmdBufLocked(ctx, __FUNCTION__); - radeonWaitForIdleLocked(&(rmesa->radeon)); - - UNLOCK_HARDWARE(&(rmesa->radeon)); - return GL_FALSE; + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) + return R300_FALLBACK_TCL; + + return R300_FALLBACK_NONE; } /** @@ -679,142 +505,186 @@ static GLboolean r300_run_vb_render(GLcontext *ctx, static GLboolean r300_run_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - - if (RADEON_DEBUG == DEBUG_PRIMS) + + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - - #if 1 + if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + return GL_TRUE; + + return r300_run_vb_render(ctx, stage); +} + +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 hw rasterize", + NULL, + NULL, + NULL, + NULL, + r300_run_render /* run */ +}; + +static GLboolean r300_run_tcl_render(GLcontext *ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program *vp; + + hw_tcl_on=future_hw_tcl_on; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + if(hw_tcl_on == GL_FALSE) + return GL_TRUE; - #if 1 + if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } - return r300_run_immediate_render(ctx, stage); - #else - return r300_run_vb_render(ctx, stage); - #endif - #else - return GL_TRUE; - #endif + r300UpdateShaders(rmesa); + + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); +#if 0 /* Draw every second request with software arb vp */ + vp->native++; + vp->native &= 1; + //vp->native = GL_FALSE; +#endif +#if 0 /* You dont want to know what this does... */ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct tnl_cache *cache; + struct tnl_cache_item *c; + + cache = tnl->vp_cache; + c = cache->items[0xc000cc0e % cache->size]; + + if(c && c->data == vp) + vp->native = GL_FALSE; + +#endif #if 0 - mgaContextPtr mmesa = MGA_CONTEXT(ctx); + vp->native = GL_FALSE; +#endif + if (vp->native == GL_FALSE) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + //r300UpdateShaderStates(rmesa); + + return r300_run_vb_render(ctx, stage); +} + +const struct tnl_pipeline_stage _r300_tcl_stage = { + "r300 tcl", + NULL, + NULL, + NULL, + NULL, + r300_run_tcl_render /* run */ +}; + +/* R300 texture rectangle expects coords in 0..1 range, not 0..dimension + * as in the extension spec. Need to translate here. + * + * Note that swrast expects 0..dimension, so if a fallback is active, + * don't do anything. (Maybe need to configure swrast to match hw) + */ +struct texrect_stage_data { + GLvector4f texcoord[MAX_TEXTURE_UNITS]; +}; + +#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr) + + +static GLboolean run_texrect_stage( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) +{ + struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); + r300ContextPtr rmesa = R300_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint i; - /* Don't handle clipping or indexed vertices or vertex manipulations. - */ - if (mmesa->RenderIndex != 0 || - !mga_validate_render( ctx, VB )) { + if (rmesa->radeon.Fallback) return GL_TRUE; - } - - tnl->Driver.Render.Start( ctx ); - mmesa->SetupNewInputs = ~0; - - for (i = 0 ; i < VB->PrimitiveCount ; i++) - { - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - if (!length) - continue; - - mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length, - prim); + for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect; + struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel]; + const GLfloat iw = 1.0/texImage->Width; + const GLfloat ih = 1.0/texImage->Height; + GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data; + GLint instride = VB->TexCoordPtr[i]->stride; + GLfloat (*out)[4] = store->texcoord[i].data; + GLint j; + + store->texcoord[i].size = VB->TexCoordPtr[i]->size; + for (j = 0 ; j < VB->Count ; j++) { + switch (VB->TexCoordPtr[i]->size) { + case 4: + out[j][3] = in[3]; + /* fallthrough */ + case 3: + out[j][2] = in[2]; + /* fallthrough */ + default: + out[j][0] = in[0] * iw; + out[j][1] = in[1] * ih; + } + in = (GLfloat *)((GLubyte *)in + instride); + } + + VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i]; + } } - tnl->Driver.Render.Finish( ctx ); - - return GL_FALSE; /* finished the pipe */ -#endif + return GL_TRUE; } -/** - * Called by the pipeline manager once before rendering. - * We check the GL state here to - * a) decide whether we can do the current state in hardware and - * b) update hardware registers +/* Called the first time stage->run() is invoked. */ -#define FALLBACK_IF(expr) \ -do { \ - if (expr) { \ - if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ - fprintf(stderr, "%s: fallback:%s\n", \ - __FUNCTION__, #expr); \ - stage->active = GL_FALSE; \ - return; \ - } \ -} while(0) - -static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) +static GLboolean alloc_texrect_data( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - int i; + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct texrect_stage_data *store; + GLuint i; - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + stage->privatePtr = CALLOC(sizeof(*store)); + store = TEXRECT_STAGE_DATA(stage); + if (!store) + return GL_FALSE; - /* We only support rendering in hardware for now */ - if (ctx->RenderMode != GL_RENDER) { - stage->active = GL_FALSE; - return; - } - - // I failed to figure out how dither works in hardware, - // let's just ignore it for now - //FALLBACK_IF(ctx->Color.DitherFlag); - - /* I'm almost certain I forgot something here */ - #if 0 /* This should work now.. */ - FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST - FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND - #endif - //FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG disable as swtcl doesnt seem to support this - FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH - FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE - FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV - FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT - FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE - //FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL - //if(ctx->Polygon.OffsetFill)WARN_ONCE("Polygon.OffsetFill not implemented, ignoring\n"); - FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH - FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE - //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST - FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB - - /* One step at a time - let one texture pass.. */ - for (i = 1; i < ctx->Const.MaxTextureUnits; i++) - FALLBACK_IF(ctx->Texture.Unit[i].Enabled); - - /* let r300_run_render do its job */ - #if 0 - stage->active = GL_FALSE; - #endif -} + for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) + _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 ); + return GL_TRUE; +} -static void dtr(struct tnl_pipeline_stage *stage) +static void free_texrect_data( struct tnl_pipeline_stage *stage ) { - (void)stage; + struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); + GLuint i; + + if (store) { + for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++) + if (store->texcoord[i].data) + _mesa_vector4f_free( &store->texcoord[i] ); + FREE( store ); + stage->privatePtr = NULL; + } } -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 hw rasterize", - _NEW_ALL, /* re-check (always re-check for now) */ - 0, /* re-run (always runs) */ - GL_TRUE, /* active */ - 0, 0, /* inputs (set in check_render), outputs */ - 0, 0, /* changed_inputs, private */ - dtr, /* destructor */ - r300_check_render, /* check */ - r300_run_render /* run */ +const struct tnl_pipeline_stage _r300_texrect_stage = +{ + "r300 texrect stage", /* name */ + NULL, + alloc_texrect_data, + free_texrect_data, + NULL, + run_texrect_stage }; +