X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_render.c;h=e6ed06a69d6b6e2096637b0ab33ba3df4cccda22;hb=83a43aca233cfdf8f8cac26895ef4ea4105d96af;hp=ce924eb863c1124ed2cc513fa76c4a2324ac3860;hpb=25faa2d56e2c417d64c3ad50f43e421002bcd29a;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index ce924eb863c..e6ed06a69d6 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -44,7 +44,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "array_cache/acache.h" #include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "radeon_reg.h" +#include "radeon_macros.h" #include "radeon_ioctl.h" #include "radeon_state.h" #include "r300_context.h" @@ -52,9 +55,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_reg.h" #include "r300_program.h" +#include "r300_tex.h" +#include "r300_maos.h" +#include "r300_emit.h" -#include "r300_lib.h" - +extern int future_hw_tcl_on; /********************************************************************** * Hardware rasterization @@ -63,608 +68,433 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * rasterization hardware for rendering. **********************************************************************/ -static int r300_get_primitive_type(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) +static int r300_get_primitive_type(r300ContextPtr rmesa, GLcontext *ctx, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int type=-1, min_vertices=0; - char *name="UNKNOWN"; - - if(end<=start)return -1; /* do we need to watch for this ? */ - + int type=-1; + switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - name="P"; type=R300_VAP_VF_CNTL__PRIM_POINTS; - min_vertices=1; - break; + break; case GL_LINES: - name="L"; type=R300_VAP_VF_CNTL__PRIM_LINES; - min_vertices=2; - break; + break; case GL_LINE_STRIP: - name="LS"; type=R300_VAP_VF_CNTL__PRIM_LINE_STRIP; - min_vertices=2; break; case GL_LINE_LOOP: - name="LL"; - min_vertices=2; - return -1; + type=R300_VAP_VF_CNTL__PRIM_LINE_LOOP; break; case GL_TRIANGLES: - name="T"; type=R300_VAP_VF_CNTL__PRIM_TRIANGLES; - min_vertices=3; break; case GL_TRIANGLE_STRIP: - name="TS"; type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; - min_vertices=3; break; case GL_TRIANGLE_FAN: - name="TF"; type=R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; - min_vertices=3; break; case GL_QUADS: - name="Q"; type=R300_VAP_VF_CNTL__PRIM_QUADS; - min_vertices=4; break; case GL_QUAD_STRIP: - name="QS"; type=R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; - min_vertices=4; break; + case GL_POLYGON: + type=R300_VAP_VF_CNTL__PRIM_POLYGON; + break; default: - fprintf(stderr, "Cannot handle primitive %02x ", prim & PRIM_MODE_MASK); + fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", + __FILE__, __FUNCTION__, + prim & PRIM_MODE_MASK); return -1; break; } - #if 0 - fprintf(stderr, "[%d-%d]%s ", start, end, name); - #endif - if(start+min_vertices>=end){ - fprintf(stderr, "Not enough vertices\n"); - return -1; - } return type; } - - -/* Immediate implementation - vertex data is sent via command stream */ - -static GLfloat default_vector[4]={0.0, 0.0, 0.0, 1.0}; - -#define output_vector(v, i) \ - { \ - int _i; \ - for(_i=0;_isize;_i++){ \ - efloat(VEC_ELT(v, GLfloat, i)[_i]); \ - } \ - for(_i=v->size;_i<4;_i++){ \ - efloat(default_vector[_i]); \ - } \ - } - -static void r300_render_flat_primitive(r300ContextPtr rmesa, +static int r300_get_num_verts(r300ContextPtr rmesa, GLcontext *ctx, - int start, - int end, + int num_verts, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int k, type; - LOCAL_VARS - - type=r300_get_primitive_type(rmesa, ctx, start, end, prim); - - if(type<0)return; + int verts_off=0; + char *name="UNKNOWN"; + switch (prim & PRIM_MODE_MASK) { + case GL_POINTS: + name="P"; + verts_off = 0; + break; + case GL_LINES: + name="L"; + verts_off = num_verts % 2; + break; + case GL_LINE_STRIP: + name="LS"; + if(num_verts < 2) + verts_off = num_verts; + break; + case GL_LINE_LOOP: + name="LL"; + if(num_verts < 2) + verts_off = num_verts; + break; + case GL_TRIANGLES: + name="T"; + verts_off = num_verts % 3; + break; + case GL_TRIANGLE_STRIP: + name="TS"; + if(num_verts < 3) + verts_off = num_verts; + break; + case GL_TRIANGLE_FAN: + name="TF"; + if(num_verts < 3) + verts_off = num_verts; + break; + case GL_QUADS: + name="Q"; + verts_off = num_verts % 4; + break; + case GL_QUAD_STRIP: + name="QS"; + if(num_verts < 4) + verts_off = num_verts; + else + verts_off = num_verts % 2; + break; + case GL_POLYGON: + name="P"; + if(num_verts < 3) + verts_off = num_verts; + break; + default: + fprintf(stderr, "%s:%s Do not know how to handle primitive %02x - help me !\n", + __FILE__, __FUNCTION__, + prim & PRIM_MODE_MASK); + return -1; + break; + } - start_immediate_packet(end-start, type, 8); + if (RADEON_DEBUG & DEBUG_VERTS) { + if (num_verts - verts_off == 0) { + WARN_ONCE("user error: Need more than %d vertices to draw primitive %s !\n", num_verts, name); + return 0; + } - for(i=start;iObjPtr, GLfloat, i)[0], - VEC_ELT(VB->ObjPtr, GLfloat, i)[1], - VEC_ELT(VB->ObjPtr, GLfloat, i)[2], - VEC_ELT(VB->ObjPtr, GLfloat, i)[3], - - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3] - ); - #endif - - - /* coordinates */ - output_vector(VB->ObjPtr, i); - - /* color components */ - output_vector(VB->ColorPtr[0], i); + if (verts_off > 0) { + WARN_ONCE("user error: %d is not a valid number of vertices for primitive %s !\n", num_verts, name); } + } + return num_verts - verts_off; } -static GLboolean r300_run_flat_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) +/* Immediate implementation has been removed from CVS. */ + +/* vertex buffer implementation */ + +static void inline fire_EB(PREFIX unsigned long addr, int vertex_count, int type, int elt_size) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - AOS_DATA vb_arrays[2]; - LOCAL_VARS + LOCAL_VARS + unsigned long addr_a; + unsigned long t_addr; + unsigned long magic_1, magic_2; + GLcontext *ctx; + ctx = rmesa->radeon.glCtx; - /* Flush state - make sure command buffer is nice and large */ - r300Flush(ctx); + assert(elt_size == 2 || elt_size == 4); - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* setup array of structures data */ - - /* Note: immediate vertex data includes all coordinates. - To save bandwidth use either VBUF or state-based vertex generation */ - /* xyz */ - vb_arrays[0].element_size=4; - vb_arrays[0].stride=4; - vb_arrays[0].offset=0; /* Not used */ - vb_arrays[0].format=AOS_FORMAT_FLOAT; - vb_arrays[0].ncomponents=4; - vb_arrays[0].reg=REG_COORDS; - - /* color */ - vb_arrays[1].element_size=4; - vb_arrays[1].stride=4; - vb_arrays[1].offset=0; /* Not used */ - vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR; - vb_arrays[1].ncomponents=4; - vb_arrays[1].reg=REG_COLOR0; - - - /* needed before starting 3d operation .. */ - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); + if(addr & (elt_size-1)){ + WARN_ONCE("Badly aligned buffer\n"); + return ; + } +#ifdef OPTIMIZE_ELTS + addr_a = 0; - rmesa->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA - | R300_VPORT_X_OFFSET_ENA - | R300_VPORT_Y_SCALE_ENA - | R300_VPORT_Y_OFFSET_ENA - | R300_VTX_W0_FMT; - R300_STATECHANGE(rmesa, vte); - - r300EmitState(rmesa); - - FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16; - memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4); - - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0; - - program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE); - - /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */ - setup_AOS(PASS_PREFIX vb_arrays, 2); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - r300_render_flat_primitive(rmesa, ctx, start, start + length, prim); - } + magic_1 = (addr % 32) / 4; + t_addr = addr & (~0x1d); + magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; - /* This sequence is required after any 3d drawing packet - I suspect it work arounds a bug (or deficiency) in hardware */ - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - return GL_FALSE; -} - -/* vertex buffer implementation */ - -/* We use the start part of GART texture buffer for vertices */ - -/* 8 is somewhat bogus... it is probably something like 24 */ -#define R300_MAX_AOS_ARRAYS 8 + check_space(6); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + } -static void upload_vertex_buffer(r300ContextPtr rmesa, - GLcontext *ctx, AOS_DATA *array, int *n_arrays) -{ - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int offset=0, idx=0; - int i,j; - radeonScreenPtr rsp=rmesa->radeon.radeonScreen; - /* Not the most efficient implementation, but, for now, I just want something that - works */ - /* to do - make single memcpy per column (is it possible ?) */ - /* to do - use dirty flags to avoid redundant copies */ -#define UPLOAD_VECTOR(v, r, f)\ - { \ - /* Is the data dirty ? */ \ - if (v->flags & ((1<size)-1)) { \ - fprintf(stderr, "size=%d vs stride=%d\n", v->size, v->stride); \ - if(v->size*4==v->stride){\ - /* fast path */ \ - memcpy(rsp->gartTextures.map+offset, v->data, v->stride*VB->Count); \ - } else { \ - for(i=0;iCount;i++){ \ - /* copy one vertex at a time*/ \ - memcpy(rsp->gartTextures.map+offset+i*v->size*4, VEC_ELT(v, GLfloat, i), v->size*4); \ - } \ - } \ - /* v->flags &= ~((1<size)-1);*/ \ - } \ - array[idx].element_size=v->size; \ - array[idx].stride=v->size; \ - array[idx].format=(f); \ - array[idx].ncomponents=v->size; \ - array[idx].offset=rsp->gartTextures.handle+offset; \ - array[idx].reg=r; \ - offset+=v->size*4*VB->Count; \ - idx++; \ + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); + if(elt_size == 4){ + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr /*& 0xffffffe3*/); + } else { + e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); + e32(t_addr); + } + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32(magic_2); /* Total number of dwords needed? */ } + //cp_delay(PASS_PREFIX 1); +#if 0 + fprintf(stderr, "magic_1 %d\n", magic_1); + fprintf(stderr, "t_addr %x\n", t_addr); + fprintf(stderr, "magic_2 %d\n", magic_2); + exit(1); +#endif +#else + (void)magic_2, (void)magic_1, (void)t_addr; -UPLOAD_VECTOR(VB->ObjPtr, REG_COORDS, AOS_FORMAT_FLOAT); -UPLOAD_VECTOR(VB->ColorPtr[0], REG_COLOR0, AOS_FORMAT_FLOAT_COLOR); + addr_a = 0; + + check_space(6); + + start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); + if(elt_size == 4){ + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type); + } -*n_arrays=idx; -if(idx>=R300_MAX_AOS_ARRAYS){ - fprintf(stderr, "Aieee ! Maximum AOS arrays count exceeded.. \n"); - exit(-1); + start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); + e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + e32(addr /*& 0xffffffe3*/); + + if(elt_size == 4){ + e32(vertex_count /*+ addr_a/4*/); /* Total number of dwords needed? */ + } else { + e32((vertex_count+1)/2 /*+ addr_a/4*/); /* Total number of dwords needed? */ } + //cp_delay(PASS_PREFIX 1); +#endif } -static void r300_render_vb_flat_primitive(r300ContextPtr rmesa, +static void r300_render_vb_primitive(r300ContextPtr rmesa, GLcontext *ctx, int start, int end, int prim) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int k, type, n_arrays; - LOCAL_VARS - - if(end<=start)return; /* do we need to watch for this ? */ - - type=r300_get_primitive_type(rmesa, ctx, start, end, prim); - if(type<0)return; + int type, num_verts; - fire_AOS(PASS_PREFIX end-start, type); -} + type=r300_get_primitive_type(rmesa, ctx, prim); + num_verts=r300_get_num_verts(rmesa, ctx, end-start, prim); -static VERTEX_SHADER_FRAGMENT default_vector_vsf={ - length: 4, - body: { - f: {0.0, 0.0, 0.0, 1.0} - } - }; + if(type<0 || num_verts <= 0)return; -static GLboolean r300_run_vb_flat_render(GLcontext *ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - int i, j, n_arrays; - AOS_DATA vb_arrays[R300_MAX_AOS_ARRAYS]; - AOS_DATA vb_arrays2[R300_MAX_AOS_ARRAYS]; - LOCAL_VARS + if(rmesa->state.VB.Elts){ + r300EmitAOS(rmesa, rmesa->state.aos_count, /*0*/start); +#if 0 + LOCAL_VARS + int i; + start_index32_packet(num_verts, type); + for(i=0; i < num_verts; i++) + e32(((unsigned long *)rmesa->state.VB.Elts)[i]/*rmesa->state.Elts[start+i]*/); /* start ? */ +#else + if(num_verts == 1){ + //start_index32_packet(num_verts, type); + //e32(rmesa->state.Elts[start]); + return; + } - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - /* setup array of structures data */ - - upload_vertex_buffer(rmesa, ctx, vb_arrays, &n_arrays); - fprintf(stderr, "Using %d AOS arrays\n", n_arrays); - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - r300EmitState(rmesa); - - FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].length=16; - memcpy(FLAT_COLOR_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4); - - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.length=4; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0; - FLAT_COLOR_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0; - - program_pipeline(PASS_PREFIX &FLAT_COLOR_PIPELINE); - - set_quad0(PASS_PREFIX 1.0,1.0,1.0,1.0); - set_init21(PASS_PREFIX 0.0,1.0); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - - /* copy arrays */ - memcpy(vb_arrays2, vb_arrays, sizeof(AOS_DATA)*n_arrays); - for(j=0;j 65535){ /* not implemented yet */ + WARN_ONCE("Too many elts\n"); + return; + } - /* This sequence is required after any 3d drawing packet - I suspect it work arounds a bug (or deficiency) in hardware */ - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - - end_3d(PASS_PREFIX_VOID); - - /* Flush state - we are done drawing.. */ - r300Flush(ctx); - fprintf(stderr, "\n"); - return GL_FALSE; + r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, rmesa->state.VB.elt_size); + fire_EB(PASS_PREFIX rmesa->state.elt_dma.aos_offset, num_verts, type, rmesa->state.VB.elt_size); +#endif + }else{ + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + fire_AOS(PASS_PREFIX num_verts, type); + } } -/* Textures... */ - -/* Immediate implementation - vertex data is sent via command stream */ - -static void r300_render_tex_primitive(r300ContextPtr rmesa, - GLcontext *ctx, - int start, - int end, - int prim) +#if 0 +void dump_array(struct r300_dma_region *rvb, int count) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - int k, type; - LOCAL_VARS - - type=r300_get_primitive_type(rmesa, ctx, start, end, prim); + int *out = (int *)(rvb->address + rvb->start); + int i, ci; + + fprintf(stderr, "stride %d:", rvb->aos_stride); + for (i=0; i < count; i++) { + fprintf(stderr, "{"); + if (rvb->aos_format == AOS_FORMAT_FLOAT) + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < rvb->aos_size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); - #if 1 - fprintf(stderr,"ObjPtr: size=%d stride=%d\n", - VB->ObjPtr->size, VB->ObjPtr->stride); - fprintf(stderr,"ColorPtr[0]: size=%d stride=%d\n", - VB->ColorPtr[0]->size, VB->ColorPtr[0]->stride); - fprintf(stderr,"TexCoordPtr[0]: size=%d stride=%d\n", - VB->TexCoordPtr[0]->size, VB->TexCoordPtr[0]->stride); - #endif - - if(type<0)return; - - - start_immediate_packet(end-start, type, 12); + out += rvb->aos_stride; + } - for(i=start;iObjPtr, GLfloat, i)[0], - VEC_ELT(VB->ObjPtr, GLfloat, i)[1], - VEC_ELT(VB->ObjPtr, GLfloat, i)[2], - VEC_ELT(VB->ObjPtr, GLfloat, i)[3], - - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2], - VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3] - ); - #endif - - - /* coordinates */ - output_vector(VB->ObjPtr, i); + fprintf(stderr, "\n"); +} - /* color components */ - output_vector(VB->ColorPtr[0], i); +void dump_dt(struct dt *dt, int count) +{ + int *out = dt->data; + int i, ci; + + fprintf(stderr, "stride %d", dt->stride); + + for (i=0; i < count; i++){ + fprintf(stderr, "{"); + if (dt->type == GL_FLOAT) + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%f ", ((float *)out)[ci]); + else + for (ci=0; ci < dt->size; ci++) + fprintf(stderr, "%d ", ((unsigned char *)out)[ci]); + fprintf(stderr, "}"); - /* texture coordinates */ - output_vector(VB->TexCoordPtr[0], i); - } - + out = (int *)((char *)out + dt->stride); + } + + fprintf(stderr, "\n"); } +#endif -static GLboolean r300_run_tex_render(GLcontext *ctx, +GLboolean r300_run_vb_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - AOS_DATA vb_arrays[3]; - /* Only do 2d textures */ - struct gl_texture_object *to=ctx->Texture.Unit[0].Current2D; - r300TexObjPtr t=to->DriverData; - LOCAL_VARS - - - /* Update texture state - needs to be done only when actually changed.. - All the time for now.. */ - r300UpdateTextureState(ctx); - - /* Flush state - make sure command buffer is nice and large */ - r300Flush(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct radeon_vertex_buffer *VB = &rmesa->state.VB; + int i; + LOCAL_VARS - //fprintf(stderr, "You can enable texture drawing in %s:%s \n", __FILE__, __FUNCTION__); - //return GL_TRUE; - - - if (RADEON_DEBUG == DEBUG_PRIMS) + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - /* setup array of structures data */ - - /* Note: immediate vertex data includes all coordinates. - To save bandwidth use either VBUF or state-based vertex generation */ - /* xyzw */ - vb_arrays[0].element_size=4; - vb_arrays[0].stride=4; - vb_arrays[0].offset=0; /* Not used */ - vb_arrays[0].format=AOS_FORMAT_FLOAT; - vb_arrays[0].ncomponents=4; - vb_arrays[0].reg=REG_COORDS; - - /* color */ - vb_arrays[1].element_size=4; - vb_arrays[1].stride=4; - vb_arrays[1].offset=0; /* Not used */ - vb_arrays[1].format=AOS_FORMAT_FLOAT_COLOR; - vb_arrays[1].ncomponents=4; - vb_arrays[1].reg=REG_COLOR0; - - /* texture coordinates */ - vb_arrays[2].element_size=4; - vb_arrays[2].stride=4; - vb_arrays[2].offset=0; /* Not used */ - vb_arrays[2].format=AOS_FORMAT_FLOAT; - vb_arrays[2].ncomponents=4; - vb_arrays[2].reg=REG_TEX0; - - - /* needed before starting 3d operation .. */ - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + if (stage) { + TNLcontext *tnl = TNL_CONTEXT(ctx); + radeon_vb_to_rvb(rmesa, VB, &tnl->vb); + } + + r300UpdateShaders(rmesa); + if (rmesa->state.VB.LockCount == 0 || 1) { + r300EmitArrays(ctx, GL_FALSE); + + r300UpdateShaderStates(rmesa); + } else { + /* TODO: Figure out why do we need these. */ + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vic); + R300_STATECHANGE(rmesa, vof); + +#if 0 + fprintf(stderr, "dt:\n"); + for(i=0; i < VERT_ATTRIB_MAX; i++){ + fprintf(stderr, "dt %d:", i); + dump_dt(&rmesa->state.VB.AttribPtr[i], VB->Count); + } + + fprintf(stderr, "before:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif +#if 0 + r300ReleaseArrays(ctx); + r300EmitArrays(ctx, GL_FALSE); + + fprintf(stderr, "after:\n"); + for(i=0; i < rmesa->state.aos_count; i++){ + fprintf(stderr, "aos %d:", i); + dump_array(&rmesa->state.aos[i], VB->Count); + } +#endif + } + + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); e32(0x0000000a); - - reg_start(0x4f18,0); + + reg_start(0x4f18,0); e32(0x00000003); +#if 0 + reg_start(R300_VAP_PVS_WAITIDLE,0); + e32(0x00000000); +#endif + r300EmitState(rmesa); - - rmesa->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA - | R300_VPORT_X_OFFSET_ENA - | R300_VPORT_Y_SCALE_ENA - | R300_VPORT_Y_OFFSET_ENA - | R300_VTX_W0_FMT; - R300_STATECHANGE(rmesa, vte); - - r300EmitState(rmesa); - - SINGLE_TEXTURE_PIPELINE.vertex_shader.matrix[0].length=16; - memcpy(SINGLE_TEXTURE_PIPELINE.vertex_shader.matrix[0].body.f, ctx->_ModelProjectMatrix.m, 16*4); + for(i=0; i < VB->PrimitiveCount; i++){ + GLuint prim = VB->Primitive[i].mode; + GLuint start = VB->Primitive[i].start; + GLuint length = VB->Primitive[i].count; + + r300_render_vb_primitive(rmesa, ctx, start, start + length, prim); + } - SINGLE_TEXTURE_PIPELINE.vertex_shader.unknown2.length=4; - SINGLE_TEXTURE_PIPELINE.vertex_shader.unknown2.body.f[0]=0.0; - SINGLE_TEXTURE_PIPELINE.vertex_shader.unknown2.body.f[1]=0.0; - SINGLE_TEXTURE_PIPELINE.vertex_shader.unknown2.body.f[2]=1.0; - SINGLE_TEXTURE_PIPELINE.vertex_shader.unknown2.body.f[3]=0.0; + reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); + e32(0x0000000a/*0x2*/); - /* Use actual texture offset */ - - fprintf(stderr,"pp_border_color=%08x pp_cubic_faces=%08x format=%08x size=%08x format_x=%08x\n", - t->pp_border_color, t->pp_cubic_faces, t->format, t->size, t->format_x); - - SINGLE_TEXTURE_PIPELINE.texture_unit[0].offset=rmesa->radeon.radeonScreen->fbLocation+t->offset; - #if 0 - SINGLE_TEXTURE_PIPELINE.texture_unit[0].format=t->format; - #endif - SINGLE_TEXTURE_PIPELINE.texture_unit[0].size=t->size; - SINGLE_TEXTURE_PIPELINE.texture_unit[0].filter=t->filter; - SINGLE_TEXTURE_PIPELINE.texture_unit[0].unknown1=t->pitch; /* Unknown 1 is pitch ! */ - SINGLE_TEXTURE_PIPELINE.texture_unit[0].filter=t->filter; - - - /* Upload texture, a hack, really we can do a lot better */ - #if 0 - memcpy(rsp->gartTextures.map, to->Image[0][0]->Data, to->Image[0][0]->RowStride*to->Image[0][0]->Height*4); - #endif - - /* Program RS unit. This needs to be moved into R300 pipeline */ -reg_start(R300_RS_CNTL_0,1); - /* R300_RS_CNTL_0(4300) */ - e32(0x00040084); - /* RS_INST_COUNT(4304) */ - e32(0x000000c0); - -reg_start(R300_RS_ROUTE_0,0); - e32(0x00024008); - -reg_start(R300_RS_INTERP_0,7); - /* X_MEM0_0(4310) */ - e32(0x00d10000); - /* X_MEM0_1(4314) */ - e32(0x00d10044); - /* X_MEM0_2(4318) */ - e32(0x00d10084); - /* X_MEM0_3(431c) */ - e32(0x00d100c4); - /* X_MEM0_4(4320) */ - e32(0x00d10004); - /* X_MEM0_5(4324) */ - e32(0x00d10004); - /* X_MEM0_6(4328) */ - e32(0x00d10004); - /* X_MEM0_7(432c) */ - e32(0x00d10004); - - reg_start(R300_RS_CNTL_0,0); - e32(0x00040084); - - /* Magic register - note it is right after 20b0 */ - - reg_start(0x20b4,0); - e32(0x0000000c); - - program_pipeline(PASS_PREFIX &SINGLE_TEXTURE_PIPELINE); - - /* We need LOAD_VBPNTR to setup AOS_ATTR fields.. the offsets are irrelevant */ - setup_AOS(PASS_PREFIX vb_arrays, 3); - - for(i=0; i < VB->PrimitiveCount; i++){ - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - r300_render_tex_primitive(rmesa, ctx, start, start + length, prim); - } - - /* This sequence is required after any 3d drawing packet - I suspect it work arounds a bug (or deficiency) in hardware */ - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0); - e32(0x0000000a); - - reg_start(0x4f18,0); - e32(0x00000003); - -// exit(-1); - fprintf(stderr, "\n"); - return GL_FALSE; + reg_start(0x4f18,0); + e32(0x00000003/*0x1*/); + +#ifdef USER_BUFFERS + r300UseArrays(ctx); +#endif + r300ReleaseArrays(ctx); + return GL_FALSE; } +#define FALLBACK_IF(expr) \ +do { \ + if (expr) { \ + if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ + WARN_ONCE("Software fallback:%s\n", #expr); \ + return R300_FALLBACK_RAST; \ + } \ +} while(0) + +int r300Fallback(GLcontext *ctx) +{ + int i; + + //FALLBACK_IF(ctx->RenderMode != GL_RENDER); // We do not do SELECT or FEEDBACK (yet ?) + +#if 0 /* These should work now.. */ + FALLBACK_IF(ctx->Color.DitherFlag); + FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST + FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND + FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL +#endif + FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT + FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE + //FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST + + //FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG disable as swtcl doesnt seem to support this + //FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH disabling to get blender going + FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE + FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB + + + FALLBACK_IF(ctx->Line.StippleFlag); + + /* HW doesnt appear to directly support these */ + FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH + FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH + /* Rest could be done with vertex fragments */ + if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) + FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) + return R300_FALLBACK_TCL; + + return R300_FALLBACK_NONE; +} /** * Called by the pipeline manager to render a batch of primitives. @@ -675,137 +505,186 @@ reg_start(R300_RS_INTERP_0,7); static GLboolean r300_run_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - - if (RADEON_DEBUG == DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - #if 1 - /* Just switch between pipelines.. We could possibly do better.. (?) */ - if(ctx->Texture.Unit[0].Enabled) - return r300_run_tex_render(ctx, stage); - else - return r300_run_flat_render(ctx, stage); - #else - return GL_TRUE; - #endif - -#if 0 - mgaContextPtr mmesa = MGA_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; - GLuint i; - /* Don't handle clipping or indexed vertices or vertex manipulations. - */ - if (mmesa->RenderIndex != 0 || - !mga_validate_render( ctx, VB )) { - return GL_TRUE; - } + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); - tnl->Driver.Render.Start( ctx ); - mmesa->SetupNewInputs = ~0; + if (r300Fallback(ctx) >= R300_FALLBACK_RAST) + return GL_TRUE; - for (i = 0 ; i < VB->PrimitiveCount ; i++) - { - GLuint prim = VB->Primitive[i].mode; - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; + return r300_run_vb_render(ctx, stage); +} - if (!length) - continue; +const struct tnl_pipeline_stage _r300_render_stage = { + "r300 hw rasterize", + NULL, + NULL, + NULL, + NULL, + r300_run_render /* run */ +}; - mga_render_tab_verts[prim & PRIM_MODE_MASK]( ctx, start, start + length, - prim); - } +static GLboolean r300_run_tcl_render(GLcontext *ctx, + struct tnl_pipeline_stage *stage) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_vertex_program *vp; + + hw_tcl_on=future_hw_tcl_on; + + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s\n", __FUNCTION__); + if(hw_tcl_on == GL_FALSE) + return GL_TRUE; + + if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + + r300UpdateShaders(rmesa); - tnl->Driver.Render.Finish( ctx ); + vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); +#if 0 /* Draw every second request with software arb vp */ + vp->native++; + vp->native &= 1; + //vp->native = GL_FALSE; +#endif - return GL_FALSE; /* finished the pipe */ +#if 0 /* You dont want to know what this does... */ + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct tnl_cache *cache; + struct tnl_cache_item *c; + + cache = tnl->vp_cache; + c = cache->items[0xc000cc0e % cache->size]; + + if(c && c->data == vp) + vp->native = GL_FALSE; + +#endif +#if 0 + vp->native = GL_FALSE; #endif + if (vp->native == GL_FALSE) { + hw_tcl_on = GL_FALSE; + return GL_TRUE; + } + //r300UpdateShaderStates(rmesa); + + return r300_run_vb_render(ctx, stage); } +const struct tnl_pipeline_stage _r300_tcl_stage = { + "r300 tcl", + NULL, + NULL, + NULL, + NULL, + r300_run_tcl_render /* run */ +}; -/** - * Called by the pipeline manager once before rendering. - * We check the GL state here to - * a) decide whether we can do the current state in hardware and - * b) update hardware registers +/* R300 texture rectangle expects coords in 0..1 range, not 0..dimension + * as in the extension spec. Need to translate here. + * + * Note that swrast expects 0..dimension, so if a fallback is active, + * don't do anything. (Maybe need to configure swrast to match hw) */ -#define FALLBACK_IF(expr) \ -do { \ - if (expr) { \ - if (RADEON_DEBUG & DEBUG_FALLBACKS) \ - fprintf(stderr, "%s: fallback:%s\n", \ - __FUNCTION__, #expr); \ - stage->active = GL_FALSE; \ - return; \ - } \ -} while(0) +struct texrect_stage_data { + GLvector4f texcoord[MAX_TEXTURE_UNITS]; +}; + +#define TEXRECT_STAGE_DATA(stage) ((struct texrect_stage_data *)stage->privatePtr) -static void r300_check_render(GLcontext *ctx, struct tnl_pipeline_stage *stage) + +static GLboolean run_texrect_stage( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - int i; + struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *VB = &tnl->vb; + GLuint i; - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + if (rmesa->radeon.Fallback) + return GL_TRUE; - /* We only support rendering in hardware for now */ - if (ctx->RenderMode != GL_RENDER) { - stage->active = GL_FALSE; - return; - } + for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_RECT_BIT) { + struct gl_texture_object *texObj = ctx->Texture.Unit[i].CurrentRect; + struct gl_texture_image *texImage = texObj->Image[0][texObj->BaseLevel]; + const GLfloat iw = 1.0/texImage->Width; + const GLfloat ih = 1.0/texImage->Height; + GLfloat *in = (GLfloat *)VB->TexCoordPtr[i]->data; + GLint instride = VB->TexCoordPtr[i]->stride; + GLfloat (*out)[4] = store->texcoord[i].data; + GLint j; + + store->texcoord[i].size = VB->TexCoordPtr[i]->size; + for (j = 0 ; j < VB->Count ; j++) { + switch (VB->TexCoordPtr[i]->size) { + case 4: + out[j][3] = in[3]; + /* fallthrough */ + case 3: + out[j][2] = in[2]; + /* fallthrough */ + default: + out[j][0] = in[0] * iw; + out[j][1] = in[1] * ih; + } + in = (GLfloat *)((GLubyte *)in + instride); + } + + VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i] = &store->texcoord[i]; + } + } - // I failed to figure out how dither works in hardware, - // let's just ignore it for now - //FALLBACK_IF(ctx->Color.DitherFlag); + return GL_TRUE; +} - /* I'm almost certain I forgot something here */ - FALLBACK_IF(ctx->Color.AlphaEnabled); // GL_ALPHA_TEST - FALLBACK_IF(ctx->Color.BlendEnabled); // GL_BLEND - FALLBACK_IF(ctx->Fog.Enabled); // GL_FOG - FALLBACK_IF(ctx->Line.SmoothFlag); // GL_LINE_SMOOTH - FALLBACK_IF(ctx->Line.StippleFlag); // GL_LINE_STIPPLE - FALLBACK_IF(ctx->Point.SmoothFlag); // GL_POINT_SMOOTH - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); // GL_POINT_SPRITE_NV - FALLBACK_IF(ctx->Polygon.OffsetPoint); // GL_POLYGON_OFFSET_POINT - FALLBACK_IF(ctx->Polygon.OffsetLine); // GL_POLYGON_OFFSET_LINE - FALLBACK_IF(ctx->Polygon.OffsetFill); // GL_POLYGON_OFFSET_FILL - FALLBACK_IF(ctx->Polygon.SmoothFlag); // GL_POLYGON_SMOOTH - FALLBACK_IF(ctx->Polygon.StippleFlag); // GL_POLYGON_STIPPLE - FALLBACK_IF(ctx->Stencil.Enabled); // GL_STENCIL_TEST - FALLBACK_IF(ctx->Multisample.Enabled); // GL_MULTISAMPLE_ARB - /* One step at a time - let one texture pass.. */ - for (i = 1; i < ctx->Const.MaxTextureUnits; i++) - FALLBACK_IF(ctx->Texture.Unit[i].Enabled); +/* Called the first time stage->run() is invoked. + */ +static GLboolean alloc_texrect_data( GLcontext *ctx, + struct tnl_pipeline_stage *stage ) +{ + struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; + struct texrect_stage_data *store; + GLuint i; + stage->privatePtr = CALLOC(sizeof(*store)); + store = TEXRECT_STAGE_DATA(stage); + if (!store) + return GL_FALSE; - /* let r300_run_render do its job */ - #if 0 - stage->active = GL_FALSE; - #endif -} + for (i = 0 ; i < ctx->Const.MaxTextureUnits ; i++) + _mesa_vector4f_alloc( &store->texcoord[i], 0, VB->Size, 32 ); + return GL_TRUE; +} -static void dtr(struct tnl_pipeline_stage *stage) +static void free_texrect_data( struct tnl_pipeline_stage *stage ) { - (void)stage; + struct texrect_stage_data *store = TEXRECT_STAGE_DATA(stage); + GLuint i; + + if (store) { + for (i = 0 ; i < MAX_TEXTURE_UNITS ; i++) + if (store->texcoord[i].data) + _mesa_vector4f_free( &store->texcoord[i] ); + FREE( store ); + stage->privatePtr = NULL; + } } -const struct tnl_pipeline_stage _r300_render_stage = { - "r300 hw rasterize", - _NEW_ALL, /* re-check (always re-check for now) */ - 0, /* re-run (always runs) */ - GL_TRUE, /* active */ - 0, 0, /* inputs (set in check_render), outputs */ - 0, 0, /* changed_inputs, private */ - dtr, /* destructor */ - r300_check_render, /* check */ - r300_run_render /* run */ +const struct tnl_pipeline_stage _r300_texrect_stage = +{ + "r300 texrect stage", /* name */ + NULL, + alloc_texrect_data, + free_texrect_data, + NULL, + run_texrect_stage }; +