X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fr300%2Fr300_render.c;h=a8d42a2391eb22aeb9f2219ad79f9a723280bc3b;hb=93a9d2f18de8517af92eba787a4eee34765481ab;hp=a118dab7401ef65e45a6d74947eb35dd3408d2f0;hpb=b47018937896c67aabab69a30c485a2620abcf19;p=mesa.git diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index a118dab7401..a8d42a2391e 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -45,17 +45,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * obviously this does work... Further investigation is needed. * * \author Nicolai Haehnle + * + * \todo Add immediate implementation back? Perhaps this is useful if there are + * no bugs... */ -#include "glheader.h" -#include "state.h" -#include "imports.h" -#include "enums.h" -#include "macros.h" -#include "context.h" -#include "dd.h" -#include "simple_list.h" -#include "api_arrayelt.h" +#include "main/glheader.h" +#include "main/state.h" +#include "main/imports.h" +#include "main/enums.h" +#include "main/macros.h" +#include "main/context.h" +#include "main/dd.h" +#include "main/simple_list.h" +#include "main/api_arrayelt.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" @@ -70,59 +73,54 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_reg.h" #include "r300_tex.h" -#include "r300_maos.h" #include "r300_emit.h" +#include "r300_fragprog.h" extern int future_hw_tcl_on; /** * \brief Convert a OpenGL primitive type into a R300 primitive type. */ -static int r300PrimitiveType(r300ContextPtr rmesa, GLcontext * ctx, int prim) +int r300PrimitiveType(r300ContextPtr rmesa, int prim) { - int type = -1; - switch (prim & PRIM_MODE_MASK) { case GL_POINTS: - type = R300_VAP_VF_CNTL__PRIM_POINTS; + return R300_VAP_VF_CNTL__PRIM_POINTS; break; case GL_LINES: - type = R300_VAP_VF_CNTL__PRIM_LINES; + return R300_VAP_VF_CNTL__PRIM_LINES; break; case GL_LINE_STRIP: - type = R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + return R300_VAP_VF_CNTL__PRIM_LINE_STRIP; break; case GL_LINE_LOOP: - type = R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + return R300_VAP_VF_CNTL__PRIM_LINE_LOOP; break; case GL_TRIANGLES: - type = R300_VAP_VF_CNTL__PRIM_TRIANGLES; + return R300_VAP_VF_CNTL__PRIM_TRIANGLES; break; case GL_TRIANGLE_STRIP: - type = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; break; case GL_TRIANGLE_FAN: - type = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; break; case GL_QUADS: - type = R300_VAP_VF_CNTL__PRIM_QUADS; + return R300_VAP_VF_CNTL__PRIM_QUADS; break; case GL_QUAD_STRIP: - type = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; break; case GL_POLYGON: - type = R300_VAP_VF_CNTL__PRIM_POLYGON; + return R300_VAP_VF_CNTL__PRIM_POLYGON; break; default: - fprintf(stderr, - "%s:%s Do not know how to handle primitive 0x%04x - help me !\n", - __FILE__, __FUNCTION__, prim & PRIM_MODE_MASK); + assert(0); return -1; break; } - return type; } -static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) +int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) { int verts_off = 0; @@ -166,209 +164,253 @@ static int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) verts_off = num_verts; break; default: - fprintf(stderr, - "%s:%s Do not know how to handle primitive 0x%04x - help me !\n", - __FILE__, __FUNCTION__, prim & PRIM_MODE_MASK); + assert(0); return -1; break; } - if (num_verts - verts_off == 0) { - WARN_ONCE - ("user error: Need more than %d vertices to draw primitive 0x%04x !\n", - num_verts, prim & PRIM_MODE_MASK); - return -1; - } - - if (verts_off > 0) { - WARN_ONCE - ("user error: %d is not a valid number of vertices for primitive 0x%04x !\n", - num_verts, prim & PRIM_MODE_MASK); - return -1; - } - return num_verts - verts_off; } -static void inline r300FireEB(r300ContextPtr rmesa, unsigned long addr, - int vertex_count, int type, int elt_size) +static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; - unsigned long t_addr; - unsigned long magic_1, magic_2; - - assert(elt_size == 2 || elt_size == 4); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + void *out; + + rmesa->state.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, + 0, n_elts * 4, 4, + RADEON_GEM_DOMAIN_GTT, 0); + rmesa->state.elt_dma_offset = 0; + radeon_bo_map(rmesa->state.elt_dma_bo, 1); + out = rmesa->state.elt_dma_bo->ptr + rmesa->state.elt_dma_offset; + memcpy(out, elts, n_elts * 4); + radeon_bo_unmap(rmesa->state.elt_dma_bo); +} - if (addr & (elt_size - 1)) { - WARN_ONCE("Badly aligned buffer\n"); - return; - } +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(rmesa); + + if (vertex_count > 0) { + BEGIN_BATCH(8); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | + type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + + if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + OUT_BATCH_RELOC(rmesa->state.elt_dma_offset, + rmesa->state.elt_dma_bo, + rmesa->state.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(vertex_count); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); + OUT_BATCH(rmesa->state.elt_dma_offset); + OUT_BATCH(vertex_count); + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.elt_dma_bo, + 0, + rmesa->state.elt_dma_bo->size, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); + } +} - magic_1 = (addr % 32) / 4; - t_addr = addr & ~0x1d; - magic_2 = (vertex_count + 1 + (t_addr & 0x2)) / 2 + magic_1; +static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) +{ + BATCH_LOCALS(rmesa); + uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; + int i; - start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0); - if (elt_size == 4) { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type | - R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - } else { - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | - (vertex_count << 16) | type); + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + BEGIN_BATCH(sz+2); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + + if (!rmesa->radeon.radeonScreen->driScreen->dri2.enabled) { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->state.aos[i].components << 0) | + (rmesa->state.aos[i].stride << 8) | + (rmesa->state.aos[i + 1].components << 16) | + (rmesa->state.aos[i + 1].stride << 24)); + + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2); -#ifdef OPTIMIZE_ELTS - if (elt_size == 4) { - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr); - } else { - e32(R300_EB_UNK1 | (magic_1 << 16) | R300_EB_UNK2); - e32(t_addr); + if (nr & 1) { + OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | + (rmesa->state.aos[nr - 1].stride << 8)); + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->state.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + } else { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->state.aos[i].components << 0) | + (rmesa->state.aos[i].stride << 8) | + (rmesa->state.aos[i + 1].components << 16) | + (rmesa->state.aos[i + 1].stride << 24)); + + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + OUT_BATCH(voffset); } -#else - e32(R300_EB_UNK1 | (0 << 16) | R300_EB_UNK2); - e32(addr); -#endif - if (elt_size == 4) { - e32(vertex_count); - } else { -#ifdef OPTIMIZE_ELTS - e32(magic_2); -#else - e32((vertex_count + 1) / 2); -#endif + if (nr & 1) { + OUT_BATCH((rmesa->state.aos[nr - 1].components << 0) | + (rmesa->state.aos[nr - 1].stride << 8)); + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + OUT_BATCH(voffset); + } + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->state.aos[i + 0].offset + + offset * 4 * rmesa->state.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[i+0].bo, + voffset, + rmesa->state.aos[i+0].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->state.aos[i + 1].offset + + offset * 4 * rmesa->state.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[i+1].bo, + voffset, + rmesa->state.aos[i+1].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->state.aos[nr - 1].offset + + offset * 4 * rmesa->state.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->cmdbuf.cs, + rmesa->state.aos[nr-1].bo, + voffset, + rmesa->state.aos[nr-1].bo->size, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } + } + END_BATCH(); +} + +static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(rmesa); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); } static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, int start, int end, int prim) { + BATCH_LOCALS(rmesa); int type, num_verts; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; - type = r300PrimitiveType(rmesa, ctx, prim); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); if (type < 0 || num_verts <= 0) return; - if (rmesa->state.VB.Elts) { - r300EmitAOS(rmesa, rmesa->state.aos_count, start); + /* Make space for at least 64 dwords. + * This is supposed to ensure that we can get all rendering + * commands into a single command buffer. + */ + r300EnsureCmdBufSpace(rmesa, 64, __FUNCTION__); + + if (vb->Elts) { if (num_verts > 65535) { /* not implemented yet */ WARN_ONCE("Too many elts\n"); return; } - r300EmitElts(ctx, rmesa->state.VB.Elts, num_verts, - rmesa->state.VB.elt_size); - r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, - num_verts, type, rmesa->state.VB.elt_size); - } else { + /* Note: The following is incorrect, but it's the best I can do + * without a major refactoring of how DMA memory is handled. + * The problem: Ensuring that both vertex arrays *and* index + * arrays are at the right position, and then ensuring that + * the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted + * at once. + * + * So why is the following incorrect? Well, it seems like + * allocating the index array might actually evict the vertex + * arrays. *sigh* + */ + r300EmitElts(ctx, vb->Elts, num_verts); r300EmitAOS(rmesa, rmesa->state.aos_count, start); - fire_AOS(rmesa, num_verts, type); - } -} - -#define CONV_VB(a, b) rvb->AttribPtr[(a)].size = vb->b->size, \ - rvb->AttribPtr[(a)].type = GL_FLOAT, \ - rvb->AttribPtr[(a)].stride = vb->b->stride, \ - rvb->AttribPtr[(a)].data = vb->b->data - -static void radeon_vb_to_rvb(r300ContextPtr rmesa, - struct radeon_vertex_buffer *rvb, - struct vertex_buffer *vb) -{ - int i; - GLcontext *ctx; - ctx = rmesa->radeon.glCtx; - - memset(rvb, 0, sizeof(*rvb)); - - rvb->Elts = vb->Elts; - rvb->elt_size = 4; - rvb->elt_min = 0; - rvb->elt_max = vb->Count; - - rvb->Count = vb->Count; - - if (hw_tcl_on) { - CONV_VB(VERT_ATTRIB_POS, ObjPtr); + r300FireEB(rmesa, num_verts, type); } else { - assert(vb->ClipPtr); - CONV_VB(VERT_ATTRIB_POS, ClipPtr); + r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300FireAOS(rmesa, num_verts, type); } - - CONV_VB(VERT_ATTRIB_NORMAL, NormalPtr); - CONV_VB(VERT_ATTRIB_COLOR0, ColorPtr[0]); - CONV_VB(VERT_ATTRIB_COLOR1, SecondaryColorPtr[0]); - CONV_VB(VERT_ATTRIB_FOG, FogCoordPtr); - - for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) - CONV_VB(VERT_ATTRIB_TEX0 + i, TexCoordPtr[i]); - - for (i = 0; i < MAX_VERTEX_PROGRAM_ATTRIBS; i++) - CONV_VB(VERT_ATTRIB_GENERIC0 + i, - AttribPtr[VERT_ATTRIB_GENERIC0 + i]); - - rvb->Primitive = vb->Primitive; - rvb->PrimitiveCount = vb->PrimitiveCount; - rvb->LockFirst = rvb->LockCount = 0; - rvb->lock_uptodate = GL_FALSE; + COMMIT_BATCH(); } static GLboolean r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct radeon_vertex_buffer *VB = &rmesa->state.VB; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (stage) { - TNLcontext *tnl = TNL_CONTEXT(ctx); - radeon_vb_to_rvb(rmesa, VB, &tnl->vb); - } - r300UpdateShaders(rmesa); if (r300EmitArrays(ctx)) return GL_TRUE; r300UpdateShaderStates(rmesa); - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - + r300EmitCacheFlush(rmesa); r300EmitState(rmesa); - for (i = 0; i < VB->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&VB->Primitive[i]); - GLuint start = VB->Primitive[i].start; - GLuint length = VB->Primitive[i].count; - GLuint end = VB->Primitive[i].start + VB->Primitive[i].count; + for (i = 0; i < vb->PrimitiveCount; i++) { + GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); + GLuint start = vb->Primitive[i].start; + GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; r300RunRenderPrimitive(rmesa, ctx, start, end, prim); } - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_UNKNOWN_0A); - - reg_start(R300_RB3D_ZCACHE_CTLSTAT, 0); - e32(R300_RB3D_ZCACHE_UNKNOWN_03); - -#ifdef USER_BUFFERS - r300UseArrays(ctx); -#endif + r300EmitCacheFlush(rmesa); r300ReleaseArrays(ctx); @@ -388,13 +430,26 @@ static GLboolean r300RunRender(GLcontext * ctx, static int r300Fallback(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - struct r300_fragment_program *fp = (struct r300_fragment_program *) + /* Do we need to use new-style shaders? + * Also is there a better way to do this? */ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; - - if (fp) { - if (!fp->translated) - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + if (fp) { + if (!fp->translated) { + r500TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } + } else { + struct r300_fragment_program *fp = (struct r300_fragment_program *) + (char *)ctx->FragmentProgram._Current; + if (fp) { + if (!fp->translated) { + r300TranslateFragmentShader(r300, fp); + FALLBACK_IF(!fp->translated); + } + } } FALLBACK_IF(ctx->RenderMode != GL_RENDER); @@ -406,16 +461,12 @@ static int r300Fallback(GLcontext * ctx) || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[1])); - FALLBACK_IF(ctx->Color.ColorLogicOpEnabled); - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) FALLBACK_IF(ctx->Point.PointSprite); if (!r300->disable_lowimpact_fallback) { - FALLBACK_IF(ctx->Polygon.OffsetPoint); - FALLBACK_IF(ctx->Polygon.OffsetLine); FALLBACK_IF(ctx->Polygon.StippleFlag); - FALLBACK_IF(ctx->Multisample.Enabled); + FALLBACK_IF(ctx->Multisample._Enabled); FALLBACK_IF(ctx->Line.StippleFlag); FALLBACK_IF(ctx->Line.SmoothFlag); FALLBACK_IF(ctx->Point.SmoothFlag); @@ -427,12 +478,17 @@ static int r300Fallback(GLcontext * ctx) static GLboolean r300RunNonTCLRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); if (r300Fallback(ctx) >= R300_FALLBACK_RAST) return GL_TRUE; + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + return GL_TRUE; + return r300RunRender(ctx, stage); }