From: Dave Airlie Date: Sun, 8 Feb 2009 17:50:38 +0000 (+1000) Subject: radeon: make more r100 work X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3fafaf8959681cc41c988607bb6e387bab4fe1b5;p=mesa.git radeon: make more r100 work --- diff --git a/src/mesa/drivers/dri/radeon/common_context.h b/src/mesa/drivers/dri/radeon/common_context.h index 90abca0bc10..618e74d4583 100644 --- a/src/mesa/drivers/dri/radeon/common_context.h +++ b/src/mesa/drivers/dri/radeon/common_context.h @@ -269,6 +269,7 @@ struct radeon_swtcl_info { struct radeon_ioctl { GLuint vertex_offset; + struct radeon_bo *bo; GLuint vertex_size; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 6edbaf69107..358095a2b9d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -334,11 +334,6 @@ struct r100_state { struct radeon_texture_state texture; }; -#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ - (rvb)->address - rmesa->dma.buf0_address + \ - (rvb)->start) - - #define RADEON_CMD_BUF_SZ (8*1024) #define R200_ELT_BUF_SZ (8*1024) /* radeon_tcl.c @@ -352,12 +347,14 @@ struct radeon_tcl_info { */ GLvector4f ObjClean; - struct radeon_dma_region *aos_components[8]; + struct radeon_aos aos[8]; GLuint nr_aos_components; GLuint *Elts; - struct radeon_dma_region indexed_verts; + struct radeon_bo *indexed_bo; + +// struct radeon_dma_region indexed_verts; struct radeon_dma_region obj; struct radeon_dma_region rgba; struct radeon_dma_region spec; @@ -365,8 +362,8 @@ struct radeon_tcl_info { struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS]; struct radeon_dma_region norm; - struct radeon_bo *elt_dma_bo; - int elt_dma_offset; /** Offset into this buffer object, in bytes */ + int elt_cmd_offset; /** Offset into the cmdbuf */ + int elt_cmd_start; int elt_used; }; diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c index 1c885c1612b..fbb93a8ba1e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -313,7 +313,7 @@ static int cs_emit(struct radeon_cs *cs) cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects; } - //dump_cmdbuf(cs); + //dump_cmdbuf(cs); r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); if (r) { diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index 4ab297c5cee..1a33595884e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -59,7 +59,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define RADEON_TIMEOUT 512 #define RADEON_IDLE_RETRY 16 -#define DEBUG_CMDBUF 0 +#define DEBUG_CMDBUF 1 static void radeonSaveHwState( r100ContextPtr rmesa ) { @@ -190,10 +190,6 @@ static INLINE void radeonEmitAtoms(r100ContextPtr r100, GLboolean dirty) void radeonEmitState( r100ContextPtr rmesa ) { - struct radeon_state_atom *atom; - char *dest; - uint32_t dwords; - if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) fprintf(stderr, "%s\n", __FUNCTION__); @@ -212,7 +208,6 @@ void radeonEmitState( r100ContextPtr rmesa ) * radeonAllocCmdBuf code here without all the checks. */ rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size, __FUNCTION__); - dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; /* We always always emit zbs, this is due to a bug found by keithw in the hardware and rediscovered after Erics changes by me. @@ -250,12 +245,26 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, radeonEmitState( rmesa ); - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__, - rmesa->store.cmd_used/4); + // if (RADEON_DEBUG & DEBUG_IOCTL) + // fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__, + // rmesa->store.cmd_used/4); + + +#if RADEON_OLD_PACKETS + BEGIN_BATCH(6); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3); + OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(vertex_nr); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + END_BATCH(); - BEGIN_BATCH(3); - OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 0); +#else + BEGIN_BATCH(4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1); OUT_BATCH(vertex_format); OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST | @@ -264,45 +273,89 @@ extern void radeonEmitVbufPrim( r100ContextPtr rmesa, RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); END_BATCH(); +#endif } +static void radeonFireEB(r100ContextPtr rmesa, int vertex_count, int vertex_format, int type) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (vertex_count > 0) { + BEGIN_BATCH(8); + OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_DRAW_INDX, 0); + OUT_BATCH(vertex_format); + OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_IND | + ((vertex_count + 0) << 16) | + type); + + assert(0); // RADEON HAS NO INDX_BUFFERs +#if 0 + + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); + OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); + OUT_BATCH_RELOC(rmesa->tcl.elt_dma_offset, + rmesa->tcl.elt_dma_bo, + rmesa->tcl.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(vertex_count/2); + } else { + OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); + OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); + OUT_BATCH(rmesa->tcl.elt_dma_offset); + OUT_BATCH(vertex_count/2); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } +#endif + END_BATCH(); + } +} void radeonFlushElts( GLcontext *ctx ) { r100ContextPtr rmesa = R100_CONTEXT(ctx); - int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); + BATCH_LOCALS(&rmesa->radeon); int dwords; -#if RADEON_OLD_PACKETS - int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2; -#else - int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2; -#endif - + uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start); + int nr = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw) * 2; + if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); assert( rmesa->radeon.dma.flush == radeonFlushElts ); rmesa->radeon.dma.flush = NULL; + /* Cope with odd number of elts: */ - rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; - dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + // rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; + // dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + dwords = nr / 2; + + rmesa->radeon.cmdbuf.cs->cdw += dwords; #if RADEON_OLD_PACKETS - cmd[1] |= (dwords - 3) << 16; + cmd[1] |= (dwords + 3) << 16; cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; #else - cmd[1] |= (dwords - 3) << 16; + cmd[1] |= (dwords) << 16; cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; #endif + fprintf(stderr,"nr is %d cmd1 is %08x\n", nr, cmd[1]); + + rmesa->radeon.cmdbuf.cs->section_cdw += dwords; + END_BATCH(); + if (RADEON_DEBUG & DEBUG_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); radeonFinish( rmesa->radeon.glCtx ); } -} +} GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, GLuint vertex_format, @@ -310,46 +363,65 @@ GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, GLuint min_nr ) { GLushort *retval; - + BATCH_LOCALS(&rmesa->radeon); if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr); + fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); radeonEmitState( rmesa ); - rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, - 0, R200_ELT_BUF_SZ, 4, - RADEON_GEM_DOMAIN_GTT, 0); - rmesa->tcl.elt_dma_offset = 0; + rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw; + +#if RADEON_OLD_PACKETS + BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(min_nr)/4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0); + OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(0xffff); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); + +#else + BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(min_nr)/4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); +#endif + + + rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw; rmesa->tcl.elt_used = min_nr * 2; - radeon_bo_map(rmesa->tcl.elt_dma_bo, 1); - retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset; + retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset); + + fprintf(stderr," %d elt start %d offset %d\n", min_nr, rmesa->tcl.elt_cmd_start, rmesa->tcl.elt_cmd_offset); if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header vfmt 0x%x prim %x \n", - __FUNCTION__, - vertex_format, primitive); + fprintf(stderr, "%s: header prim %x \n", + __FUNCTION__, primitive); assert(!rmesa->radeon.dma.flush); rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; rmesa->radeon.dma.flush = radeonFlushElts; - // rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; - return retval; } - - void radeonEmitVertexAOS( r100ContextPtr rmesa, GLuint vertex_size, + struct radeon_bo *bo, GLuint offset ) { #if RADEON_OLD_PACKETS - rmesa->ioctl.vertex_size = vertex_size; rmesa->ioctl.vertex_offset = offset; + rmesa->ioctl.bo = bo; #else BATCH_LOCALS(&rmesa->radeon); @@ -357,68 +429,124 @@ void radeonEmitVertexAOS( r100ContextPtr rmesa, fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", __FUNCTION__, vertex_size, offset); - BEGIN_BATCH(5); + BEGIN_BATCH(7); OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2); OUT_BATCH(1); OUT_BATCH(vertex_size | (vertex_size << 8)); OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); END_BATCH(); -} + #endif } void radeonEmitAOS( r100ContextPtr rmesa, - struct radeon_dma_region **component, GLuint nr, GLuint offset ) { #if RADEON_OLD_PACKETS assert( nr == 1 ); - assert( component[0]->aos_size == component[0]->aos_stride ); - rmesa->ioctl.vertex_size = component[0]->aos_size; + // assert( rmesa->radeon.aos[0]->aos_size == component[0]->aos_stride ); + // rmesa->ioctl.vertex_offset = + // (component[0]->aos_start + offset * component[0]->aos_stride * 4); + rmesa->ioctl.bo = rmesa->tcl.aos[0].bo; rmesa->ioctl.vertex_offset = - (component[0]->aos_start + offset * component[0]->aos_stride * 4); + (rmesa->tcl.aos[0].offset + offset * rmesa->tcl.aos[0].stride * 4); #else - drm_radeon_cmd_header_t *cmd; - int sz = AOS_BUFSZ(nr); + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; + // int sz = AOS_BUFSZ(nr); + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - int *tmp; if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz, - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16); - cmd[2].i = nr; - tmp = &cmd[0].i; - cmd += 3; - - for (i = 0 ; i < nr ; i++) { - if (i & 1) { - cmd[0].i |= ((component[i]->aos_stride << 24) | - (component[i]->aos_size << 16)); - cmd[2].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); - cmd += 3; + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->tcl.aos[i].components << 0) | + (rmesa->tcl.aos[i].stride << 8) | + (rmesa->tcl.aos[i + 1].components << 16) | + (rmesa->tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->tcl.aos[i + 0].offset + + offset * 4 * rmesa->tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->tcl.aos[i + 1].offset + + offset * 4 * rmesa->tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + + if (nr & 1) { + OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | + (rmesa->tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->tcl.aos[nr - 1].offset + + offset * 4 * rmesa->tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + } else { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->tcl.aos[i].components << 0) | + (rmesa->tcl.aos[i].stride << 8) | + (rmesa->tcl.aos[i + 1].components << 16) | + (rmesa->tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->tcl.aos[i + 0].offset + + offset * 4 * rmesa->tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->tcl.aos[i + 1].offset + + offset * 4 * rmesa->tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } + + if (nr & 1) { + OUT_BATCH((rmesa->tcl.aos[nr - 1].components << 0) | + (rmesa->tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->tcl.aos[nr - 1].offset + + offset * 4 * rmesa->tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); } - else { - cmd[0].i = ((component[i]->aos_stride << 8) | - (component[i]->aos_size << 0)); - cmd[1].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->tcl.aos[i + 0].offset + + offset * 4 * rmesa->tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->tcl.aos[i + 1].offset + + offset * 4 * rmesa->tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->tcl.aos[nr - 1].offset + + offset * 4 * rmesa->tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } } + END_BATCH(); - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "%s:\n", __FUNCTION__); - for (i = 0 ; i < sz ; i++) - fprintf(stderr, " %d: %x\n", i, tmp[i]); - } #endif } diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h index 87d64fb728f..b7f07294946 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extern void radeonEmitState( r100ContextPtr rmesa ); extern void radeonEmitVertexAOS( r100ContextPtr rmesa, GLuint vertex_size, + struct radeon_bo *bo, GLuint offset ); extern void radeonEmitVbufPrim( r100ContextPtr rmesa, @@ -58,8 +59,8 @@ extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, GLuint primitive, GLuint min_nr ); + extern void radeonEmitAOS( r100ContextPtr rmesa, - struct radeon_dma_region **regions, GLuint n, GLuint offset ); diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c index fa3d0335fc2..7f5da16b03d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c @@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "math/m_translate.h" #include "tnl/tnl.h" -#include "tnl/tcontext.h" #include "radeon_context.h" #include "radeon_ioctl.h" @@ -49,23 +48,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_maos.h" #include "radeon_tcl.h" -static void emit_vecfog( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) +static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, + GLvoid *data, int stride, int count) { int i; - GLfloat *out; - + uint32_t *out; + int size = 1; radeonContextPtr rmesa = RADEON_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); - assert (!rvb->buf); - if (stride == 0) { radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 ); count = 1; @@ -125,15 +119,12 @@ static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count) -static void emit_tex_vector( GLcontext *ctx, - struct radeon_aos *aos, - GLvoid *data, - int size, - int stride, - int count ) +static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, + GLvoid *data, int size, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); int emitsize; + uint32_t *out; if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); @@ -160,6 +151,7 @@ static void emit_tex_vector( GLcontext *ctx, /* Emit the data */ + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); switch (size) { case 1: emit_s0_vec( out, data, stride, count ); @@ -188,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx, */ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; - struct radeon_dma_region **component = rmesa->tcl.aos_components; GLuint nr = 0; GLuint vfmt = 0; GLuint count = VB->Count; @@ -204,7 +195,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (1) { if (!rmesa->tcl.obj.buf) rcommon_emit_vector( ctx, - &rmesa->tcl.obj, + &(rmesa->tcl.aos[nr]), (char *)VB->ObjPtr->data, VB->ObjPtr->size, VB->ObjPtr->stride, @@ -217,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) default: break; } - component[nr++] = &rmesa->tcl.obj; + nr++; } if (inputs & VERT_BIT_NORMAL) { if (!rmesa->tcl.norm.buf) rcommon_emit_vector( ctx, - &(rmesa->tcl.norm), + &(rmesa->tcl.aos[nr]), (char *)VB->NormalPtr->data, 3, VB->NormalPtr->stride, count); vfmt |= RADEON_CP_VC_FRMT_N0; - component[nr++] = &rmesa->tcl.norm; + nr++; } if (inputs & VERT_BIT_COLOR0) { @@ -250,14 +241,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.rgba.buf) rcommon_emit_vector( ctx, - &(rmesa->tcl.rgba), + &(rmesa->tcl.aos[nr]), (char *)VB->ColorPtr[0]->data, emitsize, VB->ColorPtr[0]->stride, count); - - component[nr++] = &rmesa->tcl.rgba; + nr++; } @@ -265,7 +255,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (!rmesa->tcl.spec.buf) { rcommon_emit_vector( ctx, - &rmesa->tcl.spec, + &(rmesa->tcl.aos[nr]), (char *)VB->SecondaryColorPtr[0]->data, 3, VB->SecondaryColorPtr[0]->stride, @@ -273,7 +263,7 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) } vfmt |= RADEON_CP_VC_FRMT_FPSPEC; - component[nr++] = &rmesa->tcl.spec; + nr++; } /* FIXME: not sure if this is correct. May need to stitch this together with @@ -282,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (inputs & VERT_BIT_FOG) { if (!rmesa->tcl.fog.buf) emit_vecfog( ctx, - &(rmesa->tcl.fog), + &(rmesa->tcl.aos[nr]), (char *)VB->FogCoordPtr->data, VB->FogCoordPtr->stride, count); vfmt |= RADEON_CP_VC_FRMT_FPFOG; - component[nr++] = &rmesa->tcl.fog; + nr++; } @@ -299,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (inputs & VERT_BIT_TEX(unit)) { if (!rmesa->tcl.tex[unit].buf) emit_tex_vector( ctx, - &(rmesa->tcl.tex[unit]), + &(rmesa->tcl.aos[nr]), (char *)VB->TexCoordPtr[unit]->data, VB->TexCoordPtr[unit]->size, VB->TexCoordPtr[unit]->stride, count ); + nr++; vfmt |= RADEON_ST_BIT(unit); /* assume we need the 3rd coord if texgen is active for r/q OR at least @@ -321,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; } - component[nr++] = &rmesa->tcl.tex[unit]; } } @@ -337,31 +327,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); - GLuint unit; - -#if 0 - if (RADEON_DEBUG & DEBUG_VERTS) - _tnl_print_vert_flags( __FUNCTION__, newinputs ); -#endif - - if (newinputs & VERT_BIT_POS) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ ); - - if (newinputs & VERT_BIT_NORMAL) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ ); - - if (newinputs & VERT_BIT_COLOR0) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ ); - - if (newinputs & VERT_BIT_COLOR1) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ ); - - if (newinputs & VERT_BIT_FOG) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); + int i; - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { - if (newinputs & VERT_BIT_TEX(unit)) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ ); + for (i = 0; i < rmesa->tcl.nr_aos_components; i++) { + if (rmesa->tcl.aos[i].bo) { + radeon_bo_unref(rmesa->tcl.aos[i].bo); + rmesa->tcl.aos[i].bo = NULL; + } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c index 5a0bbaacc9b..bb90a7b1117 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c @@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) break; if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format && - rmesa->tcl.indexed_verts.buf) + rmesa->tcl.aos[0].bo) return; - if (rmesa->tcl.indexed_verts.buf) + if (rmesa->tcl.aos[0].bo) radeonReleaseArrays( ctx, ~0 ); radeonAllocDmaRegion( &rmesa->radeon, - 0, &rmesa->tcl.indexed_verts, + &rmesa->tcl.aos[0].bo, + &rmesa->tcl.aos[0].offset, VB->Count * setup_tab[i].vertex_size * 4, 4); @@ -421,15 +422,11 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) setup_tab[i].emit( ctx, 0, VB->Count, - rmesa->tcl.indexed_verts.address + - rmesa->tcl.indexed_verts.start ); + rmesa->tcl.aos[0].bo->ptr + rmesa->tcl.aos[0].offset); + // rmesa->tcl.aos[0].size = setup_tab[i].vertex_size; + rmesa->tcl.aos[0].stride = setup_tab[i].vertex_size; rmesa->tcl.vertex_format = setup_tab[i].vertex_format; - // rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts ); - rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size; - rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size; - - rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts; rmesa->tcl.nr_aos_components = 1; } @@ -438,12 +435,10 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) { r100ContextPtr rmesa = R100_CONTEXT( ctx ); + int i; -#if 0 - if (RADEON_DEBUG & DEBUG_VERTS) - _tnl_print_vert_flags( __FUNCTION__, newinputs ); -#endif - - /// if (newinputs) - /// radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ ); + for (i = 0; i < rmesa->tcl.nr_aos_components; i++) { + radeon_bo_unref(rmesa->tcl.aos[i].bo); + rmesa->tcl.aos[i].bo = NULL; + } } diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 02ea616a59e..0a9e8141911 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -38,14 +38,128 @@ #include "swrast_setup/swrast_setup.h" #include "radeon_context.h" +#include "common_cmdbuf.h" +#include "radeon_cs.h" +#include "radeon_mipmap_tree.h" #include "radeon_ioctl.h" #include "radeon_state.h" #include "radeon_tcl.h" #include "radeon_tex.h" #include "radeon_swtcl.h" +#include "../r200/r200_reg.h" + #include "xmlpool.h" +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, + {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, + {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, + {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, + {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, + {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, + {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, + {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, + {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, + {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, + {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, + {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, + {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, + {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, + {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, + {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, + {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, + {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, + {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, + {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, + {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, + "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, + {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, + {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, + {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, + {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, + {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, + {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, + {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, + {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, + {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"}, + {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, + {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, + {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, + {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, + {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, + {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, + {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, + {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, + {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, + {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, + {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, + {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, + {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, + {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, + {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, + {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, + {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, + {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, + {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, + {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, + "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, + {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, + {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, + {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, + {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, + {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, + {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, + {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, + {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, + {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, + {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, + {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, + "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, + {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ + {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ + {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, + {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, + {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, + {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, + {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, + {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, + {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, + {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, + {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, + {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, + {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, + {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, + {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, + {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, + {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, + {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, + {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, + {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, + {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, + {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, + {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, + {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, + {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ + {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, + {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, + {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, + {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, + {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, + {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, + {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, + {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, +}; + /* ============================================================= * State initialization */ @@ -65,12 +179,17 @@ void radeonPrintDirty( r100ContextPtr rmesa, const char *msg ) fprintf(stderr, "\n"); } -static int cmdpkt( int id ) +static int cmdpkt( r100ContextPtr rmesa, int id ) { drm_radeon_cmd_header_t h; - h.i = 0; - h.packet.cmd_type = RADEON_CMD_PACKET; - h.packet.packet_id = id; + + if (rmesa->radeon.radeonScreen->kernel_mm) { + return CP_PACKET0(packet[id].start, packet[id].len - 1); + } else { + h.i = 0; + h.packet.cmd_type = RADEON_CMD_PACKET; + h.packet.packet_id = id; + } return h.i; } @@ -146,7 +265,200 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) +#define OUT_VEC(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ + OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \ + OUT_BATCH_TABLE((data), h.vectors.count); \ + } while(0) + +#define OUT_SCL(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ + OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ + OUT_BATCH_TABLE((data), h.scalars.count); \ + } while(0) + +static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_SCL(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} + + +static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} + +static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t zbpitch; + uint32_t dwords = atom->cmd_size; + GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate; + + /* output the first 7 bytes of context */ + BEGIN_BATCH_NO_AUTOSTATE(dwords + 4); + OUT_BATCH_TABLE(atom->cmd, 5); + + rrb = r100->radeon.state.depth.rrb; + if (!rrb) { + OUT_BATCH(0); + OUT_BATCH(0); + } else { + zbpitch = (rrb->pitch / rrb->cpp); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH(zbpitch); + } + + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(atom->cmd[CTX_CMD_1]); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + + rrb = r100->radeon.state.color.rrb; + if (r100->radeon.radeonScreen->driScreen->dri2.enabled) { + rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]); + } else { + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + } + + OUT_BATCH(atom->cmd[CTX_CMD_2]); + + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]); + } else { + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->cpp == 4) + ; + else + ; + if (r100->radeon.sarea->tiling_enabled) + cbpitch |= R200_COLOR_TILE_ENABLE; + OUT_BATCH(cbpitch); + } + + END_BATCH(); +} +static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + struct radeon_renderbuffer *rrb, *drb; + uint32_t cbpitch = 0; + uint32_t zbpitch = 0; + uint32_t dwords = atom->cmd_size; + GLframebuffer *fb = r100->radeon.dri.drawable->driverPrivate; + + rrb = r100->radeon.state.color.rrb; + if (r100->radeon.radeonScreen->driScreen->dri2.enabled) { + rrb = (struct radeon_renderbuffer *)fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + } + if (rrb) { + assert(rrb->bo != NULL); + cbpitch = (rrb->pitch / rrb->cpp); + if (r100->radeon.sarea->tiling_enabled) + cbpitch |= R200_COLOR_TILE_ENABLE; + } + + drb = r100->radeon.state.depth.rrb; + if (drb) + zbpitch = (drb->pitch / drb->cpp); + + /* output the first 7 bytes of context */ + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + /* In the CS case we need to split this up */ + OUT_BATCH(CP_PACKET0(packet[0].start, 3)); + OUT_BATCH_TABLE((atom->cmd + 1), 4); + if (drb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); + OUT_BATCH(zbpitch); + } + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + + + if (rrb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + } + + if (rrb) { + if (rrb->cpp == 4) + ; + else + ; + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); + OUT_BATCH(cbpitch); + } + + // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { + // OUT_BATCH_TABLE((atom->cmd + 14), 4); + // } + + END_BATCH(); +} + + + +static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + int i = atom->idx; + radeonTexObj *t = r100->state.texture.unit[i].texobj; + + if (!t) + return; + + BEGIN_BATCH_NO_AUTOSTATE(dwords + 2); + OUT_BATCH_TABLE(atom->cmd, 3); + if (t && !t->image_override) { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else if (!t) { + + + OUT_BATCH(atom->cmd[10]); + } + + OUT_BATCH_TABLE((atom->cmd+4), 5); + END_BATCH(); +} /* Initialize the context's hardware state. */ @@ -221,6 +533,10 @@ void radeonInitState( r100ContextPtr rmesa ) /* Allocate state buffers: */ ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.ctx.emit = ctx_emit_cs; + else + rmesa->hw.ctx.emit = ctx_emit; ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 ); @@ -236,6 +552,9 @@ void radeonInitState( r100ContextPtr rmesa ) ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 ); ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 ); ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 ); + + for (i = 0; i < 3; i++) + rmesa->hw.tex[i].emit = tex_emit; if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) { ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); @@ -276,35 +595,35 @@ void radeonInitState( r100ContextPtr rmesa ) /* Fill in the packet headers: */ - rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); - rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); - rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); - rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); - rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); - rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); - rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); - rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); - rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS); - rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); - rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0); - rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0); - rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1); - rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1); - rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2); - rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2); - rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0); - rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0); - rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1); - rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1); - rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2); - rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2); - rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); - rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); + rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC); + rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL); + rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH); + rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN); + rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH); + rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK); + rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE); + rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL); + rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS); + rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2); + rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0); + rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0); + rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1); + rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1); + rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2); + rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2); + rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR); + rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); rmesa->hw.mtl.cmd[MTL_CMD_0] = - cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); - rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0); - rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1); - rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2); + cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); + rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0); + rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1); + rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2); rmesa->hw.grd.cmd[GRD_CMD_0] = cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); rmesa->hw.fog.cmd[FOG_CMD_0] = diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 4b49c141a0f..615621dd987 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -46,6 +46,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_pipeline.h" #include "radeon_context.h" +#include "common_cmdbuf.h" #include "radeon_ioctl.h" #include "radeon_state.h" #include "radeon_swtcl.h" @@ -291,7 +292,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) radeonEmitState(rmesa); radeonEmitVertexAOS( rmesa, rmesa->radeon.swtcl.vertex_size, - // rmesa->radeon.dma.current, + rmesa->radeon.dma.current, current_offset); diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index a54e471065c..4f3acc7dafb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_pipeline.h" #include "radeon_context.h" +#include "common_cmdbuf.h" #include "radeon_state.h" #include "radeon_ioctl.h" #include "radeon_tex.h" @@ -145,19 +146,31 @@ static GLboolean discrete_prim[0x10] = { static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) { - if (rmesa->radeon.dma.flush) - rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); +#if 0 + if (rmesa->radeon.dma.flush == radeonFlushElts && + rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { - rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); + GLushort *dest = (GLushort *)(rmesa->tcl.elt_dma_bo->ptr + + rmesa->tcl.elt_used); - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); + rmesa->tcl.elt_used += nr*2; + + return dest; + } + else { +#endif + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); + + rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size + ELTS_BUFSZ(nr) + + AOS_BUFSZ(rmesa->tcl.nr_aos_components), __FUNCTION__); + + radeonEmitAOS( rmesa, + rmesa->tcl.nr_aos_components, 0 ); - return radeonAllocEltsOpenEnded( rmesa, - rmesa->tcl.vertex_format, - rmesa->tcl.hw_primitive, nr ); + return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, nr ); + // } } #define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) @@ -179,17 +192,16 @@ static void radeonEmitPrim( GLcontext *ctx, rcommonEnsureCmdBufSpace( &rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); + rmesa->hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, rmesa->tcl.nr_aos_components, start ); /* Why couldn't this packet have taken an offset param? */ radeonEmitVbufPrim( rmesa, - 0, + rmesa->tcl.vertex_format, rmesa->tcl.hw_primitive, count - start ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 9ef08fbe68a..8784dcc5edd 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -315,7 +315,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat *params ) { - radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData; + radeonTexObj* t = radeon_tex_obj(texObj); if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 3c28d70e376..e76b52437ff 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -903,7 +903,7 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, RADEON_TXFORMAT_CUBIC_MAP_ENABLE | \ RADEON_TXFORMAT_NON_POWER2) -#if 0 + static void import_tex_obj_state( r100ContextPtr rmesa, int unit, radeonTexObjPtr texobj ) @@ -921,7 +921,7 @@ static void import_tex_obj_state( r100ContextPtr rmesa, cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ @@ -931,10 +931,11 @@ static void import_tex_obj_state( r100ContextPtr rmesa, else { se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit); - if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { - int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; - GLuint bytesPerFace = texobj->base.totalSize / 6; - ASSERT(texobj->base.totalSize % 6 == 0); + if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) { + uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; + // GLuint bytesPerFace = texobj->base.totalSize / 6; + // ASSERT(texobj->base.totalSize % 6 == 0); + GLuint bytesPerFace = 1; // TODO RADEON_STATECHANGE( rmesa, cube[unit] ); cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; @@ -957,8 +958,6 @@ static void import_tex_obj_state( r100ContextPtr rmesa, texobj->dirty_state &= ~(1<dirty_state & (1<validated = GL_TRUE; + return GL_TRUE; +} + static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit ) { + r100ContextPtr rmesa = R100_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + if (!ctx->Texture.Unit[unit]._ReallyEnabled) + return GL_TRUE; + + if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + unit); + rmesa->state.texture.unit[unit].texobj = NULL; + return GL_FALSE; + } + rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); + return GL_TRUE; } void radeonUpdateTextureState( GLcontext *ctx )