#include "drivers/common/driverfuncs.h"
#include "radeon_context.h"
+#include "common_cmdbuf.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_span.h"
radeon->vtbl.update_draw_buffer = radeonUpdateDrawBuffer;
radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;
radeon->vtbl.emit_state = r100_vtbl_emit_state;
+ radeon->vtbl.swtcl_flush = r100_swtcl_flush;
}
/* Create the device-specific context.
rmesa->radeon.texture_depth = ( screen->cpp == 4 ) ?
DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
- rmesa->swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->hw.all_dirty = GL_TRUE;
/* Set the maximum texture size small enough that we can guarantee that
radeonReleaseArrays( rmesa->radeon.glCtx, ~0 );
if (rmesa->radeon.dma.current) {
radeonReleaseDmaRegion( &rmesa->radeon );
- radeonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
+ rcommonFlushCmdBuf( &rmesa->radeon, __FUNCTION__ );
}
_mesa_vector4f_free( &rmesa->tcl.ObjClean );
#define RADEON_CMD_BUF_SZ (8*1024)
-
+#define RADEON_ELT_BUF_SZ (8*1024)
/* radeon_tcl.c
*/
struct radeon_tcl_info {
struct radeon_dma_region fog;
struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS];
struct radeon_dma_region norm;
+
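+ /* Element (index) buffer state: indices are written into elt_dma_bo at
+  * elt_dma_offset, and elt_used counts the bytes consumed so far. */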
+ struct radeon_bo *elt_dma_bo;
+ int elt_dma_offset; /**< Offset into this buffer object, in bytes */
+ int elt_used;
};
/* radeon_swtcl.c
*/
-struct radeon_swtcl_info {
- GLuint RenderIndex;
- GLuint vertex_size;
+struct r100_swtcl_info {
GLuint vertex_format;
- struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
- GLuint vertex_attr_count;
-
GLubyte *verts;
/* Fallback rasterization functions
radeon_line_func draw_line;
radeon_tri_func draw_tri;
- GLuint hw_primitive;
- GLenum render_primitive;
- GLuint numverts;
-
/**
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
/* radeon_swtcl.c
*/
- struct radeon_swtcl_info swtcl;
+ struct r100_swtcl_info swtcl;
GLboolean using_hyperz;
GLboolean texmicrotile;
cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
}
// dump_cmdbuf(cs);
r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
if (r) {
#include "swrast/swrast.h"
#include "radeon_context.h"
+#include "common_cmdbuf.h"
+#include "radeon_cs.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_tcl.h"
* for enough space for the case of emitting all state, and inline the
* command-buffer allocation here without all the checks.
*/
- radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size);
+ rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->hw.max_state_size, __FUNCTION__);
dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
/* We always emit zbs; this is due to a bug found by keithw in
GLuint primitive,
GLuint vertex_nr )
{
- drm_radeon_cmd_header_t *cmd;
-
+ BATCH_LOCALS(&rmesa->radeon);
assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__,
rmesa->store.cmd_used/4);
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ,
- __FUNCTION__ );
-#if RADEON_OLD_PACKETS
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16);
- cmd[2].i = rmesa->ioctl.vertex_offset;
- cmd[3].i = vertex_nr;
- cmd[4].i = vertex_format;
- cmd[5].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n",
- __FUNCTION__,
- cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i);
-#else
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16);
- cmd[2].i = vertex_format;
- cmd[3].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
- (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
-
-
- if (RADEON_DEBUG & DEBUG_PRIMS)
- fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n",
- __FUNCTION__,
- cmd[1].i, cmd[2].i, cmd[3].i);
-#endif
+ BEGIN_BATCH(4);
+ OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 0);
+ OUT_BATCH(vertex_format);
+ OUT_BATCH(primitive |
+ RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+ RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+ RADEON_CP_VC_CNTL_MAOS_ENABLE |
+ RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+ (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+ END_BATCH();
}
radeonEmitState( rmesa );
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa,
- ELTS_BUFSZ(min_nr),
- __FUNCTION__ );
-#if RADEON_OLD_PACKETS
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM;
- cmd[2].i = rmesa->ioctl.vertex_offset;
- cmd[3].i = 0xffff;
- cmd[4].i = vertex_format;
- cmd[5].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
- retval = (GLushort *)(cmd+6);
-#else
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP;
- cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX;
- cmd[2].i = vertex_format;
- cmd[3].i = (primitive |
- RADEON_CP_VC_CNTL_PRIM_WALK_IND |
- RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
- RADEON_CP_VC_CNTL_MAOS_ENABLE |
- RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
-
- retval = (GLushort *)(cmd+4);
-#endif
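+ /* Allocate a fresh BO for this batch of indices: a zero handle asks
+  * radeon_bo_open() for a new buffer; 4-byte alignment, GTT domain so
+  * the CP can fetch the elements. */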
+ rmesa->tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
+ 0, RADEON_ELT_BUF_SZ, 4,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ rmesa->tcl.elt_dma_offset = 0;
+ rmesa->tcl.elt_used = min_nr * 2;
+
+ radeon_bo_map(rmesa->tcl.elt_dma_bo, 1);
+ retval = rmesa->tcl.elt_dma_bo->ptr + rmesa->tcl.elt_dma_offset;
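+ /* retval points into the CPU mapping of the elt BO; the caller fills it
+  * with GLushort indices (min_nr * 2 bytes are accounted in elt_used). */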
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n",
rmesa->ioctl.vertex_size = vertex_size;
rmesa->ioctl.vertex_offset = offset;
#else
- drm_radeon_cmd_header_t *cmd;
+ BATCH_LOCALS(&rmesa->radeon);
if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL))
fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
- cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ,
- __FUNCTION__ );
-
- cmd[0].i = 0;
- cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
- cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16);
- cmd[2].i = 1;
- cmd[3].i = vertex_size | (vertex_size << 8);
- cmd[4].i = offset;
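+ /* 3D_LOAD_VBPNTR, one array: dword 1 is the array count, dword 2 packs
+  * vertex size (dwords) in bits 0-7 and stride in bits 8-15, and the
+  * last dword is the relocated buffer offset. */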
+ BEGIN_BATCH(5);
+ OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+ OUT_BATCH(1);
+ OUT_BATCH(vertex_size | (vertex_size << 8));
+ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
#endif
}
#define VBUF_BUFSZ (4 * sizeof(int))
#endif
-/* Ensure that a minimum amount of space is available in the command buffer.
- * This is used to ensure atomicity of state updates with the rendering requests
- * that rely on them.
- *
- * An alternative would be to implement a "soft lock" such that when the buffer
- * wraps at an inopportune time, we grab the lock, flush the current buffer,
- * and hang on to the lock until the critical section is finished and we flush
- * the buffer again and unlock.
- */
-static INLINE void radeonEnsureCmdBufSpace( r100ContextPtr rmesa,
- int bytes )
-{
- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
- assert( bytes <= RADEON_CMD_BUF_SZ );
-}
-/* Alloc space in the command buffer
- */
-static INLINE char *radeonAllocCmdBuf( r100ContextPtr rmesa,
- int bytes, const char *where )
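+/* Build the one-dword drm_radeon_cmd_header_t that prefixes a PACKET3 in
+ * the legacy (non-kernel_mm) command stream. */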
+static inline uint32_t cmdpacket3(int cmd_type)
{
- if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ)
- radeonFlushCmdBuf( rmesa, __FUNCTION__ );
+ drm_radeon_cmd_header_t cmd;
+
+ cmd.i = 0;
+ cmd.header.cmd_type = cmd_type;
+
+ return (uint32_t)cmd.i;
- {
- char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used;
- rmesa->store.cmd_used += bytes;
- return head;
- }
}
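+
+/* Emit a PACKET3 header. Both branches cost two dwords: the legacy path
+ * prefixes the packet with a command-stream header, while the kernel_mm
+ * path pads with a CP_PACKET2 NOP so batch sizing stays identical. */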
+#define OUT_BATCH_PACKET3(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } else { \
+ OUT_BATCH(CP_PACKET2); \
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } \
+ } while(0)
+
+
#endif /* __RADEON_IOCTL_H__ */
if (rmesa->tcl.indexed_verts.buf)
radeonReleaseArrays( ctx, ~0 );
- radeonAllocDmaRegion( rmesa,
- &rmesa->tcl.indexed_verts,
+ radeonAllocDmaRegion( &rmesa->radeon,
+ 0, &rmesa->tcl.indexed_verts,
VB->Count * setup_tab[i].vertex_size * 4,
4);
rmesa->tcl.indexed_verts.start );
rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
- rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
+ // rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts );
rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size;
rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size;
_tnl_print_vert_flags( __FUNCTION__, newinputs );
#endif
- if (newinputs)
- radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
+ /// if (newinputs)
+ /// radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ );
}
* information.
*/
#define LOCAL_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
const GLuint bottom = dPriv->h - 1; \
- GLubyte *buf = (GLubyte *) drb->flippedData \
- + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \
GLuint p; \
(void) p;
#define LOCAL_DEPTH_VARS \
- driRenderbuffer *drb = (driRenderbuffer *) rb; \
- const __DRIdrawablePrivate *dPriv = drb->dPriv; \
+ struct radeon_renderbuffer *rrb = (void *) rb; \
+ const __DRIdrawablePrivate *dPriv = rrb->dPriv; \
const GLuint bottom = dPriv->h - 1; \
GLuint xo = dPriv->x; \
- GLuint yo = dPriv->y; \
- GLubyte *buf = (GLubyte *) drb->Base.Data;
+ GLuint yo = dPriv->y;
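+/* Spans no longer do pointer math on drb->flippedData; the radeon_ptr*
+ * helpers below resolve (x, y) to an address inside the renderbuffer's
+ * mapped BO. */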
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
+#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
#include "spantmp2.h"
/* 32 bit, ARGB8888 color spanline and pixel functions
#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
-#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
+#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
#include "spantmp2.h"
-/* ================================================================
- * Depth buffer
- */
-
-/* The Radeon family has depth tiling on all the time, so we have to convert
- * the x,y coordinates into the memory bus address (mba) in the same
- * manner as the engine. In each case, the linear block address (ba)
- * is calculated, and then wired with x and y to produce the final
- * memory address.
- * The chip will do address translation on its own if the surface registers
- * are set up correctly. It is not quite enough to get it working with hyperz
- * too...
- */
-
-static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 4 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0..1] = 0 */
-
-#ifdef COMPILE_R300
- ba = (y / 8) * (pitch / 8) + (x / 8);
-#else
- ba = (y / 16) * (pitch / 16) + (x / 16);
-#endif
-
- address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */
- address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */
- address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
-
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
-static INLINE GLuint
-radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
-{
- GLuint pitch = drb->pitch;
- if (drb->depthHasSurface) {
- return 2 * (x + y * pitch);
- } else {
- GLuint ba, address = 0; /* a[0] = 0 */
-
- ba = (y / 16) * (pitch / 32) + (x / 32);
-
- address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */
- address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */
- address |= (x & 0x8) << 4; /* a[7] = x[3] */
- address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */
- address |= (y & 0x8) << 7; /* a[10] = y[3] */
- address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */
- address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */
-
- return address;
- }
-}
-
/* 16-bit depth buffer functions
*/
#define VALUE_TYPE GLushort
#define WRITE_DEPTH( _x, _y, d ) \
- *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
+ *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
+ d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)
#define TAG(x) radeon##x##_z16
#include "depthtmp.h"
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
+ GLuint tmp = *_ptr; \
tmp &= 0xff000000; \
tmp |= ((d) & 0x00ffffff); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y ) \
- do { \
- d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0xffffff00) >> 8; \
+ do { \
+ d = (*(GLuint *)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0xffffff00) >> 8; \
}while(0)
#else
#define READ_DEPTH( d, _x, _y ) \
- d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \
- _y + yo )) & 0x00ffffff;
+ do { \
+ d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0x00ffffff); \
+ } while (0)
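+/* z24_s8: depth occupies the low 24 bits and stencil the high byte, so
+ * depth writes preserve 0xff000000 and stencil writes preserve 0x00ffffff. */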
#endif
#define TAG(x) radeon##x##_z24_s8
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
+ GLuint tmp = *_ptr; \
tmp &= 0x00ffffff; \
tmp |= (((d) & 0xff) << 24); \
- *(GLuint *)(buf + offset) = tmp; \
+ *_ptr = tmp; \
} while (0)
#endif
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
- GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \
- GLuint tmp = *(GLuint *)(buf + offset); \
+ GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo); \
+ GLuint tmp = *_ptr; \
d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif
#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
-/* Move locking out to get reasonable span performance (10x better
- * than doing this in HW_LOCK above). WaitForIdle() is the main
- * culprit.
- */
-
-static void radeonSpanRenderStart(GLcontext * ctx)
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-#ifdef COMPILE_R300
- r300ContextPtr r300 = (r300ContextPtr) rmesa;
- R300_FIREVERTICES(r300);
-#else
- r100ContextPtr r100 = (r100ContextPtr) rmesa;
- RADEON_FIREVERTICES(r100);
-#endif
- LOCK_HARDWARE(rmesa);
- radeonWaitForIdleLocked(rmesa);
-}
-
-static void radeonSpanRenderFinish(GLcontext * ctx)
-{
- radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
- _swrast_flush(ctx);
- UNLOCK_HARDWARE(rmesa);
-}
-
void radeonInitSpanFuncs(GLcontext * ctx)
{
struct swrast_device_driver *swdd =
#include "radeon_tcl.h"
-static void flush_last_swtcl_prim(GLcontext *ctx);
-
/* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 */
/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */
#define EMIT_ATTR( ATTR, STYLE, F0 ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
fmt_0 |= F0; \
} while (0)
#define EMIT_PAD( N ) \
do { \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \
- rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \
- rmesa->swtcl.vertex_attr_count++; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
static GLuint radeon_cp_vc_frmts[3][2] =
}
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
- rmesa->swtcl.vertex_attr_count = 0;
+ rmesa->radeon.swtcl.vertex_attr_count = 0;
/* EMIT_ATTR's must be in order as they tell t_vertex.c how to
* build up a hardware vertex.
fmt_0 != rmesa->swtcl.vertex_format) {
RADEON_NEWPRIM(rmesa);
rmesa->swtcl.vertex_format = fmt_0;
- rmesa->swtcl.vertex_size =
+ rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
- rmesa->swtcl.vertex_attrs,
- rmesa->swtcl.vertex_attr_count,
+ rmesa->radeon.swtcl.vertex_attrs,
+ rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
- rmesa->swtcl.vertex_size /= 4;
+ rmesa->radeon.swtcl.vertex_size /= 4;
RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
if (RADEON_DEBUG & DEBUG_VERTS)
fprintf( stderr, "%s: vertex_size= %d floats\n",
- __FUNCTION__, rmesa->swtcl.vertex_size);
+ __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
}
}
radeonSetVertexFormat( ctx );
- if (rmesa->dma.flush != 0 &&
- rmesa->dma.flush != flush_last_swtcl_prim)
- rmesa->dma.flush( ctx );
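+ /* If a different flush callback is pending (e.g. TCL elts), run it
+  * before the DMA region is repurposed for swtcl vertices. */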
+ if (rmesa->radeon.dma.flush != 0 &&
+ rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim)
+ rmesa->radeon.dma.flush( ctx );
}
}
}
-
-/* Flush vertices in the current dma region.
- */
-static void flush_last_swtcl_prim(GLcontext *ctx)
+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
- if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s\n", __FUNCTION__);
-
- rmesa->dma.flush = NULL;
-
- if (rmesa->dma.current.buf) {
- struct radeon_dma_region *current = &rmesa->dma.current;
- GLuint current_offset = (rmesa->radeon.radeonScreen->gart_buffer_offset +
- current->buf->buf->idx * RADEON_BUFFER_SIZE +
- current->start);
-
- assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
-
- assert (current->start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- current->ptr);
-
- if (rmesa->dma.current.start != rmesa->dma.current.ptr) {
- radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ +
- rmesa->hw.max_state_size + VBUF_BUFSZ );
-
- radeonEmitVertexAOS( rmesa,
- rmesa->swtcl.vertex_size,
- current_offset);
-
- radeonEmitVbufPrim( rmesa,
- rmesa->swtcl.vertex_format,
- rmesa->swtcl.hw_primitive,
- rmesa->swtcl.numverts);
- }
-
- rmesa->swtcl.numverts = 0;
- current->start = current->ptr;
- }
-}
-
-
-/* Alloc space in the current dma region.
- */
-static INLINE void *
-radeonAllocDmaLowVerts( r100ContextPtr rmesa, int nverts, int vsize )
-{
- GLuint bytes = vsize * nverts;
- if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end )
- radeonRefillCurrentDmaRegion( rmesa );
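+ /* Reserve room for the full state emit plus the vertex-AOS and
+  * vbuf-prim packets before starting the batch. */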
+ rcommonEnsureCmdBufSpace(&rmesa->radeon,
+ rmesa->hw.max_state_size + (12*sizeof(int)),
+ __FUNCTION__);
- if (!rmesa->dma.flush) {
- rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
- rmesa->dma.flush = flush_last_swtcl_prim;
- }
-
- assert( vsize == rmesa->swtcl.vertex_size * 4 );
- assert( rmesa->dma.flush == flush_last_swtcl_prim );
- assert (rmesa->dma.current.start +
- rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
- rmesa->dma.current.ptr);
+ radeonEmitState(rmesa);
+ radeonEmitVertexAOS( rmesa,
+ rmesa->radeon.swtcl.vertex_size,
+ // rmesa->radeon.dma.current,
+ current_offset);
- {
- GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr);
- rmesa->dma.current.ptr += bytes;
- rmesa->swtcl.numverts += nverts;
- return head;
- }
+
+ radeonEmitVbufPrim( rmesa,
+ rmesa->swtcl.vertex_format,
+ rmesa->radeon.swtcl.hw_primitive,
+ rmesa->radeon.swtcl.numverts);
}
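+/* r100_swtcl_flush is reached through the radeon->vtbl.swtcl_flush hook
+ * installed at context creation; the common DMA flush code passes in the
+ * GART offset of the current vertex region. */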
-
/*
* Render unclipped vertex buffers by emitting vertices directly to
* dma buffers. Use strip/fan hardware primitives where possible.
radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
{
RADEON_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hw_prim[prim];
- assert(rmesa->dma.current.ptr == rmesa->dma.current.start);
+ rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];
+ // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
}
#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
#define FLUSH() RADEON_NEWPRIM( rmesa )
-#define GET_CURRENT_VB_MAX_VERTS() \
- (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4))
+/* FIXME: hardcoded; was
+ * (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4))
+ */
+#define GET_CURRENT_VB_MAX_VERTS() 10
#define GET_SUBSEQUENT_VB_MAX_VERTS() \
- ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4))
+ ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
#define ALLOC_VERTS( nr ) \
- radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 )
+ rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 )
#define EMIT_VERTS( ctx, j, nr, buf ) \
_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
if (rmesa->swtcl.indexed_verts.buf)
RELEASE_ELT_VERTS();
- if (rmesa->swtcl.RenderIndex != 0 ||
+ if (rmesa->radeon.swtcl.RenderIndex != 0 ||
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r100ContextPtr rmesa
-#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size
-#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 )
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 )
#undef LOCAL_VARS
#define LOCAL_VARS \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
- const char *radeonverts = (char *)rmesa->swtcl.verts;
+ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
#define VERTEX radeonVertex
#undef TAG
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
-#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int)))
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
#define VERT_SET_RGBA( v, c ) \
do { \
***********************************************************************/
#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
-#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
#undef LOCAL_VARS
#define LOCAL_VARS \
r100ContextPtr rmesa = R100_CONTEXT(ctx); \
- const GLuint vertsize = rmesa->swtcl.vertex_size; \
- const char *radeonverts = (char *)rmesa->swtcl.verts; \
+ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
+ const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT;
if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT;
- if (index != rmesa->swtcl.RenderIndex) {
+ if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
- rmesa->swtcl.RenderIndex = index;
+ rmesa->radeon.swtcl.RenderIndex = index;
}
}
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
- if (rmesa->swtcl.hw_primitive != hwprim) {
+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
RADEON_NEWPRIM( rmesa );
- rmesa->swtcl.hw_primitive = hwprim;
+ rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
- rmesa->swtcl.render_primitive = prim;
+ rmesa->radeon.swtcl.render_primitive = prim;
if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
radeonRasterPrimitive( ctx, reduced_hw_prim[prim] );
}
RADEON_FIREVERTICES( rmesa );
TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
_swsetup_Wakeup( ctx );
- rmesa->swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
if (RADEON_DEBUG & DEBUG_FALLBACKS) {
fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
bit, getFallbackString(bit));
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
RADEON_MAX_TNL_VERTEX_SIZE);
- rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
- rmesa->swtcl.RenderIndex = ~0;
- rmesa->swtcl.render_primitive = GL_TRIANGLES;
- rmesa->swtcl.hw_primitive = 0;
+ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->radeon.swtcl.hw_primitive = 0;
}
{
r100ContextPtr rmesa = R100_CONTEXT(ctx);
- if (rmesa->swtcl.indexed_verts.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
+ // if (rmesa->swtcl.indexed_verts.buf)
+ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+ // __FUNCTION__ );
}
extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
-
+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
#endif
static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr )
{
- if (rmesa->dma.flush)
- rmesa->dma.flush( rmesa->radeon.glCtx );
+ if (rmesa->radeon.dma.flush)
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
- rmesa->hw.max_state_size + ELTS_BUFSZ(nr));
+ rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->tcl.nr_aos_components) +
+ rmesa->hw.max_state_size + ELTS_BUFSZ(nr), __FUNCTION__);
radeonEmitAOS( rmesa,
rmesa->tcl.aos_components,
rmesa->tcl.nr_aos_components, 0 );
return radeonAllocEltsOpenEnded( rmesa,
rmesa->tcl.vertex_format,
rmesa->hw.max_state_size + VBUF_BUFSZ );
radeonEmitAOS( rmesa,
+ rmesa->tcl.aos_components,
rmesa->tcl.nr_aos_components,
start );
/* Why couldn't this packet have taken an offset param?
*/
radeonEmitVbufPrim( rmesa,
+ 0,
rmesa->tcl.hw_primitive,
count - start );
}
tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
- if ( rmesa->dma.flush )
- rmesa->dma.flush( rmesa->radeon.glCtx );
+ if ( rmesa->radeon.dma.flush )
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
- rmesa->dma.flush = NULL;
+ rmesa->radeon.dma.flush = NULL;
rmesa->swtcl.vertex_format = 0;
- if (rmesa->swtcl.indexed_verts.buf)
- radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
- __FUNCTION__ );
+ // if (rmesa->swtcl.indexed_verts.buf)
+ // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts,
+ // __FUNCTION__ );
if (RADEON_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "Radeon end tcl fallback\n");
#include "main/texobj.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_swtcl.h"
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
- functions->GenerateMipmap = radeon_generate_mipmap;
+ functions->GenerateMipmap = radeonGenerateMipmap;
functions->NewTextureImage = radeonNewTextureImage;
functions->FreeTexImageData = radeonFreeTexImageData;
#include "main/enums.h"
#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
#include "radeon_state.h"
#include "radeon_ioctl.h"
#include "radeon_swtcl.h"
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
&& (tx_table[f].format != 0xffffffff) )
-static const struct {
+struct tx_table {
GLuint format, filter;
-}
-tx_table[] =
+};
+
+static const struct tx_table tx_table[] =
{
_ALPHA(RGBA8888),
_ALPHA_REV(RGBA8888),
RADEON_TXFORMAT_CUBIC_MAP_ENABLE | \
RADEON_TXFORMAT_NON_POWER2)
-
+#if 0
static void import_tex_obj_state( r100ContextPtr rmesa,
int unit,
radeonTexObjPtr texobj )
{
/* do not use RADEON_DB_STATE to avoid stale texture caches */
- int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+ uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
RADEON_STATECHANGE( rmesa, tex[unit] );
texobj->dirty_state &= ~(1<<unit);
}
-
+#endif
if (!t->image_override) {
if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
- const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
- tx_table_be;
+ const struct tx_table *table = tx_table;
t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
RADEON_TXFORMAT_ALPHA_IN_MAP);
(log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
}
- t->pp_txsize = (((firstImage->Width - 1) << RADEON_PP_TX_WIDTHMASK_SHIFT)
- | ((firstImage->Height - 1) << RADEON_PP_TX_HEIGHTMASK_SHIFT));
+ t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
+ | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
if ( !t->image_override ) {
if (firstImage->IsCompressed)